New Upstream Release - vkfft

QA Page
Maintainer email: debian-pan-maintainers@alioth-lists.debian.net
Automatic publish policy: main: push-derived, pristine-tar: push-derived, upstream: push-derived
Last processed: 2023-11-06T02:21 (took 20m2s)
Branch URL: https://salsa.debian.org/science-team/vkfft.git -b master (taken from version 1.3.4+ds2-1)
Queue position: 121203 (a 50w5d wait)

Ready changes

Summary

Merged new upstream version: 1.2.31+ds1 (was: 1.2.26+ds1).

Resulting package

Built on 2023-06-11T12:55 (took 9m47s)

The resulting binary package can be installed (if you have the apt repository enabled) by running:

apt install -t fresh-releases libvkfft-dev

Lintian Result

libvkfft-dev_1.2.31+ds1-1~jan+nur1_all.deb

vkfft_1.2.31+ds1-1~jan+nur1.dsc

vkfft_1.2.31+ds1-1~jan+nur1_amd64.buildinfo

vkfft_1.2.31+ds1-1~jan+nur1_amd64.changes

Diff

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 25e74e8..5b1a2b8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ project(Vulkan_FFT)
 set(CMAKE_CONFIGURATION_TYPES "Release" CACHE STRING "" FORCE)
 set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
 include(FetchContent)
-set(VKFFT_BACKEND 0 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero")
+set(VKFFT_BACKEND 0 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal")
 
 if(${VKFFT_BACKEND} EQUAL 1)
 	option(build_VkFFT_cuFFT_benchmark "Build VkFFT cuFFT benchmark" ON)
@@ -18,6 +18,8 @@ else()
 endif()
 
 option(build_VkFFT_FFTW_precision "Build VkFFT FFTW precision comparison" OFF)
+option(VkFFT_use_FP128_Bluestein_RaderFFT "Use FP128 for Bluestein and Rader FFT kernel calculations. Currently requires FP128 FFT library, like FFTWl" OFF)
+
 if (MSVC)
 	set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})
 	add_definitions(-D_CRT_SECURE_NO_WARNINGS)
@@ -75,7 +77,7 @@ else()
 		benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
 		benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp)
 endif()
-target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_11)
+target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_11)  
 add_definitions(-DVKFFT_BACKEND=${VKFFT_BACKEND})
 if(${VKFFT_BACKEND} EQUAL 0)
 	find_package(Vulkan REQUIRED)
@@ -122,9 +124,18 @@ elseif(${VKFFT_BACKEND} EQUAL 4)
 		NO_DEFAULT_PATH
 	  )
 	target_include_directories(${PROJECT_NAME} PUBLIC ${LevelZero_INCLUDES})
+elseif(${VKFFT_BACKEND} EQUAL 5)
+	add_compile_options(-WMTL_IGNORE_WARNINGS)
+	find_library(FOUNDATION_LIB Foundation REQUIRED)
+	find_library(QUARTZ_CORE_LIB QuartzCore REQUIRED)	
+	find_library(METAL_LIB Metal REQUIRED)
+	target_include_directories(${PROJECT_NAME} PUBLIC "metal-cpp/")
 endif()
 
 target_compile_definitions(${PROJECT_NAME} PUBLIC -DVK_API_VERSION=11)#10 - Vulkan 1.0, 11 - Vulkan 1.1, 12 - Vulkan 1.2 
+if(VkFFT_use_FP128_Bluestein_RaderFFT)
+        target_compile_definitions(${PROJECT_NAME} PUBLIC -DVkFFT_use_FP128_Bluestein_RaderFFT)
+endif()  
 if(${VKFFT_BACKEND} EQUAL 0)
 	FetchContent_Declare(
 		glslang-master
@@ -159,15 +170,17 @@ elseif(${VKFFT_BACKEND} EQUAL 3)
 	target_link_libraries(${PROJECT_NAME} PUBLIC OpenCL::OpenCL VkFFT half)
 elseif(${VKFFT_BACKEND} EQUAL 4)
 	target_link_libraries(${PROJECT_NAME} PUBLIC ze_loader VkFFT half)
+elseif(${VKFFT_BACKEND} EQUAL 5)
+	target_link_libraries(${PROJECT_NAME} PUBLIC ${FOUNDATION_LIB} ${QUARTZ_CORE_LIB} ${METAL_LIB} VkFFT half)
 endif()
 
-if(build_VkFFT_FFTW_precision)
+if(build_VkFFT_FFTW_precision OR VkFFT_use_FP128_Bluestein_RaderFFT)
 	add_definitions(-DUSE_FFTW)
 	set(FFTW3_LIB_DIR "/usr/lib/x86_64-linux-gnu/")
 	set(FFTW3_INCLUDE_DIR "/usr/include/")
 	find_library(
 		FFTW_LIB
-		NAMES "libfftw3-3" "fftw3"
+		NAMES "libfftw3-3" "fftw3" 
 		PATHS ${FFTW3_LIB_DIR}
 		PATH_SUFFIXES "lib" "lib64"
 		NO_DEFAULT_PATH
@@ -179,9 +192,19 @@ if(build_VkFFT_FFTW_precision)
 		PATH_SUFFIXES "include"
 		NO_DEFAULT_PATH
 	  )
-	
-	target_link_libraries (${PROJECT_NAME} PUBLIC ${FFTW_LIB})
-	target_include_directories(${PROJECT_NAME} PUBLIC ${FFTW_INCLUDES})
+        target_include_directories(${PROJECT_NAME} PUBLIC ${FFTW_INCLUDES})
+if(VkFFT_use_FP128_Bluestein_RaderFFT)        
+	find_library(
+		FFTWL_LIB
+		NAMES "libfftw3l" "fftw3l"
+		PATHS ${FFTW3_LIB_DIR}
+		PATH_SUFFIXES "lib" "lib64"
+		NO_DEFAULT_PATH
+	  )
+	target_link_libraries (${PROJECT_NAME} PUBLIC ${FFTW_LIB} ${FFTWL_LIB})
+else()
+        target_link_libraries (${PROJECT_NAME} PUBLIC ${FFTW_LIB})
+endif()
 endif()
 
 if(build_VkFFT_cuFFT_benchmark)
diff --git a/README.md b/README.md
index e0b10e8..0baeef6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 [![Build Status](https://travis-ci.com/DTolm/VkFFT.svg?token=nMgUQeqx7PXMeCFaXqsb&branch=master)](https://travis-ci.com/github/DTolm/VkFFT)
-# VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero Fast Fourier Transform library
-VkFFT is an efficient GPU-accelerated multidimensional Fast Fourier Transform library for Vulkan/CUDA/HIP/OpenCL/Level Zero projects. VkFFT aims to provide the community with an open-source alternative to Nvidia's cuFFT library while achieving better performance. VkFFT is written in C language and supports Vulkan, CUDA, HIP, OpenCL and Level Zero as backends.
+# VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero/Metal Fast Fourier Transform library
+VkFFT is an efficient GPU-accelerated multidimensional Fast Fourier Transform library for Vulkan/CUDA/HIP/OpenCL/Level Zero/Metal projects. VkFFT aims to provide the community with an open-source alternative to Nvidia's cuFFT library while achieving better performance. VkFFT is written in C language and supports Vulkan, CUDA, HIP, OpenCL, Level Zero and Metal as backends.
+
+## Check out my poster at SC22: https://sc22.supercomputing.org/presentation/?id=rpost143&sess=sess273
 
 ## Check out my panel at Nvidia's GTC 2021 in Higher Education and Research category: https://gtc21.event.nvidia.com/
 
@@ -15,6 +17,7 @@ VkFFT is an efficient GPU-accelerated multidimensional Fast Fourier Transform li
   - Forward and inverse directions of FFT
   - Support for big FFT dimension sizes. Current limits: C2C or even C2R/R2C - (2^32, 2^32, 2^32).  Odd C2R/R2C - (2^12, 2^32, 2^32). R2R - (2^12, 2^12, 2^12). Depends on the amount of shared memory on the device. (will be increased later).
   - Radix-2/3/4/5/7/8/11/13 FFT. Sequences using radix 3, 5, 7, 11 and 13 have comparable performance to that of powers of 2.
+  - Rader's FFT algorithm for primes from 17 up to max shared memory length (~10000). Inlined and done without additional memory transfers.
   - Bluestein's FFT algorithm for all other sequences. Full coverage of C2C range, single upload (2^12, 2^12, 2^12) for R2C/C2R/R2R. Optimized to have as few memory transfers as possible by using zero padding and merged convolution support of VkFFT
   - Single, double and half precision support. Double precision uses CPU-generated LUT tables. Half precision still does all computations in single and only uses half precision to store data.
   - All transformations are performed in-place with no performance loss. Out-of-place transforms are supported by selecting different input/output buffers.
@@ -25,9 +28,9 @@ VkFFT is an efficient GPU-accelerated multidimensional Fast Fourier Transform li
   - WHDCN layout - data is stored in the following order (sorted by increase in strides): the width, the height, the depth, the coordinate (the number of feature maps), the batch number
   - Multiple feature/batch convolutions - one input, multiple kernels
   - Multiple input/output/temporary buffer split. Allows using data split between different memory allocations and mitigates 4GB single allocation limit.
-  - Works on Nvidia, AMD and Intel GPUs. And Raspberry Pi 4 GPU.
+  - Works on Nvidia, AMD, Intel and Apple GPUs. And Raspberry Pi 4 GPU.
   - Works on Windows, Linux and macOS
-  - VkFFT supports Vulkan, CUDA, HIP, OpenCL and Level Zero as backend to cover wide range of APIs
+  - VkFFT supports Vulkan, CUDA, HIP, OpenCL, Level Zero and Metal as backend to cover wide range of APIs
   - Header-only library with Vulkan interface, which allows appending VkFFT directly to user's command buffer. Kernels are compiled at run-time
 ## Future release plan
  - ##### Planned
@@ -52,6 +55,11 @@ To build OpenCL version of the benchmark, replace VKFFT_BACKEND in CMakeLists (l
 Level Zero:
 Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition. Clang and llvm-spirv must be valid system calls. Only single/double precision for now.\
 To build Level Zero version of the benchmark, replace VKFFT_BACKEND in CMakeLists (line 5) with the value 4 and optionally enable FFTW.
+
+Metal:
+Include the vkFFT.h file. Provide the library with correctly chosen VKFFT_BACKEND definition. VkFFT uses metal-cpp as C++ bindings to Apple's libraries - Foundation.hpp, QuartzCore.hpp and Metal.hpp. Only single precision is supported.\
+To build Metal version of the benchmark, replace VKFFT_BACKEND in CMakeLists (line 5) with the value 5 and optionally enable FFTW.
+
 ## Command-line interface
 VkFFT has a command-line interface with the following set of commands:\
 -h: print help\
@@ -70,32 +78,19 @@ VkFFT.h is a library that can append FFT, iFFT or convolution calculation to the
 VkFFT achieves striding by grouping nearby FFTs instead of transpositions. \
 Explicit VkFFT documentation can be found in the documentation folder.
 ## Benchmark results in comparison to cuFFT
-To measure how Vulkan FFT implementation works in comparison to cuFFT, we will perform many 1D, 2D and 3D tests, ranging from the small systems to the big ones. The test will consist of performing C2C FFT and inverse C2C FFT consecutively multiple times to calculate the average time required. The results are obtained on Nvidia RTX 3080, AMD Radeon VII and AMD Radeon 6800XT graphics cards with no other GPU load. Launching -test key from Vulkan_FFT.cpp performs VkFFT/cuFFT benchmark. The overall benchmark score is calculated as an averaged performance score over presented set of systems (the bigger - the better): sum(system_size/average_iteration_time) /num_benchmark_samples
-
-The stable flat lines present for small sequence lengths indicate that time scales linearly with the system size, so the bigger the bandwidth the better the result will be. The stepwise drops occur once the amount of transfers increases from to 2x and to 3x when compute unit can't hold full sequence and splits it into  the combination of smaller ones. Radeon VII is faster than RTX 3080 below 2^18 (=2MB - page file size on AMD due to it having HBM2 memory with higher bandwidth, however, this GPU apparently has TLB miss problems on large buffer sizes. On RTX 3080, VkFFT is faster than cuFFT in single precision batched 1D FFTs on the range from 2^3 to 2^27:
-![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/vkfft_benchmark_single.png?raw=true)
-In double precision Radeon VII is able to get an advantage due to its high double precision core count. Radeon RX 6800XT can store LUT in the L3 cache and has a higher double precision core count as well:
-![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/vkfft_benchmark_double.png?raw=true)
-In half precision mode, VkFFT only uses it for data storage, all computations are performed in single. It still proves to be enough to get a stable 2x performance gain on RTX 3080: 
-![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/vkfft_benchmark_half.png?raw=true)
-Multidimensional systems are optimized as well. Benchmark shows Radeon RX 6800XT can store systems up to 128MB in the L3 cache for big performance gains. Native support for zero padding allows to transfer less data and get up to 3x performance boost in multidimensional FFTs:
-![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/vkfft_benchmark_2d.png?raw=true)
-![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/vkfft_benchmark_3d.png?raw=true)
-The test configuration below takes multiple 1D FFTs of a supported sequence length from the range of 2 to 4096, batch them together so the full system takes from 500MB to 1GB of data and perform multiple consecutive FFTs/iFFTs (-vkfft 1000 key). After that time per a single FFT is obtained by averaging the result.   Total system size will be divided by the time taken by a single transform upload+download, resulting in the achieved bandwidth. The GPUs used in this comparison are Nvidia A100 and AMD MI100. The performance was compared against Nvidia cuFFT (CUDA 11.2 version) and AMD rocFFT (ROCm 4.1 version) libraries in single precision: 
-![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/fp32_cuda_a100.png?raw=true)
-![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/fp32_hip_mi100.png?raw=true)
+The test configuration below takes multiple 1D FFTs of all lengths from the range of 2 to 4096, batches them together so the full system takes from 500MB to 1GB of data, and performs multiple consecutive FFTs/iFFTs (-vkfft 1001 key). After that, the time per single FFT is obtained by averaging the results. The total system size is then divided by the time taken by a single transform upload+download, resulting in an estimate of the achieved global bandwidth. The GPUs used in this comparison are Nvidia A100 and AMD MI250. The performance was compared against Nvidia cuFFT (CUDA 11.7 version) and AMD rocFFT (ROCm 5.2 version) libraries in double precision: 
+![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/fp64_cuda_a100.png?raw=true)
+![alt text](https://github.com/DTolm/VkFFT/blob/master/benchmark_plot/fp64_hip_mi250.png?raw=true)
 ## Precision comparison of cuFFT/VkFFT/FFTW
-To measure how VkFFT (single/double/half precision) results compare to cuFFT/rocFFT (single/double/half precision) and FFTW (double precision), a set of ~60 systems covering full FFT range was filled with random complex data on the scale of [-1,1] and one C2C transform was performed on each system. Samples 11(single), 12(double), 13(half) calculate for each value of the transformed system:
+![alt text](https://github.com/DTolm/VkFFT/blob/master/precision_results/FP64_precision.png?raw=true)
+![alt text](https://github.com/DTolm/VkFFT/blob/master/precision_results/FP32_precision.png?raw=true)
+
+Above, VkFFT precision is verified by comparing its results with FP128 version of FFTW. We test all FFT lengths from the [2, 100000] range. We perform tests in single and double precision on random input data from [-1;1] range.
+
+For both precisions, all tested libraries exhibit logarithmic error scaling. The main source of error is imprecise twiddle factor computation – sines and cosines used by FFT algorithms. For FP64 they are calculated on the CPU either in FP128 or in FP64 and stored in the lookup tables. With FP128 precomputation (left) VkFFT is more precise than cuFFT and rocFFT. 
 
-- Max difference between cuFFT/rocFFT/VkFFT result and FFTW result
-- Average difference between cuFFT/rocFFT/VkFFT result and FFTW result
-- Max ratio of the difference between cuFFT/rocFFT/VkFFT result and FFTW result to the FFTW result
-- Average ratio of the difference between cuFFT/rocFFT/VkFFT result and FFTW result to the FFTW result
+For FP32, twiddle factors can be calculated on-the-fly in FP32 or precomputed in FP64/FP32. With FP32 twiddle factors (right) VkFFT is slightly less precise in Bluestein’s and Rader’s algorithms. If needed, this can be solved with FP64 precomputation. 
 
-FFTW is required to launch these samples (specify in CMakeLists include and library directories). If cuFFT is disabled, only FFTW/VkFFT results are calculated.\
-The precision_cuFFT_VkFFT_FFTW.txt file contains the single precision results for Nvidia's 1660Ti GPU and AMD Ryzen 2700 CPU. On average, the results fluctuate both for cuFFT and VkFFT with no clear winner in single precision. Max ratio stays in the range of 2% for both cuFFT and VkFFT, while the average ratio stays below 1e-6.\
-The precision_cuFFT_VkFFT_FFTW_double.txt file contains the double precision results for Nvidia's 1660Ti GPU and AMD Ryzen 2700 CPU. On average, VkFFT is more precise than cuFFT in double precision (see: max_difference and max_eps columns), however, it is also ~20% slower (vkfft_benchmark_double.png). Note that double precision is still in testing and these results may change in the future. Max ratio stays in the range of 5e-10% for both cuFFT and VkFFT, while the average ratio stays below 1e-15. Overall, double precision is ~7 times slower than single on Nvidia's 1660Ti GPU.\
-The precision_cuFFT_VkFFT_FFTW_half.txt file contains the half precision results for Nvidia's 1660Ti GPU and AMD Ryzen 2700 CPU. On average, VkFFT is at least two times more precise than cuFFT in half precision (see: max_difference and max_eps columns), while being faster on average (vkfft_benchmark_half.png). Note that half precision is still in testing and is only used to store data in VkFFT. cuFFT script can probably also be improved. The average ratio stays in the range of 0.2% for both cuFFT and VkFFT. Overall, half precision of VkFFT is ~50%-100% times faster than single on Nvidia's 1660Ti GPU.
 ## Contact information
 The initial version of VkFFT is developed by Tolmachev Dmitrii\
-E-mail 1: <dtolm96@gmail.com>
\ No newline at end of file
+E-mail 1: <dtolm96@gmail.com>
diff --git a/Vulkan_FFT.cpp b/Vulkan_FFT.cpp
index 825102e..2e1ad44 100644
--- a/Vulkan_FFT.cpp
+++ b/Vulkan_FFT.cpp
@@ -35,6 +35,19 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#ifndef NS_PRIVATE_IMPLEMENTATION
+#define NS_PRIVATE_IMPLEMENTATION
+#endif
+#ifndef CA_PRIVATE_IMPLEMENTATION
+#define CA_PRIVATE_IMPLEMENTATION
+#endif
+#ifndef MTL_PRIVATE_IMPLEMENTATION
+#define MTL_PRIVATE_IMPLEMENTATION
+#endif
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -270,192 +283,198 @@ VkFFTResult launchVkFFT(VkGPU* vkGPU, uint64_t sample_id, bool file_output, FILE
 		free(deviceList);
 	}
 	free(drivers);
+#elif(VKFFT_BACKEND==5)
+    NS::Array* devices = MTL::CopyAllDevices();
+    MTL::Device* device = (MTL::Device*)devices->object(vkGPU->device_id);
+    vkGPU->device = device;
+    MTL::CommandQueue* queue = device->newCommandQueue();
+    vkGPU->queue = queue;
 #endif
 
 	uint64_t isCompilerInitialized = 1;
 
-	switch (sample_id) {
-	case 0:
-	{
-		resFFT = sample_0_benchmark_VkFFT_single(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 1:
-	{
-		resFFT = sample_1_benchmark_VkFFT_double(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
+    switch (sample_id) {
+    case 0:
+    {
+        resFFT = sample_0_benchmark_VkFFT_single(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 1:
+    {
+        resFFT = sample_1_benchmark_VkFFT_double(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
 #if ((VKFFT_BACKEND==0)&&(VK_API_VERSION>10))
-	case 2:
-	{
-		resFFT = sample_2_benchmark_VkFFT_half(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
+    case 2:
+    {
+        resFFT = sample_2_benchmark_VkFFT_half(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
 #endif
-	case 3:
-	{
-		resFFT = sample_3_benchmark_VkFFT_single_3d(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 4:
-	{
-		resFFT = sample_4_benchmark_VkFFT_single_3d_zeropadding(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 5:
-	{
-		resFFT = sample_5_benchmark_VkFFT_single_disableReorderFourStep(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 6:
-	{
-		resFFT = sample_6_benchmark_VkFFT_single_r2c(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 7:
-	{
-		resFFT = sample_7_benchmark_VkFFT_single_Bluestein(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 8:
-	{
-		resFFT = sample_8_benchmark_VkFFT_double_Bluestein(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
+    case 3:
+    {
+        resFFT = sample_3_benchmark_VkFFT_single_3d(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 4:
+    {
+        resFFT = sample_4_benchmark_VkFFT_single_3d_zeropadding(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 5:
+    {
+        resFFT = sample_5_benchmark_VkFFT_single_disableReorderFourStep(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 6:
+    {
+        resFFT = sample_6_benchmark_VkFFT_single_r2c(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 7:
+    {
+        resFFT = sample_7_benchmark_VkFFT_single_Bluestein(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 8:
+    {
+        resFFT = sample_8_benchmark_VkFFT_double_Bluestein(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
 #if(VKFFT_BACKEND==0)
-	case 10:
-	{
-		resFFT = sample_10_benchmark_VkFFT_single_multipleBuffers(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
+    case 10:
+    {
+        resFFT = sample_10_benchmark_VkFFT_single_multipleBuffers(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
 #endif
 #ifdef USE_FFTW
-	case 11:
-	{
-		resFFT = sample_11_precision_VkFFT_single(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 12:
-	{
-		resFFT = sample_12_precision_VkFFT_double(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
+    case 11:
+    {
+        resFFT = sample_11_precision_VkFFT_single(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 12:
+    {
+        resFFT = sample_12_precision_VkFFT_double(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
 #if ((VKFFT_BACKEND==0)&&(VK_API_VERSION>10))
-	case 13:
-	{
-		resFFT = sample_13_precision_VkFFT_half(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
+    case 13:
+    {
+        resFFT = sample_13_precision_VkFFT_half(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
 #endif
-	case 14:
-	{
-		resFFT = sample_14_precision_VkFFT_single_nonPow2(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 15:
-	{
-		resFFT = sample_15_precision_VkFFT_single_r2c(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 16:
-	{
-		resFFT = sample_16_precision_VkFFT_single_dct(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 17:
-	{
-		resFFT = sample_17_precision_VkFFT_double_dct(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 18:
-	{
-		resFFT = sample_18_precision_VkFFT_double_nonPow2(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
+    case 14:
+    {
+        resFFT = sample_14_precision_VkFFT_single_nonPow2(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 15:
+    {
+        resFFT = sample_15_precision_VkFFT_single_r2c(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 16:
+    {
+        resFFT = sample_16_precision_VkFFT_single_dct(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 17:
+    {
+        resFFT = sample_17_precision_VkFFT_double_dct(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 18:
+    {
+        resFFT = sample_18_precision_VkFFT_double_nonPow2(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
 #endif
-	case 50:
-	{
-		resFFT = sample_50_convolution_VkFFT_single_1d_matrix(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 51:
-	{
-		resFFT = sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 52:
-	{
-		resFFT = sample_52_convolution_VkFFT_single_2d_batched_r2c(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 110:
-	{
-		resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 1);
-		break;
-	}
-	case 120:
-	{
-		resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 2);
-		break;
-	}
-	case 130:
-	{
-		resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 3);
-		break;
-	}
-	case 140:
-	{
-		resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 4);
-		break;
-	}
-	case 111:
-	{
-		resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 1);
-		break;
-	}
-	case 121:
-	{
-		resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 2);
-		break;
-	}
-	case 131:
-	{
-		resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 3);
-		break;
-	}
-	case 141:
-	{
-		resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 4);
-		break;
-	}
-	case 200: case 201:
-	{
-		resFFT = user_benchmark_VkFFT(vkGPU, file_output, output, isCompilerInitialized, userParams);
-		break;
-	}
+    case 50:
+    {
+        resFFT = sample_50_convolution_VkFFT_single_1d_matrix(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 51:
+    {
+        resFFT = sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 52:
+    {
+        resFFT = sample_52_convolution_VkFFT_single_2d_batched_r2c(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 110:
+    {
+        resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 1);
+        break;
+    }
+    case 120:
+    {
+        resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 2);
+        break;
+    }
+    case 130:
+    {
+        resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 3);
+        break;
+    }
+    case 140:
+    {
+        resFFT = sample_100_benchmark_VkFFT_single_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 4);
+        break;
+    }
+    case 111:
+    {
+        resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 1);
+        break;
+    }
+    case 121:
+    {
+        resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 2);
+        break;
+    }
+    case 131:
+    {
+        resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 3);
+        break;
+    }
+    case 141:
+    {
+        resFFT = sample_101_benchmark_VkFFT_double_nd_dct(vkGPU, file_output, output, isCompilerInitialized, 4);
+        break;
+    }
+    case 200: case 201:
+    {
+        resFFT = user_benchmark_VkFFT(vkGPU, file_output, output, isCompilerInitialized, userParams);
+        break;
+    }
 #if ((VKFFT_BACKEND==0)&&(VK_API_VERSION>10))
-	case 202:
-	{
-		resFFT = user_benchmark_VkFFT(vkGPU, file_output, output, isCompilerInitialized, userParams);
-		break;
-	}
+    case 202:
+    {
+        resFFT = user_benchmark_VkFFT(vkGPU, file_output, output, isCompilerInitialized, userParams);
+        break;
+    }
 #endif
-	case 1000:
-	{
-		resFFT = sample_1000_VkFFT_single_2_4096(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 1001:
-	{
-		resFFT = sample_1001_benchmark_VkFFT_double_2_4096(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	case 1003:
-	{
-		resFFT = sample_1003_benchmark_VkFFT_single_3d_2_512(vkGPU, file_output, output, isCompilerInitialized);
-		break;
-	}
-	}
+    case 1000:
+    {
+        resFFT = sample_1000_VkFFT_single_2_4096(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 1001:
+    {
+        resFFT = sample_1001_benchmark_VkFFT_double_2_4096(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    case 1003:
+    {
+        resFFT = sample_1003_benchmark_VkFFT_single_3d_2_512(vkGPU, file_output, output, isCompilerInitialized);
+        break;
+    }
+    }
 #if(VKFFT_BACKEND==0)
 	vkDestroyFence(vkGPU->device, vkGPU->fence, NULL);
 	vkDestroyCommandPool(vkGPU->device, vkGPU->commandPool, NULL);
@@ -475,6 +494,10 @@ VkFFTResult launchVkFFT(VkGPU* vkGPU, uint64_t sample_id, bool file_output, FILE
 	res = zeCommandQueueDestroy(vkGPU->commandQueue);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
 	res = zeContextDestroy(vkGPU->context);
+#elif(VKFFT_BACKEND==5)
+    vkGPU->queue->release();
+    vkGPU->device->release();
+    devices->release();
 #endif
 
 	return resFFT;
@@ -510,7 +533,7 @@ int main(int argc, char* argv[])
 		version_decomposed[0] = version / 10000;
 		version_decomposed[1] = (version - version_decomposed[0] * 10000) / 100;
 		version_decomposed[2] = (version - version_decomposed[0] * 10000 - version_decomposed[1] * 100);
-		printf("VkFFT v%d.%d.%d (03-08-2022). Author: Tolmachev Dmitrii\n", version_decomposed[0], version_decomposed[1], version_decomposed[2]);
+		printf("VkFFT v%d.%d.%d (25-10-2022). Author: Tolmachev Dmitrii\n", version_decomposed[0], version_decomposed[1], version_decomposed[2]);
 #if (VKFFT_BACKEND==0)
 		printf("Vulkan backend\n");
 #elif (VKFFT_BACKEND==1)
@@ -521,6 +544,8 @@ int main(int argc, char* argv[])
 		printf("OpenCL backend\n");
 #elif (VKFFT_BACKEND==4)
 		printf("Level Zero backend\n");
+#elif (VKFFT_BACKEND==5)
+        printf("Metal backend\n");
 #endif
 		printf("	-h: print help\n");
 		printf("	-devices: print the list of available device ids, used as -d argument\n");
diff --git a/benchmark_plot/fp64_cuda_a100.png b/benchmark_plot/fp64_cuda_a100.png
new file mode 100644
index 0000000..9c949a1
Binary files /dev/null and b/benchmark_plot/fp64_cuda_a100.png differ
diff --git a/benchmark_plot/fp64_hip_mi250.png b/benchmark_plot/fp64_hip_mi250.png
new file mode 100644
index 0000000..333cd0d
Binary files /dev/null and b/benchmark_plot/fp64_hip_mi250.png differ
diff --git a/benchmark_plot/VkFFT_benchmark_results.txt b/benchmark_plot/old_results/old_VkFFT_benchmark_results.txt
similarity index 100%
rename from benchmark_plot/VkFFT_benchmark_results.txt
rename to benchmark_plot/old_results/old_VkFFT_benchmark_results.txt
diff --git a/benchmark_plot/benchmark.png b/benchmark_plot/old_results/old_benchmark.png
similarity index 100%
rename from benchmark_plot/benchmark.png
rename to benchmark_plot/old_results/old_benchmark.png
diff --git a/benchmark_plot/cuFFT_benchmark_results.txt b/benchmark_plot/old_results/old_cuFFT_benchmark_results.txt
similarity index 100%
rename from benchmark_plot/cuFFT_benchmark_results.txt
rename to benchmark_plot/old_results/old_cuFFT_benchmark_results.txt
diff --git a/benchmark_plot/fp32_cuda_a100.png b/benchmark_plot/old_results/old_fp32_cuda_a100.png
similarity index 100%
rename from benchmark_plot/fp32_cuda_a100.png
rename to benchmark_plot/old_results/old_fp32_cuda_a100.png
diff --git a/benchmark_plot/fp32_hip_mi100.png b/benchmark_plot/old_results/old_fp32_hip_mi100.png
similarity index 100%
rename from benchmark_plot/fp32_hip_mi100.png
rename to benchmark_plot/old_results/old_fp32_hip_mi100.png
diff --git a/benchmark_plot/vkfft_benchmark_2d.png b/benchmark_plot/old_results/old_vkfft_benchmark_2d.png
similarity index 100%
rename from benchmark_plot/vkfft_benchmark_2d.png
rename to benchmark_plot/old_results/old_vkfft_benchmark_2d.png
diff --git a/benchmark_plot/vkfft_benchmark_3d.png b/benchmark_plot/old_results/old_vkfft_benchmark_3d.png
similarity index 100%
rename from benchmark_plot/vkfft_benchmark_3d.png
rename to benchmark_plot/old_results/old_vkfft_benchmark_3d.png
diff --git a/benchmark_plot/vkfft_benchmark_double.png b/benchmark_plot/old_results/old_vkfft_benchmark_double.png
similarity index 100%
rename from benchmark_plot/vkfft_benchmark_double.png
rename to benchmark_plot/old_results/old_vkfft_benchmark_double.png
diff --git a/benchmark_plot/vkfft_benchmark_half.png b/benchmark_plot/old_results/old_vkfft_benchmark_half.png
similarity index 100%
rename from benchmark_plot/vkfft_benchmark_half.png
rename to benchmark_plot/old_results/old_vkfft_benchmark_half.png
diff --git a/benchmark_plot/vkfft_benchmark_single.png b/benchmark_plot/old_results/old_vkfft_benchmark_single.png
similarity index 100%
rename from benchmark_plot/vkfft_benchmark_single.png
rename to benchmark_plot/old_results/old_vkfft_benchmark_single.png
diff --git a/benchmark_scripts/cuFFT_scripts/src/user_benchmark_cuFFT.cu b/benchmark_scripts/cuFFT_scripts/src/user_benchmark_cuFFT.cu
index b22a631..48bded0 100644
--- a/benchmark_scripts/cuFFT_scripts/src/user_benchmark_cuFFT.cu
+++ b/benchmark_scripts/cuFFT_scripts/src/user_benchmark_cuFFT.cu
@@ -24,7 +24,7 @@ void user_benchmark_cuFFT(bool file_output, FILE* output, cuFFTUserSystemParamet
 	cudaSetDevice(device_id);
 	const int num_runs = 3;
 	double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples
-	uint64_t storageComplexSize;
+	uint64_t storageComplexSize=8;
 	switch (userParams->P) {
 	case 0:
 		storageComplexSize = (2 * sizeof(float));
@@ -35,7 +35,22 @@ void user_benchmark_cuFFT(bool file_output, FILE* output, cuFFTUserSystemParamet
 	case 2:
 		storageComplexSize = (2 * 2);
 		break;
+	default:
+		storageComplexSize = (2 * sizeof(float));
+		break;
 	}
+    uint64_t bufferSize = 0;
+    if (userParams->R2C) {
+        bufferSize = (uint64_t)(storageComplexSize / 2) * (userParams->X + 2) * userParams->Y * userParams->Z * userParams->B;
+    }
+    else {
+		bufferSize = (uint64_t)storageComplexSize * userParams->X * userParams->Y * userParams->Z * userParams->B;
+    }
+    
+    float* buffer_input = (float*)malloc(bufferSize);
+    for (uint64_t i = 0; i < bufferSize/sizeof(float); i++) {
+        buffer_input[i] = (float)(2 * ((float)rand()) / RAND_MAX - 1.0);
+    }
 	for (int n = 0; n < 2; n++) {
 		double run_time[num_runs][2];
 		for (int r = 0; r < num_runs; r++) {
@@ -64,17 +79,14 @@ void user_benchmark_cuFFT(bool file_output, FILE* output, cuFFTUserSystemParamet
 				dims[2] = userParams->X;
 				break;
 			}
-			uint64_t bufferSize;
-			if (userParams->R2C)
-				bufferSize = (uint64_t)(storageComplexSize / 2) * (userParams->X + 2) * userParams->Y * userParams->Z * userParams->B;
-			else 
-				bufferSize = (uint64_t)storageComplexSize * userParams->X * userParams->Y * userParams->Z * userParams->B;
 			
 			cudaMalloc((void**)&dataC, bufferSize);
 			if (cudaGetLastError() != cudaSuccess) {
 				fprintf(stderr, "Cuda error: Failed to allocate\n");
 				return;
 			}
+			cudaMemcpy(dataC, buffer_input, bufferSize, cudaMemcpyHostToDevice);
+			
 			//forward + inverse
 			int iembed[2][3];
 			int istride[2] = { 1, 1 };
@@ -195,4 +207,5 @@ void user_benchmark_cuFFT(bool file_output, FILE* output, cuFFTUserSystemParamet
 			cudaDeviceSynchronize();
 		}
 	}
+	free(buffer_input);
 }
diff --git a/benchmark_scripts/rocFFT_scripts/src/user_benchmark_rocFFT.cpp b/benchmark_scripts/rocFFT_scripts/src/user_benchmark_rocFFT.cpp
index 60b06b8..17d479b 100644
--- a/benchmark_scripts/rocFFT_scripts/src/user_benchmark_rocFFT.cpp
+++ b/benchmark_scripts/rocFFT_scripts/src/user_benchmark_rocFFT.cpp
@@ -22,7 +22,7 @@ void user_benchmark_rocFFT(bool file_output, FILE* output, rocFFTUserSystemParam
 	hipSetDevice(device_id);
 	const int num_runs = 7;
 	double benchmark_result[2] = { 0,0 };//averaged result = sum(system_size/iteration_time)/num_benchmark_samples
-	uint64_t storageComplexSize;
+	uint64_t storageComplexSize=8;
 	switch (userParams->P) {
 	case 0:
 		storageComplexSize = (2 * sizeof(float));
@@ -33,7 +33,22 @@ void user_benchmark_rocFFT(bool file_output, FILE* output, rocFFTUserSystemParam
 	case 2:
 		storageComplexSize = (2 * 2);
 		break;
+	default:
+		storageComplexSize = (2 * sizeof(float));
+		break;
 	}
+    uint64_t bufferSize = 0;
+    if (userParams->R2C) {
+        bufferSize = (uint64_t)(storageComplexSize / 2) * (userParams->X + 2) * userParams->Y * userParams->Z * userParams->B;
+    }
+    else {
+		bufferSize = (uint64_t)storageComplexSize * userParams->X * userParams->Y * userParams->Z * userParams->B;
+    }
+    
+    float* buffer_input = (float*)malloc(bufferSize);
+    for (uint64_t i = 0; i < bufferSize/sizeof(float); i++) {
+        buffer_input[i] = (float)(2 * ((float)rand()) / RAND_MAX - 1.0);
+    }
 	for (int n = 0; n < 2; n++) {
 		double run_time[num_runs][2];
 		for (int r = 0; r < num_runs; r++) {
@@ -62,19 +77,15 @@ void user_benchmark_rocFFT(bool file_output, FILE* output, rocFFTUserSystemParam
 				dims[2] = userParams->X;
 				break;
 			}
-			uint64_t bufferSize;
-			if (userParams->R2C)
-				bufferSize = (uint64_t)(storageComplexSize / 2) * (userParams->X + 2) * userParams->Y * userParams->Z * userParams->B;
-			else
-				bufferSize = (uint64_t)storageComplexSize * userParams->X * userParams->Y * userParams->Z * userParams->B;
-
+		
 			hipMalloc((void**)&dataC, bufferSize);
 			
 			if (hipGetLastError() != hipSuccess) {
 				fprintf(stderr, "ROCM error: Failed to allocate\n");
 				return;
 			}
-
+			hipMemcpy(dataC, buffer_input, bufferSize, hipMemcpyHostToDevice);
+			
 			//forward + inverse
 			int iembed[2][3];
 			int istride[2] = { 1, 1 };
@@ -196,4 +207,5 @@ void user_benchmark_rocFFT(bool file_output, FILE* output, rocFFTUserSystemParam
 			//hipDeviceSynchronize();
 		}
 	}
+	free(buffer_input);
 }
diff --git a/benchmark_scripts/vkFFT_scripts/include/utils_VkFFT.h b/benchmark_scripts/vkFFT_scripts/include/utils_VkFFT.h
index 518331d..1387118 100644
--- a/benchmark_scripts/vkFFT_scripts/include/utils_VkFFT.h
+++ b/benchmark_scripts/vkFFT_scripts/include/utils_VkFFT.h
@@ -33,6 +33,9 @@ typedef struct {
 	ze_context_handle_t context;
 	ze_command_queue_handle_t commandQueue;
 	uint32_t commandQueueID;
+#elif(VKFFT_BACKEND==5)
+    MTL::Device* device;
+    MTL::CommandQueue* queue;
 #endif
 	uint64_t device_id;//an id of a device, reported by Vulkan device list
 } VkGPU;//an example structure containing Vulkan primitives
@@ -65,10 +68,10 @@ VkResult createFence(VkGPU* vkGPU);
 VkResult createCommandPool(VkGPU* vkGPU);
 VkFFTResult findMemoryType(VkGPU* vkGPU, uint64_t memoryTypeBits, uint64_t memorySize, VkMemoryPropertyFlags properties, uint32_t* memoryTypeIndex);
 VkFFTResult allocateBuffer(VkGPU* vkGPU, VkBuffer* buffer, VkDeviceMemory* deviceMemory, VkBufferUsageFlags usageFlags, VkMemoryPropertyFlags propertyFlags, uint64_t size);
-VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_t bufferSize);
-VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_t bufferSize);
 #endif
+VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* cpu_arr, void* output_buffer, uint64_t bufferSize);
+VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* cpu_arr, void* input_buffer, uint64_t bufferSize);
 VkFFTResult devices_list();
 VkFFTResult performVulkanFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchParams* launchParams, int inverse, uint64_t num_iter);
 VkFFTResult performVulkanFFTiFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchParams* launchParams, uint64_t num_iter, double* time_result);
-#endif
\ No newline at end of file
+#endif
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
index 1405ecd..b85dde5 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "0 - VkFFT FFT + iFFT C2C benchmark 1D batched in single precision\n");
@@ -79,11 +84,18 @@ VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			configuration.FFTdim = 1; //FFT dimension, 1D, 2D or 3D (default 1).
 			configuration.size[0] = 4 * (uint64_t)pow(2, n); //Multidimensional FFT dimensions sizes (default 1). For best performance (and stability), order dimensions in descendant size order as: x>y>z.   
 			if (n == 0) configuration.size[0] = 4096;
-			configuration.numberBatches = (uint64_t)((64 * 32 * (uint64_t)pow(2, 16)) / configuration.size[0]);
+            configuration.numberBatches = (uint64_t)((64 * 32 * (uint64_t)pow(2, 16)) / configuration.size[0]);
 			if (configuration.numberBatches < 1) configuration.numberBatches = 1;
-			
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -96,6 +108,8 @@ VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * configuration.size[0] * configuration.numberBatches;
@@ -127,6 +141,10 @@ VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -144,42 +162,42 @@ VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 				}
 
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList,buffer,buffer_input,bufferSize,0,0,0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)(((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize);
 #if(VKFFT_BACKEND==0)
@@ -238,6 +256,8 @@ VkFFTResult sample_0_benchmark_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 
 			deleteVkFFT(&app);
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
index 564f238..00942e5 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_1000_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output,
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "1000 - VkFFT FFT + iFFT C2C benchmark 1D batched in single precision: all supported systems from 2 to 4096\n");
@@ -92,9 +97,17 @@ VkFFTResult sample_1000_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output,
 			if (temp != 1) break;*/
 			configuration.numberBatches = (uint64_t)pow(2, (uint64_t)log2((uint64_t)64 * 32 * (uint64_t)pow(2, 16) / configuration.size[0]));
 			if (configuration.numberBatches < 1) configuration.numberBatches = 1;
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -107,6 +120,8 @@ VkFFTResult sample_1000_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output,
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * configuration.size[0] * configuration.numberBatches;
@@ -138,6 +153,10 @@ VkFFTResult sample_1000_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output,
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -155,42 +174,42 @@ VkFFTResult sample_1000_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output,
 				}
 
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -249,6 +268,8 @@ VkFFTResult sample_1000_VkFFT_single_2_4096(VkGPU* vkGPU, uint64_t file_output,
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 
 			deleteVkFFT(&app);
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
index c8af6d6..4d58c00 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t fil
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "1001 - VkFFT FFT + iFFT C2C benchmark 1D batched in double precision: all supported systems from 2 to 4096\n");
@@ -95,9 +100,17 @@ VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t fil
 			configuration.size[2] = 1;
 
 			configuration.doublePrecision = true;
-
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
+			
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -110,6 +123,8 @@ VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t fil
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif			
 
 			//Allocate buffer for the input data.
@@ -142,6 +157,10 @@ VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t fil
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -157,43 +176,43 @@ VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t fil
 					}
 				}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -252,6 +271,8 @@ VkFFTResult sample_1001_benchmark_VkFFT_double_2_4096(VkGPU* vkGPU, uint64_t fil
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
index 6ceeb8b..997eccd 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t f
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "1003 - VkFFT FFT + iFFT C2C multidimensional benchmark in single precision: all supported cubes from 2 to 512\n");
@@ -92,8 +97,17 @@ VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t f
 			if (temp != 1) break;*/
 			configuration.size[1] = configuration.size[0];
 			configuration.size[2] = configuration.size[0];
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
+			
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -106,6 +120,8 @@ VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t f
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * configuration.size[0] * configuration.size[1] * configuration.size[2];;
@@ -137,6 +153,10 @@ VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t f
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -154,42 +174,41 @@ VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t f
 				}
 
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
-
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -248,6 +267,8 @@ VkFFTResult sample_1003_benchmark_VkFFT_single_3d_2_512(VkGPU* vkGPU, uint64_t f
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 
 			deleteVkFFT(&app);
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
index 9be2427..f753278 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "1%" PRIu64 "0 - VkFFT FFT + iFFT R2R DCT-%" PRIu64 " multidimensional benchmark in single precision\n", dct_type, dct_type);
@@ -94,8 +99,16 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
 			configuration.performDCT = dct_type;
 			//configuration.disableMergeSequencesR2C = 1;
 			//configuration.doublePrecision = 1;
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -108,6 +121,8 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * configuration.size[0] * configuration.size[1] * configuration.size[2];;
@@ -139,6 +154,10 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -154,39 +173,26 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
 				}
 			}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT) {
@@ -207,12 +213,29 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
 				clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+				buffer->release();
 #endif
 				deleteVkFFT(&app);
+				if (configuration.loadApplicationFromString)
+					free(configuration.loadApplicationString);
 				continue;
 			}
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -270,6 +293,8 @@ VkFFTResult sample_100_benchmark_VkFFT_single_nd_dct(VkGPU* vkGPU, uint64_t file
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
index 60990f7..2867664 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "1%" PRIu64 "1 - VkFFT FFT + iFFT R2R DCT-%" PRIu64 " multidimensional benchmark in double precision\n", dct_type, dct_type);
@@ -95,8 +100,16 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
 			configuration.doublePrecision = 1;
 			//configuration.disableMergeSequencesR2C = 1;
 			//configuration.doublePrecision = 1;
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -109,6 +122,8 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(double) * configuration.size[0] * configuration.size[1] * configuration.size[2];;
@@ -140,6 +155,10 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -155,39 +174,26 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
 				}
 			}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT) {
@@ -208,12 +214,29 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
 				clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+				buffer->release();
 #endif
 				deleteVkFFT(&app);
+				if (configuration.loadApplicationFromString)
+					free(configuration.loadApplicationString);
 				continue;
 			}
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -271,6 +294,8 @@ VkFFTResult sample_101_benchmark_VkFFT_double_nd_dct(VkGPU* vkGPU, uint64_t file
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
index 0e29cf7..610ec24 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_10_benchmark_VkFFT_single_multipleBuffers(VkGPU* vkGPU, uint6
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 #if(VKFFT_BACKEND==0)
 	if (file_output)
@@ -85,8 +90,16 @@ VkFFTResult sample_10_benchmark_VkFFT_single_multipleBuffers(VkGPU* vkGPU, uint6
 			//configuration.numberBatches = (configuration.numberBatches > 32768) ? 32768 : configuration.numberBatches;
 			uint64_t numBuf = 4;
 
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
 			configuration.commandPool = &vkGPU->commandPool;
@@ -141,7 +154,7 @@ VkFFTResult sample_10_benchmark_VkFFT_single_multipleBuffers(VkGPU* vkGPU, uint6
 					}
 				}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 			uint64_t shift = 0;
 			for (uint64_t i = 0; i < numBuf; i++) {
 				resFFT = transferDataFromCPU(vkGPU, (buffer_input + shift / sizeof(float)), &buffer[i], bufferSize[i]);
@@ -151,10 +164,38 @@ VkFFTResult sample_10_benchmark_VkFFT_single_multipleBuffers(VkGPU* vkGPU, uint6
 
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / (numBuf * bufferSize[0]) > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / (numBuf * bufferSize[0]);
 			if (vkGPU->physicalDeviceProperties.vendorID == 0x8086) num_iter /= 4;
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
index 87c5125..59e8ec9 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -62,6 +66,7 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "11 - VkFFT/FFTW C2C precision test in single precision\n");
@@ -149,7 +154,11 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			configuration.size[2] = benchmark_dimensions[n][2];
 
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -162,6 +171,8 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 
 			uint64_t numBuf = 1;
@@ -186,6 +197,8 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			cl_mem buffer = 0;
 #elif(VKFFT_BACKEND==4)
 			void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
 #endif
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -207,6 +220,8 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 				device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
 				res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 			}
 
@@ -222,38 +237,15 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			*/ //Can specify buffers at launch
 			configuration.bufferSize = bufferSize;
 
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 			uint64_t shift = 0;
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftwf_complex)), &buffer[i], bufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(buffer, inputC, bufferSize[i], cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(buffer, inputC, bufferSize[i], hipMemcpyHostToDevice);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], inputC, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, inputC, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftwf_complex)), &buffer, bufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -275,6 +267,8 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 			launchParams.buffer = &buffer;
 #elif(VKFFT_BACKEND==4)
 			launchParams.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+            launchParams.buffer = &buffer;
 #endif
 			resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -286,32 +280,9 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &buffer[i], bufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], output_VkFFT, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, buffer, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &buffer, bufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -384,6 +355,8 @@ VkFFTResult sample_11_precision_VkFFT_single(VkGPU* vkGPU, uint64_t file_output,
 				clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                buffer->release();
 #endif
 			}
 #if(VKFFT_BACKEND==0)
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
index ea5d00d..a8c53fc 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -62,6 +66,7 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "12 - VkFFT/FFTW C2C precision test in double precision\n");
@@ -148,7 +153,11 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			configuration.size[2] = benchmark_dimensions[n][2];
 
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue;
 			configuration.fence = &vkGPU->fence;
@@ -161,6 +170,8 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			configuration.doublePrecision = true;
 
@@ -186,6 +197,8 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			cl_mem buffer = 0;
 #elif(VKFFT_BACKEND==4)
 			void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
 #endif
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -207,6 +220,8 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 				device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
 				res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 			}
 
@@ -222,38 +237,15 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			*/ // Can specify buffers at launch
 			configuration.bufferSize = bufferSize;
 
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 			uint64_t shift = 0;
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftw_complex)), &buffer[i], bufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(buffer, inputC, bufferSize[i], cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(buffer, inputC, bufferSize[i], hipMemcpyHostToDevice);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], inputC, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, inputC, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftw_complex)), &buffer, bufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -274,6 +266,8 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			launchParams.buffer = &buffer;
 #elif(VKFFT_BACKEND==4)
 			launchParams.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+            launchParams.buffer = &buffer;
 #endif
 			resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -285,32 +279,9 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftw_complex)), &buffer[i], bufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], output_VkFFT, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, buffer, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftw_complex)), &buffer, bufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -384,6 +355,8 @@ VkFFTResult sample_12_precision_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 				clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                buffer->release();
 #endif
 
 			}
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
index 62bf623..8879fc8 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "half.hpp"
@@ -64,6 +68,7 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "13 - VkFFT/FFTW C2C precision test in half precision\n");
@@ -148,7 +153,11 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 			configuration.halfPrecision = true;
 
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -161,6 +170,8 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 
 			uint64_t numBuf = 1;
@@ -185,6 +196,8 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 			cl_mem buffer = 0;
 #elif(VKFFT_BACKEND==4)
 			void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
 #endif			
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -206,6 +219,8 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 				device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
 				res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 			}
 
@@ -221,38 +236,15 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 			*/ // Can specify buffers at launch
 			configuration.bufferSize = bufferSize;
 
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 			uint64_t shift = 0;
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataFromCPU(vkGPU, (inputC + shift / 2 / sizeof(half)), &buffer[i], bufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(buffer, inputC, bufferSize[i], cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(buffer, inputC, bufferSize[i], hipMemcpyHostToDevice);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], inputC, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, inputC, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataFromCPU(vkGPU, (inputC + shift / 2 / sizeof(half)), &buffer, bufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -273,6 +265,8 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 			launchParams.buffer = &buffer;
 #elif(VKFFT_BACKEND==4)
 			launchParams.buffer = (void**)&buffer;
+#else
+            launchParams.buffer = &buffer;
 #endif
 			resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -284,32 +278,9 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / 2 / sizeof(half)), &buffer[i], bufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], output_VkFFT, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, buffer, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / 2 / sizeof(half)), &buffer, bufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -377,6 +348,8 @@ VkFFTResult sample_13_precision_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, F
 				clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                buffer->release();
 #endif
 
 			}
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
index 1ead647..91d035a 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -62,6 +66,7 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "14 - VkFFT/FFTW C2C radix 3/5/7/11/13/Bluestein precision test in single precision\n");
@@ -160,7 +165,11 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 			//configuration.keepShaderCode = 1;
 			//configuration.disableReorderFourStep = 1;
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
+#if(VKFFT_BACKEND==5)
+			configuration.device = vkGPU->device;
+#else
 			configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -173,6 +182,8 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif			
 
 			uint64_t numBuf = 1;
@@ -197,6 +208,8 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 			cl_mem buffer = 0;
 #elif(VKFFT_BACKEND==4)
 			void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
 #endif			
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -218,6 +231,8 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 				device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
 				res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 			}
 
@@ -233,39 +248,15 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 			*/ // Can specify buffers at launch
 			configuration.bufferSize = bufferSize;
 
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 			uint64_t shift = 0;
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftwf_complex)), &buffer[i], bufferSize[i]);
-				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(buffer, inputC, bufferSize[i], cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(buffer, inputC, bufferSize[i], hipMemcpyHostToDevice);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], inputC, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, inputC, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftwf_complex)), &buffer, bufferSize[i]);
 #endif
+				if (resFFT != VKFFT_SUCCESS) return resFFT;
 				shift += bufferSize[i];
 			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
@@ -285,6 +276,8 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 			launchParams.buffer = &buffer;
 #elif(VKFFT_BACKEND==4)
 			launchParams.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+            launchParams.buffer = &buffer;
 #endif
 			resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -295,34 +288,10 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &buffer[i], sizeof(fftwf_complex) * dims[0] * dims[1] * dims[2]);
-				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], output_VkFFT, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, buffer, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &buffer, sizeof(fftwf_complex) * dims[0] * dims[1] * dims[2]);
 #endif
+				if (resFFT != VKFFT_SUCCESS) return resFFT;
 				shift += bufferSize[i];
 			}
 			double avg_difference[2] = { 0,0 };
@@ -396,6 +365,8 @@ VkFFTResult sample_14_precision_VkFFT_single_nonPow2(VkGPU* vkGPU, uint64_t file
 				clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                buffer->release();
 #endif
 
 			}
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
index d10af1b..40bf5c2 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
@@ -40,6 +40,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -63,6 +67,7 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "15 - VkFFT / FFTW R2C+C2R precision test in single precision\n");
@@ -178,7 +183,11 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 			//configuration.coalescedMemory = 64;
 			//configuration.useLUT = 1;
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -191,6 +200,8 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif			
 
 			uint64_t numBuf = 1;
@@ -228,6 +239,9 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 #elif(VKFFT_BACKEND==4)
 			void* ibuffer = 0;
 			void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* ibuffer = 0;
+            MTL::Buffer* buffer = 0;
 #endif
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -261,6 +275,9 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 				res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                ibuffer = vkGPU->device->newBuffer(inputBufferSize[i], MTL::ResourceStorageModePrivate);
+                buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 			}
 			configuration.inputBufferNum = numBuf;
@@ -273,38 +290,15 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 			configuration.inputBufferSize = inputBufferSize;
 
 
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 			uint64_t shift = 0;
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftwf_complex)), &ibuffer[i], inputBufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(ibuffer, inputC, inputBufferSize[i], cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(ibuffer, inputC, inputBufferSize[i], hipMemcpyHostToDevice);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueWriteBuffer(vkGPU->commandQueue, ibuffer, CL_TRUE, 0, inputBufferSize[i], inputC, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, ibuffer, inputC, inputBufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftwf_complex)), &ibuffer, inputBufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += inputBufferSize[i];
 			}
@@ -331,6 +325,9 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 #elif(VKFFT_BACKEND==4)
 			launchParams.inputBuffer = (void**)&ibuffer;
 			launchParams.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+            launchParams.inputBuffer = &ibuffer;
+            launchParams.buffer = &buffer;
 #endif
 			resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -350,6 +347,9 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 #elif(VKFFT_BACKEND==4)
 			launchParams2.inputBuffer = (void**)&ibuffer;
 			launchParams2.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+            launchParams2.inputBuffer = &ibuffer;
+            launchParams2.buffer = &buffer;
 #endif
 			resFFT = performVulkanFFT(vkGPU, &app, &launchParams2, 1, num_iter);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -363,34 +363,10 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 				//resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &buffer[i], bufferSize[i]);
 				resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &ibuffer[i], inputBufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				//res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-				res = cudaMemcpy(output_VkFFT, ibuffer, inputBufferSize[i], cudaMemcpyDeviceToHost);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				//res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-				res = hipMemcpy(output_VkFFT, ibuffer, inputBufferSize[i], hipMemcpyDeviceToHost);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueReadBuffer(vkGPU->commandQueue, ibuffer, CL_TRUE, 0, inputBufferSize[i], output_VkFFT, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, ibuffer, inputBufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                //resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &buffer, bufferSize[i]);
+                resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftwf_complex)), &ibuffer, inputBufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += inputBufferSize[i];
 			}
@@ -470,6 +446,9 @@ VkFFTResult sample_15_precision_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_out
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, ibuffer);
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                ibuffer->release();
+                buffer->release();
 #endif
 
 			}
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
index 3872c9e..8eaf573 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -62,6 +66,7 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "16 - VkFFT/FFTW R2R DCT-I, II, III and IV precision test in single precision\n");
@@ -158,7 +163,11 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 				//configuration.useLUT = 1;
 				//configuration.disableMergeSequencesR2C = 1;
 				//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
+#if(VKFFT_BACKEND==5)
+				configuration.device = vkGPU->device;
+#else
 				configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 				configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 				configuration.fence = &vkGPU->fence;
@@ -171,6 +180,8 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 				configuration.context = &vkGPU->context;
 				configuration.commandQueue = &vkGPU->commandQueue;
 				configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+				configuration.queue = vkGPU->queue;
 #endif			
 
 				uint64_t numBuf = 1;
@@ -195,6 +206,8 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 				cl_mem buffer = 0;
 #elif(VKFFT_BACKEND==4)
 				void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+				MTL::Buffer* buffer = 0;
 #endif			
 				for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -216,6 +229,8 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 					device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
 					res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                    buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 				}
 
@@ -231,38 +246,15 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 				*/ // Can specify buffers at launch
 				configuration.bufferSize = bufferSize;
 
-				//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+				//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 				uint64_t shift = 0;
 				for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 					resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(float)), &buffer[i], bufferSize[i]);
 					if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-					res = cudaMemcpy(buffer, inputC, bufferSize[i], cudaMemcpyHostToDevice);
-					if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-					res = hipMemcpy(buffer, inputC, bufferSize[i], hipMemcpyHostToDevice);
-					if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-					res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], inputC, 0, NULL, NULL);
-					if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-					ze_command_queue_desc_t commandQueueCopyDesc = {
-						ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-						0,
-						vkGPU->commandQueueID,
-						0, // index
-						0, // flags
-						ZE_COMMAND_QUEUE_MODE_DEFAULT,
-						ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-					};
-					ze_command_list_handle_t copyCommandList;
-					res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-					res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, inputC, bufferSize[i], 0, 0, 0);
-					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-					res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-					if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                    resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(float)), &buffer, bufferSize[i]);
+                    if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 					shift += bufferSize[i];
 				}
@@ -285,6 +277,8 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 						clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 						zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                        buffer->release();
 #endif
 
 					}
@@ -315,6 +309,8 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 				launchParams.buffer = &buffer;
 #elif(VKFFT_BACKEND==4)
 				launchParams.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+                launchParams.buffer = &buffer;
 #endif
 				resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -326,32 +322,9 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 #if(VKFFT_BACKEND==0)
 					resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(float)), &buffer[i], bufferSize[i]);
 					if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-					res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-					if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-					res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-					if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-					res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], output_VkFFT, 0, NULL, NULL);
-					if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-					ze_command_queue_desc_t commandQueueCopyDesc = {
-						ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-						0,
-						vkGPU->commandQueueID,
-						0, // index
-						0, // flags
-						ZE_COMMAND_QUEUE_MODE_DEFAULT,
-						ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-					};
-					ze_command_list_handle_t copyCommandList;
-					res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-					res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, buffer, bufferSize[i], 0, 0, 0);
-					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-					res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-					if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                    resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(float)), &buffer, bufferSize[i]);
+                    if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 					shift += bufferSize[i];
 				}
@@ -402,6 +375,8 @@ VkFFTResult sample_16_precision_VkFFT_single_dct(VkGPU* vkGPU, uint64_t file_out
 					clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 					zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                    buffer->release();
 #endif
 
 				}
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
index f85102a..dd683b9 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -62,6 +66,7 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "17 - VkFFT/FFTW R2R DCT-I, II, III and IV precision test in double precision\n");
@@ -156,7 +161,11 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 				//configuration.useLUT = 1;
 				//configuration.disableMergeSequencesR2C = 1;
 				//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-				configuration.device = &vkGPU->device;
+	#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 				configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 				configuration.fence = &vkGPU->fence;
@@ -169,6 +178,8 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 				configuration.context = &vkGPU->context;
 				configuration.commandQueue = &vkGPU->commandQueue;
 				configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+                configuration.queue = vkGPU->queue;
 #endif			
 
 				uint64_t numBuf = 1;
@@ -192,7 +203,9 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 #elif(VKFFT_BACKEND==3)
 				cl_mem buffer = 0;
 #elif(VKFFT_BACKEND==4)
-				void* buffer = 0;
+                void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+                MTL::Buffer* buffer = 0;
 #endif			
 				for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -214,6 +227,8 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 					device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
 					res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                    buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 				}
 
@@ -229,21 +244,15 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 				*/ // Can specify buffers at launch
 				configuration.bufferSize = bufferSize;
 
-				//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+				//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 				uint64_t shift = 0;
 				for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 					resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(double)), &buffer[i], bufferSize[i]);
 					if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-					res = cudaMemcpy(buffer, inputC, bufferSize[i], cudaMemcpyHostToDevice);
-					if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-					res = hipMemcpy(buffer, inputC, bufferSize[i], hipMemcpyHostToDevice);
-					if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-					res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], inputC, 0, NULL, NULL);
-					if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
+#else
+                    resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(double)), &buffer, bufferSize[i]);
+                    if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 					shift += bufferSize[i];
 				}
@@ -265,22 +274,9 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 #elif(VKFFT_BACKEND==3)
 						clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
-						ze_command_queue_desc_t commandQueueCopyDesc = {
-							ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-							0,
-							vkGPU->commandQueueID,
-							0, // index
-							0, // flags
-							ZE_COMMAND_QUEUE_MODE_DEFAULT,
-							ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-						};
-						ze_command_list_handle_t copyCommandList;
-						res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-						if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-						res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, inputC, bufferSize[i], 0, 0, 0);
-						if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-						res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-						if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+                        zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                        buffer->release();
 #endif
 
 					}
@@ -312,6 +308,8 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 				launchParams.buffer = &buffer;
 #elif(VKFFT_BACKEND==4)
 				launchParams.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+                launchParams.buffer = &buffer;
 #endif
 				resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -323,32 +321,9 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 #if(VKFFT_BACKEND==0)
 					resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(double)), &buffer[i], bufferSize[i]);
 					if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-					res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-					if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-					res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-					if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-					res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], output_VkFFT, 0, NULL, NULL);
-					if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-					ze_command_queue_desc_t commandQueueCopyDesc = {
-						ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-						0,
-						vkGPU->commandQueueID,
-						0, // index
-						0, // flags
-						ZE_COMMAND_QUEUE_MODE_DEFAULT,
-						ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-					};
-					ze_command_list_handle_t copyCommandList;
-					res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-					res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, buffer, bufferSize[i], 0, 0, 0);
-					if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-					res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-					if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                    resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(double)), &buffer, bufferSize[i]);
+                    if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 					shift += bufferSize[i];
 				}
@@ -399,6 +374,8 @@ VkFFTResult sample_17_precision_VkFFT_double_dct(VkGPU* vkGPU, uint64_t file_out
 					clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 					zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                    buffer->release();
 #endif
 
 				}
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
index 27c4813..ab11954 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -62,6 +66,7 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "18 - VkFFT/FFTW C2C radix 3/5/7/11/13/Bluestein precision test in double precision\n");
@@ -160,7 +165,11 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 			
 			//configuration.disableReorderFourStep = 1;
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -173,6 +182,8 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif			
 
 			uint64_t numBuf = 1;
@@ -197,6 +208,8 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 			cl_mem buffer = 0;
 #elif(VKFFT_BACKEND==4)
 			void* buffer = 0;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
 #endif			
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
@@ -218,6 +231,8 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 				device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
 				res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize[i], sizeof(float), vkGPU->device, &buffer);
 				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+                buffer = vkGPU->device->newBuffer(bufferSize[i], MTL::ResourceStorageModePrivate);
 #endif
 			}
 
@@ -233,38 +248,15 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 			*/ // Can specify buffers at launch
 			configuration.bufferSize = bufferSize;
 
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 			uint64_t shift = 0;
 			for (uint64_t i = 0; i < numBuf; i++) {
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftw_complex)), &buffer[i], bufferSize[i]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(buffer, inputC, bufferSize[i], cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(buffer, inputC, bufferSize[i], hipMemcpyHostToDevice);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], inputC, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, inputC, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataFromCPU(vkGPU, (inputC + shift / sizeof(fftw_complex)), &buffer, bufferSize[i]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -285,6 +277,8 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 			launchParams.buffer = &buffer;
 #elif(VKFFT_BACKEND==4)
 			launchParams.buffer = (void**)&buffer;
+#elif(VKFFT_BACKEND==5)
+            launchParams.buffer = &buffer;
 #endif
 			resFFT = performVulkanFFT(vkGPU, &app, &launchParams, -1, num_iter);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -296,32 +290,9 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 #if(VKFFT_BACKEND==0)
 				resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftw_complex)), &buffer[i], sizeof(fftw_complex) * dims[0] * dims[1] * dims[2]);
 				if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-				res = cudaMemcpy(output_VkFFT, buffer, bufferSize[i], cudaMemcpyDeviceToHost);
-				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-				res = hipMemcpy(output_VkFFT, buffer, bufferSize[i], hipMemcpyDeviceToHost);
-				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-				res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize[i], output_VkFFT, 0, NULL, NULL);
-				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-					ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-					0,
-					vkGPU->commandQueueID,
-					0, // index
-					0, // flags
-					ZE_COMMAND_QUEUE_MODE_DEFAULT,
-					ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				res = zeCommandListAppendMemoryCopy(copyCommandList, output_VkFFT, buffer, bufferSize[i], 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-				res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-				if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#else
+                resFFT = transferDataToCPU(vkGPU, (output_VkFFT + shift / sizeof(fftw_complex)), &buffer, sizeof(fftw_complex) * dims[0] * dims[1] * dims[2]);
+                if (resFFT != VKFFT_SUCCESS) return resFFT;
 #endif
 				shift += bufferSize[i];
 			}
@@ -396,6 +367,8 @@ VkFFTResult sample_18_precision_VkFFT_double_nonPow2(VkGPU* vkGPU, uint64_t file
 				clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 				zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+                buffer->release();
 #endif
 
 			}
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
index 4757d70..d23999f 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "1 - VkFFT FFT + iFFT C2C benchmark 1D batched in double precision LUT\n");
@@ -86,8 +91,16 @@ VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 
 			configuration.doublePrecision = true;
 
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -100,6 +113,8 @@ VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif			
 
 			//Allocate buffer for the input data.
@@ -132,6 +147,10 @@ VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -147,43 +166,43 @@ VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 					}
 				}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -241,6 +260,8 @@ VkFFTResult sample_1_benchmark_VkFFT_double(VkGPU* vkGPU, uint64_t file_output,
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
index 0a1d2b2..a3bd5c0 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "half.hpp"
@@ -59,6 +63,7 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "2 - VkFFT FFT + iFFT C2C benchmark 1D batched in half precision\n");
@@ -115,9 +120,17 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI
 				else
 					configuration.coalescedMemory = 128;
 			}
+#endif
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
 #endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -130,6 +143,8 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 
 			//Allocate buffer for the input data.
@@ -162,6 +177,10 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(half), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -177,43 +196,43 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI
 					}
 				}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -266,6 +285,8 @@ VkFFTResult sample_2_benchmark_VkFFT_half(VkGPU* vkGPU, uint64_t file_output, FI
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
index 51bb7b3..4579df5 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_outpu
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "3 - VkFFT FFT + iFFT C2C multidimensional benchmark in single precision\n");
@@ -92,8 +97,16 @@ VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_outpu
 			configuration.size[1] = benchmark_dimensions[n][1];
 			configuration.size[2] = benchmark_dimensions[n][2];
 
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -106,6 +119,8 @@ VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_outpu
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * configuration.size[0] * configuration.size[1] * configuration.size[2];;
@@ -137,6 +152,10 @@ VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_outpu
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -152,43 +171,43 @@ VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_outpu
 				}
 			}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -246,6 +265,8 @@ VkFFTResult sample_3_benchmark_VkFFT_single_3d(VkGPU* vkGPU, uint64_t file_outpu
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
index 0b1183a..f8f9277 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "4 - VkFFT FFT + iFFT C2C multidimensional benchmark in single precision, native zeropadding\n");
@@ -103,8 +108,16 @@ VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_
 			configuration.fft_zeropad_left[2] = (uint64_t)ceil(configuration.size[2] / 2.0);
 			configuration.fft_zeropad_right[2] = configuration.size[2];
 
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -117,6 +130,8 @@ VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * configuration.size[0] * configuration.size[1] * configuration.size[2];;
@@ -148,6 +163,10 @@ VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -163,43 +182,43 @@ VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_
 				}
 			}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -256,6 +275,8 @@ VkFFTResult sample_4_benchmark_VkFFT_single_3d_zeropadding(VkGPU* vkGPU, uint64_
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
index 8aea3c3..b53b743 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "50 - VkFFT convolution example with identitiy kernel\n");
@@ -80,7 +85,11 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 	configuration.normalize = 1;//normalize iFFT
 	
 	//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
+#if(VKFFT_BACKEND==5)
+	configuration.device = vkGPU->device;
+#else
 	configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 	configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 	configuration.fence = &vkGPU->fence;
@@ -93,6 +102,8 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 	configuration.context = &vkGPU->context;
 	configuration.commandQueue = &vkGPU->commandQueue;
 	configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+	configuration.queue = vkGPU->queue;
 #endif
 	//In this example, we perform a convolution for a real vectorfield (3vector) with a symmetric kernel (6 values). We use configuration to initialize convolution kernel first from real data, then we create convolution_configuration for convolution. The buffer object from configuration is passed to convolution_configuration as kernel object.
 	//1. Kernel forward FFT.
@@ -126,6 +137,10 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 	res = zeMemAllocDevice(vkGPU->context, &device_desc, kernelSize, sizeof(float), vkGPU->device, &kernel);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 	configuration.buffer = &kernel;
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* kernel = 0;
+	kernel = vkGPU->device->newBuffer(kernelSize, MTL::ResourceStorageModePrivate);
+	configuration.buffer = &kernel;
 #endif
 
 	configuration.bufferSize = &kernelSize;
@@ -157,38 +172,9 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 			}
 		}
 	}
-	//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
+	//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 	resFFT = transferDataFromCPU(vkGPU, kernel_input, &kernel, kernelSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(kernel, kernel_input, kernelSize, cudaMemcpyHostToDevice);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(kernel, kernel_input, kernelSize, hipMemcpyHostToDevice);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueWriteBuffer(vkGPU->commandQueue, kernel, CL_TRUE, 0, kernelSize, kernel_input, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-	};
-	ze_command_list_handle_t copyCommandList;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, kernel, kernel_input, kernelSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
-
 	//Initialize application responsible for the kernel. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 	resFFT = initializeVkFFT(&app_kernel, configuration);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -219,6 +205,8 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 	convolution_configuration.kernel = &kernel;
 #elif(VKFFT_BACKEND==4)
 	convolution_configuration.kernel = (void**)&kernel;
+#elif(VKFFT_BACKEND==5)
+	convolution_configuration.kernel = &kernel;
 #endif	
 
 	//Allocate separate buffer for the input data.
@@ -249,6 +237,10 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 	res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 	configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* buffer = 0;
+	buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+	configuration.buffer = &buffer;
 #endif
 
 	convolution_configuration.bufferSize = &bufferSize;
@@ -271,29 +263,9 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 		}
 	}
 	//Transfer data to GPU using staging buffer.
-#if(VKFFT_BACKEND==0)
 	resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	res = zeCommandListReset(copyCommandList);
-	if (res != ZE_RESULT_SUCCESS)return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
-
+	
 	//Initialize application responsible for the convolution.
 	resFFT = initializeVkFFT(&app_convolution, convolution_configuration);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -306,28 +278,8 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 	float* buffer_output = (float*)malloc(bufferSize);
 	if (!buffer_output) return VKFFT_ERROR_MALLOC_FAILED;
 	//Transfer data from GPU using staging buffer.
-#if(VKFFT_BACKEND==0)
 	resFFT = transferDataToCPU(vkGPU, buffer_output, &buffer, bufferSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(buffer_output, buffer, bufferSize, cudaMemcpyDeviceToHost);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(buffer_output, buffer, bufferSize, hipMemcpyDeviceToHost);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_output, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	res = zeCommandListReset(copyCommandList);
-	if (res != ZE_RESULT_SUCCESS)return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, buffer_output, buffer, bufferSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
 	//Print data, if needed.
 	for (uint64_t v = 0; v < convolution_configuration.coordinateFeatures; v++) {
 		if (file_output)
@@ -364,6 +316,9 @@ VkFFTResult sample_50_convolution_VkFFT_single_1d_matrix(VkGPU* vkGPU, uint64_t
 #elif(VKFFT_BACKEND==4)
 	zeMemFree(vkGPU->context, buffer);
 	zeMemFree(vkGPU->context, kernel);
+#elif(VKFFT_BACKEND==5)
+	buffer->release();
+	kernel->release();
 #endif	
 	deleteVkFFT(&app_kernel);
 	deleteVkFFT(&app_convolution);
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
index 8ce3ce1..7eae7dd 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "51 - VkFFT zeropadding convolution example with identitiy kernel\n");
@@ -89,7 +94,11 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 	//coordinateFeatures number is an important constant for convolution. If we perform 1x1 convolution, it is equal to number of features, but matrixConvolution should be equal to 1. For matrix convolution, it must be equal to matrixConvolution parameter. If we perform 2x2 convolution, it is equal to 3 for symmetric kernel (stored as xx, xy, yy) and 4 for nonsymmetric (stored as xx, xy, yx, yy). Similarly, 6 (stored as xx, xy, xz, yy, yz, zz) and 9 (stored as xx, xy, xz, yx, yy, yz, zx, zy, zz) for 3x3 convolutions. 
 	
 	//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
+#if(VKFFT_BACKEND==5)
+	configuration.device = vkGPU->device;
+#else
 	configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 	configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 	configuration.fence = &vkGPU->fence;
@@ -102,6 +111,8 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 	configuration.context = &vkGPU->context;
 	configuration.commandQueue = &vkGPU->commandQueue;
 	configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+    configuration.queue = vkGPU->queue;
 #endif
 	//In this example, we perform a convolution for a real vectorfield (3vector) with a symmetric kernel (6 values). We use configuration to initialize convolution kernel first from real data, then we create convolution_configuration for convolution. The buffer object from configuration is passed to convolution_configuration as kernel object.
 	//1. Kernel forward FFT.
@@ -134,6 +145,10 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 	res = zeMemAllocDevice(vkGPU->context, &device_desc, kernelSize, sizeof(float), vkGPU->device, &kernel);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 	configuration.buffer = &kernel;
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* kernel = 0;
+	kernel = vkGPU->device->newBuffer(kernelSize, MTL::ResourceStorageModePrivate);
+	configuration.buffer = &kernel;
 #endif
 
 	configuration.bufferSize = &kernelSize;
@@ -166,37 +181,9 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 			}
 		}
 	}
-	//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
+	//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 	resFFT = transferDataFromCPU(vkGPU, kernel_input, &kernel, kernelSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(kernel, kernel_input, kernelSize, cudaMemcpyHostToDevice);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(kernel, kernel_input, kernelSize, hipMemcpyHostToDevice);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueWriteBuffer(vkGPU->commandQueue, kernel, CL_TRUE, 0, kernelSize, kernel_input, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-	};
-	ze_command_list_handle_t copyCommandList;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, kernel, kernel_input, kernelSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
 	//Initialize application responsible for the kernel. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 	resFFT = initializeVkFFT(&app_kernel, configuration);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -231,6 +218,8 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 	convolution_configuration.kernel = &kernel;
 #elif(VKFFT_BACKEND==4)
 	convolution_configuration.kernel = (void**)&kernel;
+#elif(VKFFT_BACKEND==5)
+	convolution_configuration.kernel = &kernel;
 #endif	
 
 	//Allocate separate buffer for the input data.
@@ -262,6 +251,10 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 	res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 	configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* buffer = 0;
+	buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+	configuration.buffer = &buffer;
 #endif
 
 	convolution_configuration.bufferSize = &bufferSize;
@@ -283,29 +276,9 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 		}
 	}
 	//Transfer data to GPU using staging buffer.
-#if(VKFFT_BACKEND==0)
 	resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	res = zeCommandListReset(copyCommandList);
-	if (res != ZE_RESULT_SUCCESS)return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
-	//Initialize application responsible for the convolution.
+	if (resFFT != VKFFT_SUCCESS) return resFFT;	//Initialize application responsible for the convolution.
+	
 	resFFT = initializeVkFFT(&app_convolution, convolution_configuration);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
 	//Sample forward FFT command buffer allocation + execution performed on kernel. FFT can also be appended to user defined command buffers.
@@ -317,28 +290,8 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 	float* buffer_output = (float*)malloc(bufferSize);
 	if (!buffer_output) return VKFFT_ERROR_MALLOC_FAILED;
 	//Transfer data from GPU using staging buffer.
-#if(VKFFT_BACKEND==0)
 	resFFT = transferDataToCPU(vkGPU, buffer_output, &buffer, bufferSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(buffer_output, buffer, bufferSize, cudaMemcpyDeviceToHost);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(buffer_output, buffer, bufferSize, hipMemcpyDeviceToHost);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_output, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	res = zeCommandListReset(copyCommandList);
-	if (res != ZE_RESULT_SUCCESS)return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, buffer_output, buffer, bufferSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
 
 	//Print data, if needed.
 	for (uint64_t v = 0; v < convolution_configuration.coordinateFeatures; v++) {
@@ -376,6 +329,9 @@ VkFFTResult sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c(VkGPU*
 #elif(VKFFT_BACKEND==4)
 	zeMemFree(vkGPU->context, buffer);
 	zeMemFree(vkGPU->context, kernel);
+#elif(VKFFT_BACKEND==5)
+	buffer->release();
+	kernel->release();
 #endif	
 	deleteVkFFT(&app_kernel);
 	deleteVkFFT(&app_convolution);
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
index fa168ef..a6a2165 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "52 - VkFFT batched convolution example with identitiy kernel\n");
@@ -81,7 +86,11 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	
 	configuration.numberBatches = 2;
 	//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-	configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+    configuration.device = vkGPU->device;
+#else
+    configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 	configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 	configuration.fence = &vkGPU->fence;
@@ -94,6 +103,8 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	configuration.context = &vkGPU->context;
 	configuration.commandQueue = &vkGPU->commandQueue;
 	configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+    configuration.queue = vkGPU->queue;
 #endif
 	//In this example, we perform a convolution for a real vectorfield (3vector) with a symmetric kernel (6 values). We use configuration to initialize convolution kernel first from real data, then we create convolution_configuration for convolution. The buffer object from configuration is passed to convolution_configuration as kernel object.
 	//1. Kernel forward FFT.
@@ -127,6 +138,10 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	res = zeMemAllocDevice(vkGPU->context, &device_desc, kernelSize, sizeof(float), vkGPU->device, &kernel);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 	configuration.buffer = &kernel;
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* kernel = 0;
+	kernel = vkGPU->device->newBuffer(kernelSize, MTL::ResourceStorageModePrivate);
+	configuration.buffer = &kernel;
 #endif
 
 	configuration.bufferSize = &kernelSize;
@@ -154,37 +169,9 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 			}
 		}
 	}
-	//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
+	//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
 	resFFT = transferDataFromCPU(vkGPU, kernel_input, &kernel, kernelSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(kernel, kernel_input, kernelSize, cudaMemcpyHostToDevice);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(kernel, kernel_input, kernelSize, hipMemcpyHostToDevice);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueWriteBuffer(vkGPU->commandQueue, kernel, CL_TRUE, 0, kernelSize, kernel_input, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-	};
-	ze_command_list_handle_t copyCommandList;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, kernel, kernel_input, kernelSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
 	//Initialize application responsible for the kernel. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 	resFFT = initializeVkFFT(&app_kernel, configuration);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
@@ -213,6 +200,8 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	convolution_configuration.kernel = &kernel;
 #elif(VKFFT_BACKEND==4)
 	convolution_configuration.kernel = (void**)&kernel;
+#elif(VKFFT_BACKEND==5)
+	convolution_configuration.kernel = &kernel;
 #endif	
 
 	convolution_configuration.kernelSize = &kernelSize;
@@ -269,7 +258,14 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 	convolution_configuration.inputBuffer = &inputBuffer;
-	configuration.buffer = &buffer;
+    convolution_configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* inputBuffer = 0;
+	MTL::Buffer* buffer = 0;
+	inputBuffer = vkGPU->device->newBuffer(inputBufferSize, MTL::ResourceStorageModePrivate);
+	buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+	convolution_configuration.inputBuffer = &inputBuffer;
+    convolution_configuration.buffer = &buffer;
 #endif
 
 	convolution_configuration.inputBufferSize = &inputBufferSize;
@@ -292,28 +288,8 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 		}
 	}
 	//Transfer data to GPU using staging buffer.
-#if(VKFFT_BACKEND==0)
 	resFFT = transferDataFromCPU(vkGPU, buffer_input, &inputBuffer, inputBufferSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(inputBuffer, buffer_input, inputBufferSize, cudaMemcpyHostToDevice);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(inputBuffer, buffer_input, inputBufferSize, hipMemcpyHostToDevice);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueWriteBuffer(vkGPU->commandQueue, inputBuffer, CL_TRUE, 0, inputBufferSize, buffer_input, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	res = zeCommandListReset(copyCommandList);
-	if (res != ZE_RESULT_SUCCESS)return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, inputBuffer, buffer_input, inputBufferSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
 
 	//Initialize application responsible for the convolution.
 	resFFT = initializeVkFFT(&app_convolution, convolution_configuration);
@@ -327,29 +303,9 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	float* buffer_output = (float*)malloc(bufferSize);
 	if (!buffer_output) return VKFFT_ERROR_MALLOC_FAILED;
 	//Transfer data from GPU using staging buffer.
-#if(VKFFT_BACKEND==0)
 	resFFT = transferDataToCPU(vkGPU, buffer_output, &buffer, bufferSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(buffer_output, buffer, bufferSize, cudaMemcpyDeviceToHost);
-	if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(buffer_output, buffer, bufferSize, hipMemcpyDeviceToHost);
-	if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueReadBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_output, 0, NULL, NULL);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-	res = zeCommandListReset(copyCommandList);
-	if (res != ZE_RESULT_SUCCESS)return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
-	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	res = zeCommandListAppendMemoryCopy(copyCommandList, buffer_output, buffer, bufferSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-	if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
-
+	
 	//Print data, if needed.
 	for (uint64_t f = 0; f < convolution_configuration.numberKernels; f++) {
 		if (file_output)
@@ -398,6 +354,10 @@ VkFFTResult sample_52_convolution_VkFFT_single_2d_batched_r2c(VkGPU* vkGPU, uint
 	zeMemFree(vkGPU->context, inputBuffer);
 	zeMemFree(vkGPU->context, buffer);
 	zeMemFree(vkGPU->context, kernel);
+#elif(VKFFT_BACKEND==5)
+	inputBuffer->release();
+	buffer->release();
+	kernel->release();
 #endif	
 	deleteVkFFT(&app_kernel);
 	deleteVkFFT(&app_convolution);
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
index a649168..da81965 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU,
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "5 - VkFFT FFT + iFFT C2C benchmark 1D batched in single precision, no reshuffling\n");
@@ -85,8 +90,16 @@ VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU,
 			
 			configuration.disableReorderFourStep = true;
 
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -99,6 +112,8 @@ VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU,
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * configuration.size[0] * configuration.numberBatches;
@@ -130,6 +145,10 @@ VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU,
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -147,43 +166,43 @@ VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU,
 				}
 			}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)3 * 4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -241,6 +260,8 @@ VkFFTResult sample_5_benchmark_VkFFT_single_disableReorderFourStep(VkGPU* vkGPU,
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
index 12b810e..4afbeaf 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_outp
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "6 - VkFFT FFT + iFFT R2C/C2R benchmark\n");
@@ -87,8 +92,16 @@ VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_outp
 
 			configuration.performR2C = true; //Perform R2C/C2R transform. Can be combined with all other options. Reduces memory requirements by a factor of 2. Requires special input data alignment: for x*y*z system pad x*y plane to (x+2)*y with last 2*y elements reserved, total array dimensions are (x*y+2y)*z. Memory layout after R2C and before C2R can be found on github.
 			//configuration.disableMergeSequencesR2C = 1;
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -101,6 +114,8 @@ VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_outp
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 
 
@@ -134,6 +149,10 @@ VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_outp
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -148,41 +167,41 @@ VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_outp
 					}
 				}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -256,6 +275,8 @@ VkFFTResult sample_6_benchmark_VkFFT_single_r2c(VkGPU* vkGPU, uint64_t file_outp
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
index fe2c3df..bb89465 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t fil
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "7 - VkFFT FFT + iFFT C2C Bluestein benchmark in single precision\n");
@@ -87,8 +92,16 @@ VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t fil
 			configuration.size[0] = benchmark_dimensions[n][0]; //Multidimensional FFT dimensions sizes (default 1). For best performance (and stability), order dimensions in descendant size order as: x>y>z.   
 			configuration.size[1] = benchmark_dimensions[n][1];
 			configuration.size[2] = benchmark_dimensions[n][2];
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -101,6 +114,8 @@ VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t fil
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * configuration.size[0] * configuration.size[1] * configuration.size[2];
@@ -132,6 +147,10 @@ VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t fil
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -147,43 +166,43 @@ VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t fil
 				}
 			}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -241,6 +260,8 @@ VkFFTResult sample_7_benchmark_VkFFT_single_Bluestein(VkGPU* vkGPU, uint64_t fil
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
index 0bebc6f..bd038a1 100644
--- a/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -56,6 +60,7 @@ VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t fil
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	if (file_output)
 		fprintf(output, "8 - VkFFT FFT + iFFT C2C Bluestein benchmark in double precision\n");
@@ -88,8 +93,16 @@ VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t fil
 			configuration.size[1] = benchmark_dimensions[n][1];
 			configuration.size[2] = benchmark_dimensions[n][2];
 			configuration.doublePrecision = 1;
+#if(VKFFT_BACKEND!=5)
+			if (r==0) configuration.saveApplicationToString = 1;
+			if (r!=0) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+            configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -102,6 +115,8 @@ VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t fil
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
 			uint64_t bufferSize = (uint64_t)sizeof(double) * 2 * configuration.size[0] * configuration.size[1] * configuration.size[2];;
@@ -133,6 +148,10 @@ VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t fil
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
@@ -148,43 +167,43 @@ VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t fil
 				}
 			}
 			*/
-			//Sample buffer transfer tool. Uses staging buffer of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
-#if(VKFFT_BACKEND==0)
-			resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
-			if (resFFT != VKFFT_SUCCESS) return resFFT;
-#elif(VKFFT_BACKEND==1)
-			res = cudaMemcpy(buffer, buffer_input, bufferSize, cudaMemcpyHostToDevice);
-			if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==2)
-			res = hipMemcpy(buffer, buffer_input, bufferSize, hipMemcpyHostToDevice);
-			if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==3)
-			res = clEnqueueWriteBuffer(vkGPU->commandQueue, buffer, CL_TRUE, 0, bufferSize, buffer_input, 0, NULL, NULL);
-			if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-#elif(VKFFT_BACKEND==4)
-			ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				vkGPU->commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-			};
-			ze_command_list_handle_t copyCommandList;
-			res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-			res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, buffer_input, bufferSize, 0, 0, 0);
-			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_COPY;
-			res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
-			if (res != 0) return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-#endif
+			//Sample buffer transfer tool. Uses staging buffer (if needed) of the same size as destination buffer, which can be reduced if transfer is done sequentially in small buffers.
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
 			//free(buffer_input);
 
+			if (configuration.loadApplicationFromString) {
+				FILE* kernelCache;
+				uint64_t str_len;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "rb");
+				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
+				fseek(kernelCache, 0, SEEK_END);
+				str_len = ftell(kernelCache);
+				fseek(kernelCache, 0, SEEK_SET);
+				configuration.loadApplicationString = malloc(str_len);
+				fread(configuration.loadApplicationString, str_len, 1, kernelCache);
+				fclose(kernelCache);
+			}
 			//Initialize applications. This function loads shaders, creates pipeline and configures FFT based on configuration file. No buffer allocations inside VkFFT library.  
 			resFFT = initializeVkFFT(&app, configuration);
 			if (resFFT != VKFFT_SUCCESS) return resFFT;
 
+			if (configuration.loadApplicationFromString)
+				free(configuration.loadApplicationString);
+
+			if (configuration.saveApplicationToString) {
+				FILE* kernelCache;
+				char fname[500];
+				int VkFFT_version = VkFFTGetVersion();
+				sprintf(fname, "VkFFT_binary");
+				kernelCache = fopen(fname, "wb");
+				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
+				fclose(kernelCache);
+			}
+
 			//Submit FFT+iFFT.
 			uint64_t num_iter = (((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize > 1000) ? 1000 : (uint64_t)((uint64_t)4096 * 1024.0 * 1024.0) / bufferSize;
 #if(VKFFT_BACKEND==0)
@@ -242,6 +261,8 @@ VkFFTResult sample_8_benchmark_VkFFT_double_Bluestein(VkGPU* vkGPU, uint64_t fil
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 			deleteVkFFT(&app);
 
diff --git a/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp b/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
index 66a66c1..f6a1808 100644
--- a/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -57,6 +61,7 @@ VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* outpu
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	const int num_runs = 3;
 	double benchmark_result = 0;//averaged result = sum(system_size/iteration_time)/num_benchmark_samples
@@ -76,6 +81,24 @@ VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* outpu
 		storageComplexSize = (2 * sizeof(float));
 		break;
 	}
+    uint64_t bufferSize = 0;
+    if (userParams->R2C) {
+        bufferSize = (uint64_t)(storageComplexSize / 2) * (userParams->X + 2) * userParams->Y * userParams->Z * userParams->B;
+    }
+    else {
+        if (userParams->DCT) {
+            bufferSize = (uint64_t)(storageComplexSize / 2) * userParams->X * userParams->Y * userParams->Z * userParams->B;
+        }
+        else {
+            bufferSize = (uint64_t)storageComplexSize * userParams->X * userParams->Y * userParams->Z * userParams->B;
+        }
+    }
+    
+    float* buffer_input = (float*)malloc(bufferSize);
+    if (!buffer_input) return VKFFT_ERROR_MALLOC_FAILED;
+    for (uint64_t i = 0; i < bufferSize/sizeof(float); i++) {
+        buffer_input[i] = (float)(2 * ((float)rand()) / RAND_MAX - 1.0);
+    }
 	for (uint64_t n = 0; n < 2; n++) {
 		double run_time[num_runs];
 		for (uint64_t r = 0; r < num_runs; r++) {
@@ -97,10 +120,16 @@ VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* outpu
 			configuration.performDCT = userParams->DCT;
 			if (userParams->P == 1) configuration.doublePrecision = 1;
 			if (userParams->P == 2) configuration.halfPrecision = 1;
+#if(VKFFT_BACKEND!=5)
 			if (userParams->saveApplicationToString && (n==0) && (r==0)) configuration.saveApplicationToString = 1;
 			if (userParams->loadApplicationFromString || (userParams->saveApplicationToString && ((n != 0) || (r != 0)))) configuration.loadApplicationFromString = 1;
+#endif
 			//After this, configuration file contains pointers to Vulkan objects needed to work with the GPU: VkDevice* device - created device, [uint64_t *bufferSize, VkBuffer *buffer, VkDeviceMemory* bufferDeviceMemory] - allocated GPU memory FFT is performed on. [uint64_t *kernelSize, VkBuffer *kernel, VkDeviceMemory* kernelDeviceMemory] - allocated GPU memory, where kernel for convolution is stored.
-			configuration.device = &vkGPU->device;
+#if(VKFFT_BACKEND==5)
+			configuration.device = vkGPU->device;
+#else
+            configuration.device = &vkGPU->device;
+#endif
 #if(VKFFT_BACKEND==0)
 			configuration.queue = &vkGPU->queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
 			configuration.fence = &vkGPU->fence;
@@ -113,20 +142,10 @@ VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* outpu
 			configuration.context = &vkGPU->context;
 			configuration.commandQueue = &vkGPU->commandQueue;
 			configuration.commandQueueID = vkGPU->commandQueueID;
+#elif(VKFFT_BACKEND==5)
+            configuration.queue = vkGPU->queue;
 #endif
 			//Allocate buffer for the input data.
-			uint64_t bufferSize = 0;
-			if (userParams->R2C) {
-				bufferSize = (uint64_t)(storageComplexSize / 2) * (configuration.size[0] + 2) * configuration.size[1] * configuration.size[2] * configuration.numberBatches;
-			}
-			else {
-				if (userParams->DCT) {
-					bufferSize = (uint64_t)(storageComplexSize / 2) * configuration.size[0] * configuration.size[1] * configuration.size[2] * configuration.numberBatches;
-				}
-				else {
-					bufferSize = (uint64_t)storageComplexSize * configuration.size[0] * configuration.size[1] * configuration.size[2] * configuration.numberBatches;
-				}
-			}
 #if(VKFFT_BACKEND==0)
 			VkBuffer buffer = {};
 			VkDeviceMemory bufferDeviceMemory = {};
@@ -155,20 +174,24 @@ VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* outpu
 			res = zeMemAllocDevice(vkGPU->context, &device_desc, bufferSize, sizeof(float), vkGPU->device, &buffer);
 			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 			configuration.buffer = &buffer;
+#elif(VKFFT_BACKEND==5)
+            MTL::Buffer* buffer = 0;
+            buffer = vkGPU->device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+            configuration.buffer = &buffer;
 #endif
 
 			configuration.bufferSize = &bufferSize;
+            
+            resFFT = transferDataFromCPU(vkGPU, buffer_input, &buffer, bufferSize);
+            if (resFFT != VKFFT_SUCCESS) return resFFT;
+            
 			if (configuration.loadApplicationFromString) {
 				FILE* kernelCache;
 				uint64_t str_len;
 				char fname[500];
 				int VkFFT_version = VkFFTGetVersion();
 				sprintf(fname, "VkFFT_binary_X%" PRIu64 "_Y%" PRIu64 "_Z%" PRIu64 "_P%" PRIu64 "_B%" PRIu64 "_N%" PRIu64 "_R2C%" PRIu64 "_DCT%" PRIu64 "_ver%d", userParams->X, userParams->Y, userParams->Z, userParams->P, userParams->B, userParams->N, userParams->R2C, userParams->DCT, VkFFT_version);
-#if((VKFFT_BACKEND==0) || (VKFFT_BACKEND==2) || (VKFFT_BACKEND==4))
-				kernelCache = fopen(fname, "rb"); //Vulkan and HIP backends load data as a uint32_t sequence
-#else
-				kernelCache = fopen(fname, "r");
-#endif
+				kernelCache = fopen(fname, "rb");
 				if (!kernelCache) return VKFFT_ERROR_EMPTY_FILE;
 				fseek(kernelCache, 0, SEEK_END);
 				str_len = ftell(kernelCache);
@@ -189,11 +212,7 @@ VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* outpu
 				char fname[500];
 				int VkFFT_version = VkFFTGetVersion();
 				sprintf(fname, "VkFFT_binary_X%" PRIu64 "_Y%" PRIu64 "_Z%" PRIu64 "_P%" PRIu64 "_B%" PRIu64 "_N%" PRIu64 "_R2C%" PRIu64 "_DCT%" PRIu64 "_ver%d", userParams->X, userParams->Y, userParams->Z, userParams->P, userParams->B, userParams->N, userParams->R2C, userParams->DCT, VkFFT_version);
-#if((VKFFT_BACKEND==0) || (VKFFT_BACKEND==2) || (VKFFT_BACKEND==4))
-				kernelCache = fopen(fname, "wb"); //Vulkan and HIP backends save data as a uint32_t sequence
-#else
-				kernelCache = fopen(fname, "w");
-#endif
+				kernelCache = fopen(fname, "wb");
 				fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
 				fclose(kernelCache);
 			}
@@ -243,11 +262,14 @@ VkFFTResult user_benchmark_VkFFT(VkGPU* vkGPU, uint64_t file_output, FILE* outpu
 			clReleaseMemObject(buffer);
 #elif(VKFFT_BACKEND==4)
 			zeMemFree(vkGPU->context, buffer);
+#elif(VKFFT_BACKEND==5)
+            buffer->release();
 #endif
 
 			deleteVkFFT(&app);
 
 		}
 	}
+    free(buffer_input);
 	return resFFT;
 }
diff --git a/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp b/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
index 930e1d6..ee6763c 100644
--- a/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
+++ b/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
@@ -39,6 +39,10 @@
 #endif 
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
 #endif
 #include "vkFFT.h"
 #include "utils_VkFFT.h"
@@ -221,7 +225,7 @@ VkResult getComputeQueueFamilyIndex(VkGPU* vkGPU) {
 			return VK_ERROR_INITIALIZATION_FAILED;
 		}
 		vkGPU->queueFamilyIndex = i;
-	return VK_SUCCESS;
+		return VK_SUCCESS;
 	}
 	else
 		return VK_INCOMPLETE;
@@ -352,20 +356,18 @@ VkFFTResult allocateBuffer(VkGPU* vkGPU, VkBuffer* buffer, VkDeviceMemory* devic
 	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY;
 	return resFFT;
 }
-VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_t bufferSize) {
-	//a function that transfers data from the CPU to the GPU using staging buffer, because the GPU memory is not host-coherent
+#endif
+VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* cpu_arr, void* output_buffer, uint64_t transferSize) {
+	//a function that transfers data from the GPU to the CPU using staging buffer, because the GPU memory is not host-coherent
 	VkFFTResult resFFT = VKFFT_SUCCESS;
+#if(VKFFT_BACKEND==0)
 	VkResult res = VK_SUCCESS;
-	uint64_t stagingBufferSize = bufferSize;
+	VkBuffer* buffer = (VkBuffer*)output_buffer;
+	uint64_t stagingBufferSize = transferSize;
 	VkBuffer stagingBuffer = { 0 };
 	VkDeviceMemory stagingBufferMemory = { 0 };
-	resFFT = allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
-	if (resFFT != VKFFT_SUCCESS) return resFFT;
-	void* data;
-	res = vkMapMemory(vkGPU->device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
+	resFFT = allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
-	memcpy(data, arr, stagingBufferSize);
-	vkUnmapMemory(vkGPU->device, stagingBufferMemory);
 	VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
 	commandBufferAllocateInfo.commandPool = vkGPU->commandPool;
 	commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
@@ -381,7 +383,7 @@ VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint6
 	copyRegion.srcOffset = 0;
 	copyRegion.dstOffset = 0;
 	copyRegion.size = stagingBufferSize;
-	vkCmdCopyBuffer(commandBuffer, stagingBuffer, buffer[0], 1, &copyRegion);
+	vkCmdCopyBuffer(commandBuffer, buffer[0], stagingBuffer, 1, &copyRegion);
 	res = vkEndCommandBuffer(commandBuffer);
 	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
 	VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
@@ -394,19 +396,96 @@ VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint6
 	res = vkResetFences(vkGPU->device, 1, &vkGPU->fence);
 	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
 	vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &commandBuffer);
+	void* data;
+	res = vkMapMemory(vkGPU->device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
+	if (resFFT != VKFFT_SUCCESS) return resFFT;
+	memcpy(cpu_arr, data, stagingBufferSize);
+	vkUnmapMemory(vkGPU->device, stagingBufferMemory);
 	vkDestroyBuffer(vkGPU->device, stagingBuffer, NULL);
 	vkFreeMemory(vkGPU->device, stagingBufferMemory, NULL);
+#elif(VKFFT_BACKEND==1)
+	cudaError_t res = cudaSuccess;
+	void* buffer = ((void**)output_buffer)[0];
+	res = cudaMemcpy(cpu_arr, buffer, transferSize, cudaMemcpyDeviceToHost);
+	if (res != cudaSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==2)
+	hipError_t res = hipSuccess;
+	void* buffer = ((void**)output_buffer)[0];
+	res = hipMemcpy(cpu_arr, buffer, transferSize, hipMemcpyDeviceToHost);
+	if (res != hipSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==3)
+	cl_int res = CL_SUCCESS;
+	cl_mem* buffer = (cl_mem*)output_buffer;
+	cl_command_queue commandQueue = clCreateCommandQueue(vkGPU->context, vkGPU->device, 0, &res);
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE;
+	res = clEnqueueReadBuffer(commandQueue, buffer[0], CL_TRUE, 0, transferSize, cpu_arr, 0, NULL, NULL);
+	if (res != CL_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = clReleaseCommandQueue(commandQueue);
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
+#elif(VKFFT_BACKEND==4)
+	ze_result_t res = ZE_RESULT_SUCCESS;
+	void* buffer = ((void**)output_buffer)[0];
+	ze_command_queue_desc_t commandQueueCopyDesc = {
+			ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
+			0,
+			vkGPU->commandQueueID,
+			0, // index
+			0, // flags
+			ZE_COMMAND_QUEUE_MODE_DEFAULT,
+			ZE_COMMAND_QUEUE_PRIORITY_NORMAL
+	};
+	ze_command_list_handle_t copyCommandList;
+	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	}
+	res = zeCommandListAppendMemoryCopy(copyCommandList, cpu_arr, buffer, transferSize, 0, 0, 0);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+	}
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* stagingBuffer = vkGPU->device->newBuffer(transferSize, MTL::ResourceStorageModeShared);
+	MTL::CommandBuffer* copyCommandBuffer = vkGPU->queue->commandBuffer();
+	if (copyCommandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::BlitCommandEncoder* blitCommandEncoder = copyCommandBuffer->blitCommandEncoder();
+	if (blitCommandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::Buffer* buffer = ((MTL::Buffer**)output_buffer)[0];
+	blitCommandEncoder->copyFromBuffer((MTL::Buffer*)buffer, 0, (MTL::Buffer*)stagingBuffer, 0, transferSize);
+	blitCommandEncoder->endEncoding();
+	copyCommandBuffer->commit();
+	copyCommandBuffer->waitUntilCompleted();
+	blitCommandEncoder->release();
+	copyCommandBuffer->release();
+	memcpy(cpu_arr, stagingBuffer->contents(), transferSize);
+	stagingBuffer->release();
+#endif
 	return resFFT;
 }
-VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_t bufferSize) {
-	//a function that transfers data from the GPU to the CPU using staging buffer, because the GPU memory is not host-coherent
+VkFFTResult transferDataFromCPU(VkGPU* vkGPU, void* cpu_arr, void* input_buffer, uint64_t transferSize) {
 	VkFFTResult resFFT = VKFFT_SUCCESS;
+#if(VKFFT_BACKEND==0)
 	VkResult res = VK_SUCCESS;
-	uint64_t stagingBufferSize = bufferSize;
+	VkBuffer* buffer = (VkBuffer*)input_buffer;
+	uint64_t stagingBufferSize = transferSize;
 	VkBuffer stagingBuffer = { 0 };
 	VkDeviceMemory stagingBufferMemory = { 0 };
-	resFFT = allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
+	resFFT = allocateBuffer(vkGPU, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize);
+	if (resFFT != VKFFT_SUCCESS) return resFFT;
+	void* data;
+	res = vkMapMemory(vkGPU->device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
 	if (resFFT != VKFFT_SUCCESS) return resFFT;
+	memcpy(data, cpu_arr, stagingBufferSize);
+	vkUnmapMemory(vkGPU->device, stagingBufferMemory);
 	VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
 	commandBufferAllocateInfo.commandPool = vkGPU->commandPool;
 	commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
@@ -422,7 +501,7 @@ VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_
 	copyRegion.srcOffset = 0;
 	copyRegion.dstOffset = 0;
 	copyRegion.size = stagingBufferSize;
-	vkCmdCopyBuffer(commandBuffer, buffer[0], stagingBuffer, 1, &copyRegion);
+	vkCmdCopyBuffer(commandBuffer, stagingBuffer, buffer[0], 1, &copyRegion);
 	res = vkEndCommandBuffer(commandBuffer);
 	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
 	VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
@@ -435,16 +514,76 @@ VkFFTResult transferDataToCPU(VkGPU* vkGPU, void* arr, VkBuffer* buffer, uint64_
 	res = vkResetFences(vkGPU->device, 1, &vkGPU->fence);
 	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
 	vkFreeCommandBuffers(vkGPU->device, vkGPU->commandPool, 1, &commandBuffer);
-	void* data;
-	res = vkMapMemory(vkGPU->device, stagingBufferMemory, 0, stagingBufferSize, 0, &data);
-	if (resFFT != VKFFT_SUCCESS) return resFFT;
-	memcpy(arr, data, stagingBufferSize);
-	vkUnmapMemory(vkGPU->device, stagingBufferMemory);
 	vkDestroyBuffer(vkGPU->device, stagingBuffer, NULL);
 	vkFreeMemory(vkGPU->device, stagingBufferMemory, NULL);
 	return resFFT;
-}
+#elif(VKFFT_BACKEND==1)
+	cudaError_t res = cudaSuccess;
+	void* buffer = ((void**)input_buffer)[0];
+	res = cudaMemcpy(buffer, cpu_arr, transferSize, cudaMemcpyHostToDevice);
+	if (res != cudaSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==2)
+	hipError_t res = hipSuccess;
+	void* buffer = ((void**)input_buffer)[0];
+	res = hipMemcpy(buffer, cpu_arr, transferSize, hipMemcpyHostToDevice);
+	if (res != hipSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==3)
+	cl_int res = CL_SUCCESS;
+	cl_mem* buffer = (cl_mem*)input_buffer;
+	cl_command_queue commandQueue = clCreateCommandQueue(vkGPU->context, vkGPU->device, 0, &res);
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE;
+	res = clEnqueueWriteBuffer(commandQueue, buffer[0], CL_TRUE, 0, transferSize, cpu_arr, 0, NULL, NULL);
+	if (res != CL_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = clReleaseCommandQueue(commandQueue);
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
+#elif(VKFFT_BACKEND==4)
+	ze_result_t res = ZE_RESULT_SUCCESS;
+	void* buffer = ((void**)input_buffer)[0];
+	ze_command_queue_desc_t commandQueueCopyDesc = {
+			ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
+			0,
+			vkGPU->commandQueueID,
+			0, // index
+			0, // flags
+			ZE_COMMAND_QUEUE_MODE_DEFAULT,
+			ZE_COMMAND_QUEUE_PRIORITY_NORMAL
+	};
+	ze_command_list_handle_t copyCommandList;
+	res = zeCommandListCreateImmediate(vkGPU->context, vkGPU->device, &commandQueueCopyDesc, &copyCommandList);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	}
+	res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, cpu_arr, transferSize, 0, 0, 0);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = zeCommandQueueSynchronize(vkGPU->commandQueue, UINT32_MAX);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+	}
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* stagingBuffer = vkGPU->device->newBuffer(cpu_arr, transferSize, MTL::ResourceStorageModeShared);
+	MTL::CommandBuffer* copyCommandBuffer = vkGPU->queue->commandBuffer();
+	if (copyCommandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::BlitCommandEncoder* blitCommandEncoder = copyCommandBuffer->blitCommandEncoder();
+	if (blitCommandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::Buffer* buffer = ((MTL::Buffer**)input_buffer)[0];
+	blitCommandEncoder->copyFromBuffer((MTL::Buffer*)stagingBuffer, 0, (MTL::Buffer*)buffer, 0, transferSize);
+	blitCommandEncoder->endEncoding();
+	copyCommandBuffer->commit();
+	copyCommandBuffer->waitUntilCompleted();
+	blitCommandEncoder->release();
+	copyCommandBuffer->release();
+	stagingBuffer->release();
 #endif
+	return resFFT;
+}
 VkFFTResult devices_list() {
 	//this function creates an instance and prints the list of available devices
 #if(VKFFT_BACKEND==0)
@@ -476,7 +615,7 @@ VkFFTResult devices_list() {
 		}
 		free(devices);
 	}
-	else 
+	else
 		return VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES;
 	vkDestroyInstance(local_instance, NULL);
 #elif(VKFFT_BACKEND==1)
@@ -572,6 +711,12 @@ VkFFTResult devices_list() {
 		free(deviceList);
 	}
 	free(drivers);
+#elif(VKFFT_BACKEND==5)
+	NS::Array* devices = MTL::CopyAllDevices();
+	for (uint64_t i = 0; i < devices->count(); i++) {
+		MTL::Device* loc_device = (MTL::Device*)devices->object(i);
+		printf("Device id: %" PRIu64 " name: %s\n", i, loc_device->name()->cString(NS::UTF8StringEncoding));
+	}
 #endif
 	return VKFFT_SUCCESS;
 }
@@ -653,7 +798,7 @@ VkFFTResult performVulkanFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchPar
 	ze_command_list_handle_t commandList = {};
 	res = zeCommandListCreate(vkGPU->context, vkGPU->device, &commandListDescription, &commandList);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	
+
 	launchParams->commandList = &commandList;
 	//Record commands num_iter times. Allows to perform multiple convolutions/transforms in one submit.
 	for (uint64_t i = 0; i < num_iter; i++) {
@@ -662,7 +807,7 @@ VkFFTResult performVulkanFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchPar
 	}
 	res = zeCommandListClose(commandList);
 	if (res != 0) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
-	
+
 	std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
 	res = zeCommandQueueExecuteCommandLists(vkGPU->commandQueue, 1, &commandList, 0);
 	if (res != 0) return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
@@ -674,6 +819,27 @@ VkFFTResult performVulkanFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunchPar
 	//printf("Pure submit execution time per num_iter: %.3f ms\n", totTime / num_iter);
 	res = zeCommandListDestroy(commandList);
 	if (res != 0) return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
+#elif(VKFFT_BACKEND==5)
+	MTL::CommandBuffer* commandBuffer = vkGPU->queue->commandBuffer();
+	if (commandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	launchParams->commandBuffer = commandBuffer;
+	MTL::ComputeCommandEncoder* commandEncoder = commandBuffer->computeCommandEncoder();
+	if (commandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	launchParams->commandEncoder = commandEncoder;
+	for (uint64_t i = 0; i < num_iter; i++) {
+		resFFT = VkFFTAppend(app, inverse, launchParams);
+		if (resFFT != VKFFT_SUCCESS) return resFFT;
+	}
+	commandEncoder->endEncoding();
+
+	std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
+	commandBuffer->commit();
+	commandBuffer->waitUntilCompleted();
+	std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
+	double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
+
+	commandEncoder->release();
+	commandBuffer->release();
 #endif
 	return resFFT;
 }
@@ -787,6 +953,29 @@ VkFFTResult performVulkanFFTiFFT(VkGPU* vkGPU, VkFFTApplication* app, VkFFTLaunc
 	time_result[0] = totTime / num_iter;
 	res = zeCommandListDestroy(commandList);
 	if (res != 0) return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
+#elif(VKFFT_BACKEND==5)
+	MTL::CommandBuffer* commandBuffer = vkGPU->queue->commandBuffer();
+	if (commandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	launchParams->commandBuffer = commandBuffer;
+	MTL::ComputeCommandEncoder* commandEncoder = commandBuffer->computeCommandEncoder();
+	if (commandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	launchParams->commandEncoder = commandEncoder;
+	for (uint64_t i = 0; i < num_iter; i++) {
+		resFFT = VkFFTAppend(app, -1, launchParams);
+		if (resFFT != VKFFT_SUCCESS) return resFFT;
+		resFFT = VkFFTAppend(app, 1, launchParams);
+		if (resFFT != VKFFT_SUCCESS) return resFFT;
+	}
+	commandEncoder->endEncoding();
+
+	std::chrono::steady_clock::time_point timeSubmit = std::chrono::steady_clock::now();
+	commandBuffer->commit();
+	commandBuffer->waitUntilCompleted();
+	std::chrono::steady_clock::time_point timeEnd = std::chrono::steady_clock::now();
+	double totTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeSubmit).count() * 0.001;
+	time_result[0] = totTime / num_iter;
+	commandEncoder->release();
+	commandBuffer->release();
 #endif
 	return resFFT;
-}
\ No newline at end of file
+}
diff --git a/debian/changelog b/debian/changelog
index 73b22d3..afc248a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+vkfft (1.2.31+ds1-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Sun, 11 Jun 2023 12:51:19 -0000
+
 vkfft (1.2.26+ds1-1) unstable; urgency=medium
 
   * New upstream version 1.2.26+ds1
diff --git a/debian/patches/0001-Use-Debian-version-of-glslang.patch b/debian/patches/0001-Use-Debian-version-of-glslang.patch
index f340b71..69e4b6e 100644
--- a/debian/patches/0001-Use-Debian-version-of-glslang.patch
+++ b/debian/patches/0001-Use-Debian-version-of-glslang.patch
@@ -35,10 +35,10 @@ Subject: Use Debian version of glslang
  vkFFT/vkFFT.h                                                           | 2 +-
  30 files changed, 30 insertions(+), 30 deletions(-)
 
-diff --git a/Vulkan_FFT.cpp b/Vulkan_FFT.cpp
-index 5e7b9bf..f7de958 100644
---- a/Vulkan_FFT.cpp
-+++ b/Vulkan_FFT.cpp
+Index: vkfft.git/Vulkan_FFT.cpp
+===================================================================
+--- vkfft.git.orig/Vulkan_FFT.cpp
++++ vkfft.git/Vulkan_FFT.cpp
 @@ -9,7 +9,7 @@
  #include <inttypes.h>
  #if(VKFFT_BACKEND==0)
@@ -48,10 +48,10 @@ index 5e7b9bf..f7de958 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
-index aae77e2..895ac24 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_0_benchmark_VkFFT_single.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -61,10 +61,10 @@ index aae77e2..895ac24 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
-index 84ecc9e..0251a1e 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1000_VkFFT_single_2_4096.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -74,10 +74,10 @@ index 84ecc9e..0251a1e 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
-index 4cf990a..b3a83a4 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1001_benchmark_VkFFT_double_2_4096.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -87,10 +87,10 @@ index 4cf990a..b3a83a4 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
-index 202ee75..bdb9945 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1003_benchmark_VkFFT_single_3d_2_512.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -100,10 +100,10 @@ index 202ee75..bdb9945 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
-index 4b286e6..d2773f9 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_100_benchmark_VkFFT_single_nd_dct.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -113,10 +113,10 @@ index 4b286e6..d2773f9 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
-index 5c0cb5f..2d34037 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_101_benchmark_VkFFT_double_nd_dct.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -126,10 +126,10 @@ index 5c0cb5f..2d34037 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
-index dddcdde..2e28af6 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_10_benchmark_VkFFT_single_multipleBuffers.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -139,10 +139,10 @@ index dddcdde..2e28af6 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
-index 48a6325..53d2bbb 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_11_precision_VkFFT_single.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -152,10 +152,10 @@ index 48a6325..53d2bbb 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
-index d1f25c9..6fbeccc 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_12_precision_VkFFT_double.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -165,10 +165,10 @@ index d1f25c9..6fbeccc 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
-index 59b8354..5e05109 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_13_precision_VkFFT_half.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -178,10 +178,10 @@ index 59b8354..5e05109 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
-index 77fc99b..4aad477 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_14_precision_VkFFT_single_nonPow2.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -191,10 +191,10 @@ index 77fc99b..4aad477 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
-index 257e9a5..0194c57 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_15_precision_VkFFT_single_r2c.cpp
 @@ -14,7 +14,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -204,10 +204,10 @@ index 257e9a5..0194c57 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
-index 3d02b66..f23357b 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_16_precision_VkFFT_single_dct.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -217,10 +217,10 @@ index 3d02b66..f23357b 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
-index 60129db..600638f 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_17_precision_VkFFT_double_dct.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -230,10 +230,10 @@ index 60129db..600638f 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
-index edacae7..ba77445 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_18_precision_VkFFT_double_nonPow2.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -243,10 +243,10 @@ index edacae7..ba77445 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
-index 2e17950..29de8ff 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_1_benchmark_VkFFT_double.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -256,10 +256,10 @@ index 2e17950..29de8ff 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
-index 9aa9776..c81e9fc 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_2_benchmark_VkFFT_half.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -269,10 +269,10 @@ index 9aa9776..c81e9fc 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
-index f337314..8a03916 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_3_benchmark_VkFFT_single_3d.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -282,10 +282,10 @@ index f337314..8a03916 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
-index 466bf9b..69c0c25 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_4_benchmark_VkFFT_single_3d_zeropadding.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -295,10 +295,10 @@ index 466bf9b..69c0c25 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
-index c536376..b9cb2ce 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_50_convolution_VkFFT_single_1d_matrix.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -308,10 +308,10 @@ index c536376..b9cb2ce 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
-index 4b471e6..dd7b28c 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_51_convolution_VkFFT_single_3d_matrix_zeropadding_r2c.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -321,10 +321,10 @@ index 4b471e6..dd7b28c 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
-index 7b6fccf..935d32a 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_52_convolution_VkFFT_single_2d_batched_r2c.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -334,10 +334,10 @@ index 7b6fccf..935d32a 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
-index fbfa396..414b180 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_5_benchmark_VkFFT_single_disableReorderFourStep.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -347,10 +347,10 @@ index fbfa396..414b180 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
-index 51818c0..fa1c3be 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_6_benchmark_VkFFT_single_r2c.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -360,10 +360,10 @@ index 51818c0..fa1c3be 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
-index 52b8a7e..5ce4950 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_7_benchmark_VkFFT_single_Bluestein.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -373,10 +373,10 @@ index 52b8a7e..5ce4950 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp b/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
-index 81c1e0d..020b61f 100644
---- a/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/sample_8_benchmark_VkFFT_double_Bluestein.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -386,10 +386,10 @@ index 81c1e0d..020b61f 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp b/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
-index 5e63644..3fa0a79 100644
---- a/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/user_benchmark_VkFFT.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -399,10 +399,10 @@ index 5e63644..3fa0a79 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp b/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
-index 00a0426..31ae013 100644
---- a/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
-+++ b/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
+Index: vkfft.git/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
+===================================================================
+--- vkfft.git.orig/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
++++ vkfft.git/benchmark_scripts/vkFFT_scripts/src/utils_VkFFT.cpp
 @@ -13,7 +13,7 @@
  
  #if(VKFFT_BACKEND==0)
@@ -412,10 +412,10 @@ index 00a0426..31ae013 100644
  #elif(VKFFT_BACKEND==1)
  #include <cuda.h>
  #include <cuda_runtime.h>
-diff --git a/vkFFT/vkFFT.h b/vkFFT/vkFFT.h
-index 26e8580..eae06be 100644
---- a/vkFFT/vkFFT.h
-+++ b/vkFFT/vkFFT.h
+Index: vkfft.git/vkFFT/vkFFT.h
+===================================================================
+--- vkfft.git.orig/vkFFT/vkFFT.h
++++ vkfft.git/vkFFT/vkFFT.h
 @@ -35,7 +35,7 @@
  #include <inttypes.h>
  #if(VKFFT_BACKEND==0)
diff --git a/documentation/VkFFT_API_guide.lyx b/documentation/VkFFT_API_guide.lyx
index 9df3ce8..77cf9d9 100644
--- a/documentation/VkFFT_API_guide.lyx
+++ b/documentation/VkFFT_API_guide.lyx
@@ -136,8 +136,8 @@ vspace{1cm}
 \backslash
 scshape
 \backslash
-LARGE VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero Fast Fourier Transform library
- 
+LARGE VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero/Metal Fast Fourier Transform
+ library 
 \backslash
 par} 		
 \end_layout
@@ -192,7 +192,7 @@ vspace{1cm}
 
 {
 \backslash
-large August 2022, version 1.2.26
+large October 2022, version 1.2.30
 \backslash
 par} 
 \end_layout
@@ -237,8 +237,8 @@ Introduction
 \end_layout
 
 \begin_layout Standard
-This document describes VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero Fast Fourier
- Transform library.
+This document describes VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero/Metal
+ Fast Fourier Transform library.
  It describes the features and current limitations of VkFFT, explains the
  API and compares it to other FFT libraries (like FFTW and cuFFT) on the
  set of examples.
@@ -334,7 +334,8 @@ Copy vkFFT.h file into one of the directories included in the user's project.
 \begin_layout Enumerate
 \noindent
 Define VKFFT_BACKEND as a number corresponding to the API used in the user's
- project: 0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero.
+ project: 0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 -
+ Metal.
  Definition is done like:
 \begin_inset ERT
 status open
@@ -395,7 +396,7 @@ begin{minted}[tabsize=4,obeytabs,breaklines]{cmake}
 \begin_layout Plain Layout
 
 set(VKFFT_BACKEND 1 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL,
- 4 - Level Zero")
+ 4 - Level Zero, 5 - Metal")
 \end_layout
 
 \begin_layout Plain Layout
@@ -963,6 +964,100 @@ end{mdframed}
 \end_inset
 
 
+\end_layout
+
+\begin_layout Enumerate
+Metal API: Metal.
+ Sample CMakeLists can look like this:
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{mdframed}[backgroundcolor=bg]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{minted}[tabsize=4,obeytabs,breaklines]{cmake}
+\end_layout
+
+\begin_layout Plain Layout
+
+add_compile_options(-WMTL_IGNORE_WARNINGS)
+\end_layout
+
+\begin_layout Plain Layout
+
+find_library(FOUNDATION_LIB Foundation REQUIRED)
+\end_layout
+
+\begin_layout Plain Layout
+
+find_library(QUARTZ_CORE_LIB QuartzCore REQUIRED)	
+\end_layout
+
+\begin_layout Plain Layout
+
+find_library(METAL_LIB Metal REQUIRED)
+\end_layout
+
+\begin_layout Plain Layout
+
+target_include_directories(${PROJECT_NAME} PUBLIC "metal-cpp/")
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vk
+FFT/)
+\end_layout
+
+\begin_layout Plain Layout
+
+add_library(VkFFT INTERFACE)
+\end_layout
+
+\begin_layout Plain Layout
+
+target_compile_definitions(VkFFT INTERFACE -DVKFFT_BACKEND=5)
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+target_link_libraries(${PROJECT_NAME} PUBLIC ${FOUNDATION_LIB} ${QUARTZ_CORE_LIB
+} ${METAL_LIB} VkFFT)
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{minted}
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{mdframed}
+\end_layout
+
+\end_inset
+
+
 \end_layout
 
 \end_deeper
@@ -1228,7 +1323,8 @@ VkFFT buffers
 VkFFT allows for explicit control over the data flow, which makes both in-place
  and out-of-place transforms possible.
  Buffers are passed to VkFFT as VkBuffer pointer in Vulkan, as double void
- pointers in CUDA/HIP/Level Zero and as cl_mem pointer in OpenCL.
+ pointers in CUDA/HIP/Level Zero, as cl_mem pointer in OpenCL and as MTL::Buffer
+ pointer in Metal.
  This is done to maintain a uniform data pattern because some of the buffers
  can be allocated automatically.
  
@@ -2644,6 +2740,14 @@ ndLaunchKernel calls to user-defined command list ze_command_list_handle_t.
  
 \end_layout
 
+\begin_layout Itemize
+Metal API: similar to Vulkan, VkFFT appends a sequence of dispatchThreads
+ calls to user-defined command encoder MTL::ComputeCommandEncoder.
+ MTL::ComputeCommandEncoder and its MTL::CommandBuffer must be provided
+ as a pointer in VkFFTLaunchParams.
+ 
+\end_layout
+
 \begin_layout Standard
 If VkFFT fails during the VkFFTAppend call, it will not free the application
  and allocated there resources - use a separate call for that.
@@ -2937,6 +3041,23 @@ uint32_t commandQueueID;	// ID of the commandQueue with compute and copy
 
 \begin_layout Plain Layout
 
+#elif(VKFFT_BACKEND==5)
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::Device* device;	// Pointer to Metal device, obtained from MTL::CopyAllDevic
+es
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::CommandQueue* queue;	// Pointer to Metal queue, obtained from device->newCo
+mmandQueue()
+\end_layout
+
+\begin_layout Plain Layout
+
 #endif
 \end_layout
 
@@ -3172,6 +3293,80 @@ cl_mem* kernel;	// Pointer to device buffer used to read kernel data from
 
 \begin_layout Plain Layout
 
+#elif(VKFFT_BACKEND==4)
+\end_layout
+
+\begin_layout Plain Layout
+
+void** buffer;	// Pointer to device buffer used for computations
+\end_layout
+
+\begin_layout Plain Layout
+
+void** tempBuffer;	// Needed if reorderFourStep is enabled to transpose
+ the array.
+ Same size as buffer.
+ Default 0.
+ Setting to non zero value enables manual user allocation
+\end_layout
+
+\begin_layout Plain Layout
+
+void** inputBuffer;	// Pointer to device buffer used to read data from if
+ isInputFormatted is enabled
+\end_layout
+
+\begin_layout Plain Layout
+
+void** outputBuffer;	// Pointer to device buffer used to write data to
+ if isOutputFormatted is enabled
+\end_layout
+
+\begin_layout Plain Layout
+
+void** kernel;	// Pointer to device buffer used to read kernel data from
+ if performConvolution is enabled
+\end_layout
+
+\begin_layout Plain Layout
+
+#elif(VKFFT_BACKEND==5)
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::Buffer** buffer;	// Pointer to device buffer used for computations
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::Buffer** tempBuffer;	// Needed if reorderFourStep is enabled to transpose
+ the array.
+ Same size as buffer.
+ Default 0.
+ Setting to non zero value enables manual user allocation
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::Buffer** inputBuffer;	// Pointer to device buffer used to read data
+ from if isInputFormatted is enabled
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::Buffer** outputBuffer;	// Pointer to device buffer used to write data
+ to if isOutputFormatted is enabled
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::Buffer** kernel;	// Pointer to device buffer used to read kernel data
+ from if performConvolution is enabled
+\end_layout
+
+\begin_layout Plain Layout
+
 #endif
 \end_layout
 
@@ -3429,6 +3624,7 @@ uint64_t saveApplicationToString;	// Will save all compiled binaries to
  VkFFTApplication.saveApplicationString (will be allocated by VkFFT, deallocated
  with deleteVkFFT call).
  VkFFTApplication.applicationStringSize will contain size of binary in bytes.
+ Currently disabled in Metal backend.
  (0 - off, 1 - on)
 \end_layout
 
@@ -3438,15 +3634,29 @@ uint64_t loadApplicationFromString;	// Will load all binaries from loadApplicati
 onString instead of recompiling them (must be allocated by user, must contain
  what saveApplicationToString call generated previously in VkFFTApplication.saveA
 pplicationString).
+ Currently disabled in Metal backend.
  (0 - off, 1 - on).
  Mutually exclusive with saveApplicationToString
 \end_layout
 
 \begin_layout Plain Layout
 
-void* loadApplicationString;	// Memory array (uint32_t* for Vulkan/HIP,
- char* for CUDA/OpenCL) through which user can load VkFFT binaries, must
- be provided by user if loadApplicationFromString = 1.
+void* loadApplicationString;	// Memory binary array through which user can
+ load VkFFT binaries, must be provided by user if loadApplicationFromString
+ = 1.
+ Use rb/wb flags to load/save.
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+uint64_t disableSetLocale;	// disables all VkFFT attempts to set locale
+ to C - user must ensure that VkFFT has C locale during the plan initialization.
+ This option is needed for multithreading.
+ Default 0.
  
 \end_layout
 
@@ -3509,6 +3719,52 @@ uint64_t* paddedSizes;	// described in useCustomBluesteinPaddingPattern
 
 \begin_layout Plain Layout
 
+uint64_t fixMinRaderPrimeMult;	// start direct multiplication Rader's algorithm
+ for radix primes from this number.
+ This means that VkFFT will inline custom Rader kernels if sequence is divisible
+ by these primes.
+ Default is 17, as VkFFT has kernels for 2-13.
+ If you make it less than 13, VkFFT will switch from these kernels to Rader.
+\end_layout
+
+\begin_layout Plain Layout
+
+uint64_t fixMaxRaderPrimeMult;	// switch from Mult Rader's algorithm for
+ radix primes from this number.
+ Current limitation for Rader is maxThreadNum/2+1; realistically you would
+ want to switch somewhere in the 30-100 range.
+ Default is vendor-specific (currently ~40)
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+uint64_t fixMinRaderPrimeFFT;	// start FFT convolution version of Rader
+ for radix primes from this number.
+ Better than direct multiplication version for almost all primes (except
+ small ones, like 17-23 on some GPUs).
+ Must be greater than or equal to fixMinRaderPrimeMult.
+ Default 29 on AMD and 17 on other GPUs.
+ 
+\end_layout
+
+\begin_layout Plain Layout
+
+uint64_t fixMaxRaderPrimeFFT;	// switch to Bluestein's algorithm for radix
+ primes from this number.
+ Switch may happen earlier if prime can't fit in shared memory.
+ Default is 16384, which is bigger than most current GPU's shared memory.
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
 // Optional zero padding control parameters: (default 0 if not stated otherwise)
 \end_layout
 
@@ -3603,8 +3859,8 @@ uint64_t registerBoost;	// Specify if register file size is bigger than
  4 to emulate 128KB of shared memory).
  Defaults: Nvidia - 4 in Vulkan/OpenCL, 1 in CUDA backend; AMD - 2 if shared
  memory >= 64KB, else 4 in Vulkan/OpenCL backend, 1 in HIP backend; Intel
- - 1 if shared memory >= 64KB, else 2 in Vulkan/OpenCL/Level Zero backends;
- Default 1
+ - 1 if shared memory >= 64KB, else 2 in Vulkan/OpenCL/Level Zero backends,
+ 1 in Metal; Default 1
 \end_layout
 
 \begin_layout Plain Layout
@@ -3658,6 +3914,17 @@ uint64_t localPageSize;	// In KB, the size to split page into if sequence
 
 \begin_layout Plain Layout
 
+uint64_t computeCapabilityMajor;	// CUDA/HIP compute capability of the device
+\end_layout
+
+\begin_layout Plain Layout
+
+uint64_t computeCapabilityMinor;	// CUDA/HIP compute capability of the device
+ 	
+\end_layout
+
+\begin_layout Plain Layout
+
 uint64_t maxComputeWorkGroupCount[3];	// maxComputeWorkGroupCount from VkPhysica
 lDeviceLimits
 \end_layout
@@ -3739,6 +4006,11 @@ einPaddingPattern
 
 \begin_layout Plain Layout
 
+uint64_t useRaderUintLUT; // allocate additional LUT to store g_pow
+\end_layout
+
+\begin_layout Plain Layout
+
 uint64_t vendorID; // vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002 -
  AMD, etc
 \end_layout
@@ -3825,6 +4097,21 @@ ze_command_list_handle_t* commandList;	// Filled at app creation
 
 \begin_layout Plain Layout
 
+#elif(VKFFT_BACKEND==5)
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::CommandBuffer* commandBuffer;	// Filled at app execution
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::ComputeCommandEncoder* commandEncoder;	// Filled at app execution
+\end_layout
+
+\begin_layout Plain Layout
+
 #endif
 \end_layout
 
@@ -3980,6 +4267,19 @@ uint32_t commandQueueID - ID of the commandQueue with compute and copy capabilit
 ies
 \end_layout
 
+\begin_layout Standard
+Metal API will need the following information:
+\end_layout
+
+\begin_layout Itemize
+MTL::Device* device - Pointer to Metal device, obtained from MTL::CopyAllDevices
+\end_layout
+
+\begin_layout Itemize
+MTL::CommandQueue* queue - Pointer to Metal queue, obtained from device->newComm
+andQueue()
+\end_layout
+
 \begin_layout Subsubsection
 Memory management parameters
 \end_layout
@@ -4013,11 +4313,12 @@ kernel buffer, used for calculation of convolutions and cross-correlations
 \begin_layout Standard
 These buffers must be passed by a pointer: in Vulkan API they are provided
  as VkBuffer*, in CUDA, HIP and Level Zero they are provided as void**,
- in OpenCL, they are provided as cl_mem*.
- Even though the underlying structure (VkBuffer, void*, cl_mem) is not a
- memory but just a number that the driver can use to access corresponding
- allocated memory on the GPU, passing them by a pointer allows for the user
- to query multiple GPU allocated buffers for VkFFT to use.
+ in OpenCL they are provided as cl_mem*, in Metal they are provided as MTL::Buff
+er*.
+ Even though the underlying structure (VkBuffer, void*, cl_mem, MTL::Buffer*)
+ is not a memory but just a number that the driver can use to access correspondi
+ng allocated memory on the GPU, passing them by a pointer allows for the
+ user to query multiple GPU allocated buffers for VkFFT to use.
  Currently, it is only supported in Vulkan API - each of five buffer types
  can be made out of multiple separate memory allocations.
  For example, it is possible to combine multiple small unused at the point
@@ -4158,8 +4459,9 @@ Precision parameters (and some things that can affect it):
 \begin_layout Standard
 uint64_t doublePrecision - perform calculations in double precision.
  Default 0, set to 1 to enable.
- In Vulkan/OpenCL/Level Zero your device must support double-precision functiona
+ In Vulkan/OpenCL/Level Zero your device must support double precision functiona
 lity.
+ Metal API does not support double precision.
  Optional parameter.
 \end_layout
 
@@ -4171,8 +4473,9 @@ uint64_t doublePrecisionFloatMemory - perform calculations in double precision,
  This option increases precision, but not that much to be recommended for
  actual use.
  Default 0, set to 1 to enable.
- In Vulkan/OpenCL/Level Zero your device must support double-precision functiona
+ In Vulkan/OpenCL/Level Zero your device must support double precision functiona
 lity.
+ Metal API does not support double precision.
  Experimental feature.
  Optional parameter.
 \end_layout
@@ -4345,6 +4648,7 @@ uint64_t saveApplicationToString - will save all compiled binaries to VkFFTAppli
 cation.saveApplicationString (will be allocated by VkFFT, deallocated with
  deleteVkFFT call).
  VkFFTApplication.applicationStringSize will contain size of binary in bytes.
+ Currently disabled in Metal backend.
  Default 0, set to 1 to enable.
  Optional parameter.
 \end_layout
@@ -4354,18 +4658,64 @@ uint64_t loadApplicationFromString - will load all binaries from loadApplication
 String instead of recompiling them (loadApplicationString must be allocated
  by user, must contain what saveApplicationToString call generated previously
  in VkFFTApplication.saveApplicationString).
+ Currently disabled in Metal backend.
  Default 0, set to 1 to enable.
  Optional parameter.
  Mutually exclusive with saveApplicationToString 
 \end_layout
 
 \begin_layout Standard
-void* loadApplicationString - memory array (uint32_t* for Vulkan, HIP and
- Level Zero, char* for CUDA/OpenCL) through which user can load VkFFT binaries,
- must be provided by user if loadApplicationFromString = 1.
+void* loadApplicationString - memory binary array through which user can
+ load VkFFT binaries, must be provided by user if loadApplicationFromString
+ = 1.
+ Use rb/wb flags to load/save.
+\end_layout
+
+\begin_layout Standard
+uint64_t disableSetLocale - disables all VkFFT attempts to set locale to
+ C - user must ensure that VkFFT has C locale during the plan initialization.
+ This option is needed for multithreading.
+ Default 0.
+\end_layout
+
+\begin_layout Subsubsection
+Rader control parameters
+\end_layout
+
+\begin_layout Standard
+uint64_t fixMinRaderPrimeMult - start direct multiplication Rader's algorithm
+ for radix primes from this number.
+ This means that VkFFT will inline custom Rader kernels if sequence is divisible
+ by these primes.
+ Default is 17, as VkFFT has kernels for 2-13.
+ If you make it less than 13, VkFFT will switch from these kernels to Rader.
+\end_layout
+
+\begin_layout Standard
+uint64_t fixMaxRaderPrimeMult - switch from Mult Rader's algorithm for radix
+ primes from this number.
+ Current limitation for Rader is maxThreadNum/2+1; realistically you would
+ want to switch somewhere in the 30-100 range.
+ Default is vendor-specific (currently ~40)
+\end_layout
+
+\begin_layout Standard
+uint64_t fixMinRaderPrimeFFT - start FFT convolution version of Rader for
+ radix primes from this number.
+ Better than direct multiplication version for almost all primes (except
+ small ones, like 17-23 on some GPUs).
+ Must be greater than or equal to fixMinRaderPrimeMult.
+ Default 29 on AMD and 17 on other GPUs.
  
 \end_layout
 
+\begin_layout Standard
+uint64_t fixMaxRaderPrimeFFT - switch to Bluestein's algorithm for radix
+ primes from this number.
+ Switch may happen earlier if prime can't fit in shared memory.
+ Default is 16384, which is bigger than most current GPU's shared memory.
+\end_layout
+
 \begin_layout Subsubsection
 Bluestein control parameters
 \end_layout
@@ -4548,7 +4898,7 @@ Register overutilization
 \begin_layout Standard
 Only works in C2C mode, without convolution support.
  Enabled in Vulkan, OpenCL and Level Zero APIs only (it works in other APIs,
- but worse).
+ but worse, does not work in Metal).
  Experimental feature.
 \end_layout
 
@@ -4584,6 +4934,15 @@ uint64_t registerBoost4Step - specify if register file overutilization should
 Extra advanced parameters (filled automatically)
 \end_layout
 
+\begin_layout Standard
+uint64_t computeCapabilityMajor - CUDA/HIP compute capability of the device
+\end_layout
+
+\begin_layout Standard
+uint64_t computeCapabilityMinor - CUDA/HIP compute capability of the device
+ 
+\end_layout
+
 \begin_layout Standard
 uint64_t maxComputeWorkGroupCount[3] - how many workgroups can be launched
  at one dispatch.
@@ -4658,13 +5017,17 @@ int64_t maxTempLength - specify how big can the buffer used for intermediate
 \end_layout
 
 \begin_layout Standard
-uint64_t autoCustomBluesteinPaddingPattern; // default value for useCustomBluest
-einPaddingPattern
+uint64_t autoCustomBluesteinPaddingPattern - default value for useCustomBluestei
+nPaddingPattern
 \end_layout
 
 \begin_layout Standard
-uint64_t vendorID; // vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002 -
- AMD, etc.
+uint64_t useRaderUintLUT - allocate additional LUT to store g_pow 
+\end_layout
+
+\begin_layout Standard
+uint64_t vendorID - vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002 - AMD,
+ etc.
 \end_layout
 
 \begin_layout Standard
@@ -5022,6 +5385,21 @@ uint32_t commandQueueID;
 
 \begin_layout Plain Layout
 
+#elif(VKFFT_BACKEND==5) //Metal API
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::Device* device;
+\end_layout
+
+\begin_layout Plain Layout
+
+MTL::CommandQueue* queue;
+\end_layout
+
+\begin_layout Plain Layout
+
 #endif
 \end_layout
 
@@ -6214,6 +6592,21 @@ launchParams->commandList = &commandList;
 
 \begin_layout Plain Layout
 
+#elif(VKFFT_BACKEND==5) //Metal API
+\end_layout
+
+\begin_layout Plain Layout
+
+launchParams->commandBuffer = commandBuffer;
+\end_layout
+
+\begin_layout Plain Layout
+
+launchParams->commandEncoder = commandEncoder;
+\end_layout
+
+\begin_layout Plain Layout
+
 #endif
 \end_layout
 
@@ -7303,6 +7696,8 @@ This example shows how to save/load binaries generated by VkFFT.
  This can reduce time taken by initializeVkFFT call by removing RTC components
  from it.
  Be sure that rest of the configuration stays the same to reuse the binary.
+ Use rb/wb flags to load/save.
+ This does not currently work in Metal backend.
 \end_layout
 
 \begin_layout Standard
@@ -7374,28 +7769,7 @@ if (configuration.loadApplicationFromString) {
 
 \begin_layout Plain Layout
 
-#if((VKFFT_BACKEND==0) || (VKFFT_BACKEND==2) || (VKFFT_BACKEND==4))
-\end_layout
-
-\begin_layout Plain Layout
-
-	kernelCache = fopen("VkFFT_binary", "rb"); //Vulkan and HIP backends load
- data as a uint32_t sequence
-\end_layout
-
-\begin_layout Plain Layout
-
-#else
-\end_layout
-
-\begin_layout Plain Layout
-
-	kernelCache = fopen("VkFFT_binary", "r"); 
-\end_layout
-
-\begin_layout Plain Layout
-
-#endif
+	kernelCache = fopen("VkFFT_binary", "rb");
 \end_layout
 
 \begin_layout Plain Layout
@@ -7477,28 +7851,7 @@ if (configuration.saveApplicationToString) {
 
 \begin_layout Plain Layout
 
-#if((VKFFT_BACKEND==0) || (VKFFT_BACKEND==2) || (VKFFT_BACKEND==4))
-\end_layout
-
-\begin_layout Plain Layout
-
-	kernelCache = fopen("VkFFT_binary", "wb"); //Vulkan and HIP backends save
- data as a uint32_t sequence
-\end_layout
-
-\begin_layout Plain Layout
-
-#else
-\end_layout
-
-\begin_layout Plain Layout
-
-	kernelCache = fopen("VkFFT_binary", "w"); 
-\end_layout
-
-\begin_layout Plain Layout
-
-#endif
+	kernelCache = fopen("VkFFT_binary", "wb");
 \end_layout
 
 \begin_layout Plain Layout
diff --git a/documentation/VkFFT_API_guide.pdf b/documentation/VkFFT_API_guide.pdf
index ea5e079..f9b7ba5 100644
--- a/documentation/VkFFT_API_guide.pdf
+++ b/documentation/VkFFT_API_guide.pdf
@@ -270,434 +270,436 @@ endobj
 << /S /GoTo /D (subsubsection.3.3.6) >>
 endobj
 156 0 obj
-(\376\377\000B\000l\000u\000e\000s\000t\000e\000i\000n\000\040\000c\000o\000n\000t\000r\000o\000l\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s)
+(\376\377\000R\000a\000d\000e\000r\000\040\000c\000o\000n\000t\000r\000o\000l\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s)
 
 endobj
 157 0 obj
 << /S /GoTo /D (subsubsection.3.3.7) >>
 endobj
 160 0 obj
-(\376\377\000Z\000e\000r\000o\000\040\000p\000a\000d\000d\000i\000n\000g\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s)
+(\376\377\000B\000l\000u\000e\000s\000t\000e\000i\000n\000\040\000c\000o\000n\000t\000r\000o\000l\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s)
 
 endobj
 161 0 obj
 << /S /GoTo /D (subsubsection.3.3.8) >>
 endobj
 164 0 obj
-(\376\377\000C\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s)
+(\376\377\000Z\000e\000r\000o\000\040\000p\000a\000d\000d\000i\000n\000g\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s)
 
 endobj
 165 0 obj
 << /S /GoTo /D (subsubsection.3.3.9) >>
 endobj
 168 0 obj
-(\376\377\000R\000e\000g\000i\000s\000t\000e\000r\000\040\000o\000v\000e\000r\000u\000t\000i\000l\000i\000z\000a\000t\000i\000o\000n)
+(\376\377\000C\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s)
 
 endobj
 169 0 obj
 << /S /GoTo /D (subsubsection.3.3.10) >>
 endobj
 172 0 obj
-(\376\377\000E\000x\000t\000r\000a\000\040\000a\000d\000v\000a\000n\000c\000e\000d\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s\000\040\000\050\000f\000i\000l\000l\000e\000d\000\040\000a\000u\000t\000o\000m\000a\000t\000i\000c\000a\000l\000l\000y\000\051)
+(\376\377\000R\000e\000g\000i\000s\000t\000e\000r\000\040\000o\000v\000e\000r\000u\000t\000i\000l\000i\000z\000a\000t\000i\000o\000n)
 
 endobj
 173 0 obj
-<< /S /GoTo /D (section.4) >>
+<< /S /GoTo /D (subsubsection.3.3.11) >>
 endobj
 176 0 obj
-(\376\377\000V\000k\000F\000F\000T\000\040\000B\000e\000n\000c\000h\000m\000a\000r\000k\000/\000P\000r\000e\000c\000i\000s\000i\000o\000n\000\040\000S\000u\000i\000t\000e\000\040\000a\000n\000d\000\040\000u\000t\000i\000l\000s\000\137\000V\000k\000F\000F\000T\000\040\000h\000e\000l\000p\000e\000r\000\040\000r\000o\000u\000t\000i\000n\000e\000s)
+(\376\377\000E\000x\000t\000r\000a\000\040\000a\000d\000v\000a\000n\000c\000e\000d\000\040\000p\000a\000r\000a\000m\000e\000t\000e\000r\000s\000\040\000\050\000f\000i\000l\000l\000e\000d\000\040\000a\000u\000t\000o\000m\000a\000t\000i\000c\000a\000l\000l\000y\000\051)
 
 endobj
 177 0 obj
-<< /S /GoTo /D (subsection.4.1) >>
+<< /S /GoTo /D (section.4) >>
 endobj
 180 0 obj
-(\376\377\000u\000t\000i\000l\000s\000\137\000V\000k\000F\000F\000T\000\040\000h\000e\000l\000p\000e\000r\000\040\000r\000o\000u\000t\000i\000n\000e\000s)
+(\376\377\000V\000k\000F\000F\000T\000\040\000B\000e\000n\000c\000h\000m\000a\000r\000k\000/\000P\000r\000e\000c\000i\000s\000i\000o\000n\000\040\000S\000u\000i\000t\000e\000\040\000a\000n\000d\000\040\000u\000t\000i\000l\000s\000\137\000V\000k\000F\000F\000T\000\040\000h\000e\000l\000p\000e\000r\000\040\000r\000o\000u\000t\000i\000n\000e\000s)
 
 endobj
 181 0 obj
-<< /S /GoTo /D (section.5) >>
+<< /S /GoTo /D (subsection.4.1) >>
 endobj
 184 0 obj
-(\376\377\000V\000k\000F\000F\000T\000\040\000C\000o\000d\000e\000\040\000E\000x\000a\000m\000p\000l\000e\000s)
+(\376\377\000u\000t\000i\000l\000s\000\137\000V\000k\000F\000F\000T\000\040\000h\000e\000l\000p\000e\000r\000\040\000r\000o\000u\000t\000i\000n\000e\000s)
 
 endobj
 185 0 obj
-<< /S /GoTo /D (subsection.5.1) >>
+<< /S /GoTo /D (section.5) >>
 endobj
 188 0 obj
-(\376\377\000D\000r\000i\000v\000e\000r\000\040\000i\000n\000i\000t\000i\000a\000l\000i\000z\000a\000t\000i\000o\000n\000s)
+(\376\377\000V\000k\000F\000F\000T\000\040\000C\000o\000d\000e\000\040\000E\000x\000a\000m\000p\000l\000e\000s)
 
 endobj
 189 0 obj
-<< /S /GoTo /D (subsection.5.2) >>
+<< /S /GoTo /D (subsection.5.1) >>
 endobj
 192 0 obj
-(\376\377\000S\000i\000m\000p\000l\000e\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\0001\000D\000\040\000\050\000o\000n\000e\000\040\000d\000i\000m\000e\000n\000s\000i\000o\000n\000a\000l\000\051\000\040\000C\0002\000C\000\040\000\050\000c\000o\000m\000p\000l\000e\000x\000\040\000t\000o\000\040\000c\000o\000m\000p\000l\000e\000x\000\051\000\040\000F\000P\0003\0002\000\040\000\050\000s\000i\000n\000g\000l\000e\000\040\000p\000r\000e\000c\000i\000s\000i\000o\000n\000\051\000\040\000F\000F\000T)
+(\376\377\000D\000r\000i\000v\000e\000r\000\040\000i\000n\000i\000t\000i\000a\000l\000i\000z\000a\000t\000i\000o\000n\000s)
 
 endobj
 193 0 obj
-<< /S /GoTo /D (subsection.5.3) >>
+<< /S /GoTo /D (subsection.5.2) >>
 endobj
 196 0 obj
-(\376\377\000A\000d\000v\000a\000n\000c\000e\000d\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\000N\000D\000,\000\040\000C\0002\000C\000/\000R\0002\000C\000/\000R\0002\000R\000,\000\040\000d\000i\000f\000f\000e\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000i\000s\000i\000o\000n\000s\000,\000\040\000b\000a\000t\000c\000h\000e\000d\000\040\000F\000F\000T)
+(\376\377\000S\000i\000m\000p\000l\000e\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\0001\000D\000\040\000\050\000o\000n\000e\000\040\000d\000i\000m\000e\000n\000s\000i\000o\000n\000a\000l\000\051\000\040\000C\0002\000C\000\040\000\050\000c\000o\000m\000p\000l\000e\000x\000\040\000t\000o\000\040\000c\000o\000m\000p\000l\000e\000x\000\051\000\040\000F\000P\0003\0002\000\040\000\050\000s\000i\000n\000g\000l\000e\000\040\000p\000r\000e\000c\000i\000s\000i\000o\000n\000\051\000\040\000F\000F\000T)
 
 endobj
 197 0 obj
-<< /S /GoTo /D (subsection.5.4) >>
+<< /S /GoTo /D (subsection.5.3) >>
 endobj
 200 0 obj
-(\376\377\000A\000d\000v\000a\000n\000c\000e\000d\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\000o\000u\000t\000-\000o\000f\000-\000p\000l\000a\000c\000e\000\040\000R\0002\000C\000\040\000F\000F\000T\000\040\000w\000i\000t\000h\000\040\000c\000u\000s\000t\000o\000m\000\040\000s\000t\000r\000i\000d\000e\000s)
+(\376\377\000A\000d\000v\000a\000n\000c\000e\000d\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\000N\000D\000,\000\040\000C\0002\000C\000/\000R\0002\000C\000/\000R\0002\000R\000,\000\040\000d\000i\000f\000f\000e\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000i\000s\000i\000o\000n\000s\000,\000\040\000b\000a\000t\000c\000h\000e\000d\000\040\000F\000F\000T)
 
 endobj
 201 0 obj
-<< /S /GoTo /D (subsection.5.5) >>
+<< /S /GoTo /D (subsection.5.4) >>
 endobj
 204 0 obj
-(\376\377\000A\000d\000v\000a\000n\000c\000e\000d\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\0003\000D\000\040\000z\000e\000r\000o\000-\000p\000a\000d\000d\000e\000d\000\040\000F\000F\000T)
+(\376\377\000A\000d\000v\000a\000n\000c\000e\000d\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\000o\000u\000t\000-\000o\000f\000-\000p\000l\000a\000c\000e\000\040\000R\0002\000C\000\040\000F\000F\000T\000\040\000w\000i\000t\000h\000\040\000c\000u\000s\000t\000o\000m\000\040\000s\000t\000r\000i\000d\000e\000s)
 
 endobj
 205 0 obj
-<< /S /GoTo /D (subsection.5.6) >>
+<< /S /GoTo /D (subsection.5.5) >>
 endobj
 208 0 obj
-(\376\377\000C\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\0003\000x\0003\000\040\000m\000a\000t\000r\000i\000x\000-\000v\000e\000c\000t\000o\000r\000\040\000c\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000i\000n\000\040\0001\000D)
+(\376\377\000A\000d\000v\000a\000n\000c\000e\000d\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\0003\000D\000\040\000z\000e\000r\000o\000-\000p\000a\000d\000d\000e\000d\000\040\000F\000F\000T)
 
 endobj
 209 0 obj
-<< /S /GoTo /D (subsection.5.7) >>
+<< /S /GoTo /D (subsection.5.6) >>
 endobj
 212 0 obj
-(\376\377\000C\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\000R\0002\000C\000\040\000c\000r\000o\000s\000s\000-\000c\000o\000r\000r\000e\000l\000a\000t\000i\000o\000n\000\040\000b\000e\000t\000w\000e\000e\000n\000\040\000t\000w\000o\000\040\000s\000e\000t\000s\000\040\000o\000f\000\040\000N\000\040\000i\000m\000a\000g\000e\000s)
+(\376\377\000C\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\0003\000x\0003\000\040\000m\000a\000t\000r\000i\000x\000-\000v\000e\000c\000t\000o\000r\000\040\000c\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000i\000n\000\040\0001\000D)
 
 endobj
 213 0 obj
-<< /S /GoTo /D (subsection.5.8) >>
+<< /S /GoTo /D (subsection.5.7) >>
 endobj
 216 0 obj
-(\376\377\000S\000i\000m\000p\000l\000e\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000b\000i\000n\000a\000r\000y\000\040\000r\000e\000u\000s\000e\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n)
+(\376\377\000C\000o\000n\000v\000o\000l\000u\000t\000i\000o\000n\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000e\000x\000a\000m\000p\000l\000e\000:\000\040\000R\0002\000C\000\040\000c\000r\000o\000s\000s\000-\000c\000o\000r\000r\000e\000l\000a\000t\000i\000o\000n\000\040\000b\000e\000t\000w\000e\000e\000n\000\040\000t\000w\000o\000\040\000s\000e\000t\000s\000\040\000o\000f\000\040\000N\000\040\000i\000m\000a\000g\000e\000s)
 
 endobj
 217 0 obj
-<< /S /GoTo /D [ 218 0 R /Fit ] >>
+<< /S /GoTo /D (subsection.5.8) >>
 endobj
 220 0 obj
-<< /Filter /FlateDecode /Length 395 >>        
+(\376\377\000S\000i\000m\000p\000l\000e\000\040\000F\000F\000T\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000b\000i\000n\000a\000r\000y\000\040\000r\000e\000u\000s\000e\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n)
+
+endobj
+221 0 obj
+<< /S /GoTo /D [ 222 0 R /Fit ] >>
+endobj
+224 0 obj
+<< /Filter /FlateDecode /Length 420 >>        
 stream
-x�]��N�0��{���v;�N�(\���!V�x�;n����_��A�J�^/p���<��A�J����E a"�\�$�@����~����G��P�Y�a�8�f}[2)&i���ry��m�٬��~ӓY��m��rq�Y��
-v���^�1��7�gD`%�{�;\,%S�$U3A�EVD��x�2Z�cV�'�t`�C�,i��g�65lу�o��'P���{�D��)ԿÊ�0NU5W�/p]9FR|;w��v���b�}滞|>a��?���rX��"�l���ꡀ�[�Æv�(@]��+�uU��m%q�},zPB΅��+&̂���LL���䚺C����_̫KX?�2"6��ƞ���;�̪�iQ;���
-������yF�\
+x�]S�n1��+�qcp��R��V�yoU����-�����"���@�	�O0첝^V.� �P��}��"0�@LY��F �o���
+���'����0ԯ�c�G,�1)��!_�]z|5ko��t3ko�W���Ow� @N���>6?�\�c�1��7�����d��F�2Y���D��8�݆�RE���>q��8��a�P�����d����.�#�@����H4���b�49-�ң��i�UZ��5C �Z�qӘ�6�!g����]}W���fj��F�3#J�+�;���c���̉6h�2���O��
X$����{��l��N�iZ���_Se�h�$Nj�2	P@������|��1Ŕ���ӣ������zXlk"�K���یX����htwTvO��޼^#������oǲ
 endstream
 endobj
-218 0 obj
-<< /Type /Page /Contents 220 0 R /Resources 219 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
+222 0 obj
+<< /Type /Page /Contents 224 0 R /Resources 223 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
 endobj
-221 0 obj
-<< /D [ 218 0 R /XYZ 69.866 758.996 null ] >>
+225 0 obj
+<< /D [ 222 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-222 0 obj
-<< /D [ 218 0 R /XYZ 70.866 721.134 null ] >>
+226 0 obj
+<< /D [ 222 0 R /XYZ 70.866 721.134 null ] >>
 endobj
-219 0 obj
-<< /Font << /F48 223 0 R /F50 224 0 R /F51 225 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+223 0 obj
+<< /Font << /F48 227 0 R /F50 228 0 R /F51 229 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-272 0 obj
-<< /Filter /FlateDecode /Length 2119 >>       
-stream
-x��\�n�6��+�Ò�o``���]gWtۮ��ے��R�i��4-Ų$^�޷L���緗_���&�����^��Ɲ9hJ�R�`	c������R�(�#g�rh?+8���{$�����?�A ��4���7˵�O�q�c�-'��N\�����OcI.���N�W'ɤ"��|�9I�~���
-'w�p-�z\���3�p����L��9�%������[������m�A	F�.SpC��Ȍ
-�W9��K��܄���@��fMv`@�
-4�tS�������?X����NI@+�؈'��D�D����T����h�{�\~��]�y�w���&\�]�u���UX���6<S�u	��`!��O��ZL��md�MǏ������Y�n|[>,�tIz\�MI�����K�G���J���%Vq��@mR��
�T`L9e��"9�\�l
-3*z	�k8��lc?����~7!�$Z�YA�Q#,�#�@^	�QR��3H�4QK�3�6�튉�9ԕ%ν�~�P/(K'�F���P���(��b��-ނݮ�
��ɗRf%����9d8E+{.X錪?��G~.W�Pq��}��6�_B��,�|��@8fG���@�M�Do��)9#x�'��9Tĭ2^�e�&���[P���!�,�j4��X��e��aw��^����ᔏ��,����e����q~���%R_߸�>��o��<hT4�v̓�M�u���N�Ў�B�|C�D0�=�+��H^2��.��MV�c�R·W
-=I>��2���tQ%��f>p'n�5�E5��qa]��XO�>t�@����IސdW�E������d����bw����_�jMo�4v���<4��?��NW߯b��/��4�끎�\S�+�����z�jH��fڶy���."t5�!���Z��_¹Y���!�%��<]���������W1���Y�	]4�;�u1�3��ldd|6FD۾�Oi)���J�M��K���\�"�4>}�pl4�;�՛��
�J~.'�\R#�Ŕ
-=��
--i���MI��(���X,����t@�t����؏�Zp�։�Hj�d�b�=]㆓�'d�G��p�|����?;Λ�\ܻ��_Ιv��놥�d4�yP��ʦ�t���SCу�)���n;�!��ƽ-���0ۮ�
#Ft����$y<@x�b��g�6�xа{�ǲ��<;�&�vi��L����?�M���V�n!�Vg��9[;�x�8Q�~"JmR��~� H V�]�O���h��[
-��{���*��n��ză��Kz�6�[I����n+J�t�j#�f�W���y� �`�������{כSA�1x?���dZ�1n�΃�D���A}��<��)��$����@",Q���C�o�It-�I��.��B�@F�����Su+Q=��V�
-ɸ;�79�Wf;�A����UG��"ɒ�h�����F#u)�c֟�i~���Iٰ�<�|��!�8��8�����]��2"}�w�h1��Exښ��K��aCbaÆ�zn��z}��C��'�;�囲��n�
V.
��˘�M�~��IgC�O �̖ �������w��#���	�(�F��������x�|�kxR�uZ���c?
-��w�ۀG��YE���>Mxw���!җ�Չ�
-e,e�R�y���;��B�
-1��גQj�9�cܦ♻εa�nV_��H���=¿�q�>�fD��|��'O GjcA���m,+@M�-0�	PB\�z��'�
-To޳ǣg��={f�@��M`ܙ|�Fp������\�]�I��WW^OO=7��T���0�+,���g�7P�^��Fj���O������ʄbD�n���&�=0�U.�gli�:d�eԇbM�6g�kH�58��є�4Tl��=6�%"[�������̇��/��?Cp�X´�+���1�E���]ɡ>�wԔz'���Z��3tu����_.[�F�=�`v��;��`�c���q϶4��N"o\Ю�P{?�Y�	طR��7�����N9�����M���˿��
+276 0 obj
+<< /Filter /FlateDecode /Length 2128 >>       
+stream
+x��\Ɏ�6��W��!�;00`��r�[�kr��_�})���v�d�uh�-Kb��v��9���/4�o/�]%=0M�I!o�0��;sД��������ß_)���Q*.G��ge)��Vp���2H�+��0�2@4Sy	�/�k��@�(��>~	'��N\���Pk�cI.���N�Wgɤ"��r�9K���Y�A���o��y=�~M��I8�y�S&Ü��y���K�-^]X\]c����#F�)�!M\dF�)���gnn¤�v~��g�$;0 `]��KJ���,�S�[&�$�Zl�|S�h"@`~x�'~pu4�5n.���.���^�����%Fq���wU�t��ϔb�E�:�X�O��遠���6��f���c�a����V�_�@ȗ���ޔUo�鬾�{`�1��\Zb����U*���|�
+�)���AZC$���MaF%/�
��u�w���&�$@�)^���B��� �Hy�$pGUI!�� ��Di,�΄ۨ�+�ue�s/����+����	�'�3�7�0�X�}��`��s�d򥔄Y��v�d	N�ʞ+V����fD�����)T�ܣ��F��cR�e§��cq4l�T޴O�^��3���CE�*�E�PV-"�f9����-.(���}1��K�#4�߲����U/�z�AP�S>#@c��&@�gP��ߙ�/��\��H{}���d��1�(ʠI�m[����#�����+�>ѳ l�-�1b��ᦨ�9|9��Ş�%Z�d`D��
�	S4�a��5�͍�ǅ��	l$��A����y��doL��"�U�y�H�]��d^1:z���������IgG?k)�CcJ/����|��*f9�81Lc�?�h�w��\�S�Ow��
�1����~��<|��0Ԯ��K�k1>�u[�w�3�N�dR5(��Ǐ������W)!����&d3�h6w���(f
+�Y�����H�=�Qi-�T��K�]��K����h����W
+W�Fs��ź6���̕����P�s�8��"�,�V豖XhM��u`mj��F��"����*�:�A�b}>��D�����B�YM�lY*�/�l�p��a����Vv;���i`�yW�K{������~�nZr�F#��|hlzI�.�65�<���)��˶C"���g�[�K[�6�aĈ!G��$�(�w����j��
���|,;�jCp綰Ci����`;{��$�ԫ8�U��V2ou¦.�ڍ
������*ЬoS�	�J>r`��	��3q��?�c���Z�;x�
�T�x�s}Y�Ю�+k��}E��.Wm��,��
+}<јl�&<A���;2��ջ��7��Hc�~�ݿ��s����j��s�����t��40s�L��o��"��T��[uC�z�0�
}RB�H��T�y�a%��A��\�R!wg�&�����cP�,�Cә����H��&����H]�����
+�4����Iٰ�<�� ���|��́(���],ZJo:m���+d�\\3lH�Ol�0�w@�-1ZϠo����/�p';�|S�v�U��J¥��~��<�Ｙ5@�����������#�<7#1���~�ys{0J��3��2�{6�;���Tc�ބ���ԏ���|���6�ɽcV)�O��\]��K���D��
+e*e�R�y����]0��b���5���K�Ƹ]ųt+�[ðް��H���=��qcC�034�^Cy9r��ncYjcN�IN�����;U�<C�W�F��=�<{$���3��Ɣ5���K5���@�5�M����m�yq����s�(�ŉ�ӽ��~~���i�蠶���;,�-W&#���V!����+����BZ�Yj��X��-Y���a��`�Ft�+
M��g�]s�(#�<����z}�����K0������0�0-��>�AGL��I]Ю�T�m��C����S^��̭A⩺�v���,���n����)��j�wcB��{�?]�=B�s�Bvͧ��?�5��]��l��{��C�_����:��P�
�~Ħ�����?���
 endstream
 endobj
-271 0 obj
-<< /Type /Page /Contents 272 0 R /Resources 270 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R /Annots 274 0 R >>
+275 0 obj
+<< /Type /Page /Contents 276 0 R /Resources 274 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R /Annots 278 0 R >>
 endobj
-274 0 obj
-[ 228 0 R 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R 257 0 R 258 0 R 259 0 R 260 0 R 261 0 R 262 0 R 263 0 R 264 0 R 265 0 R 266 0 R 267 0 R 268 0 R ]
+278 0 obj
+[ 232 0 R 233 0 R 234 0 R 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R 257 0 R 258 0 R 259 0 R 260 0 R 261 0 R 262 0 R 263 0 R 264 0 R 265 0 R 266 0 R 267 0 R 268 0 R 269 0 R 270 0 R 271 0 R 272 0 R ]
 endobj
-228 0 obj
+232 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 681.577 163.845 692.01 ]/A  << /S /GoTo /D (section.1) >> >>
 endobj
-229 0 obj
+233 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 652.916 223.562 665.68 ]/A  << /S /GoTo /D (section.2) >> >>
 endobj
-230 0 obj
+234 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 638.423 207.745 651.163 ]/A  << /S /GoTo /D (subsection.2.1) >> >>
 endobj
-231 0 obj
+235 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 624.108 243.682 636.837 ]/A  << /S /GoTo /D (subsection.2.2) >> >>
 endobj
-232 0 obj
+236 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 609.543 348.84 622.391 ]/A  << /S /GoTo /D (subsection.2.3) >> >>
 endobj
-233 0 obj
+237 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 597.285 236.652 607.945 ]/A  << /S /GoTo /D (subsubsection.2.3.1) >> >>
 endobj
-234 0 obj
+238 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 580.101 265.608 594.049 ]/A  << /S /GoTo /D (subsubsection.2.3.2) >> >>
 endobj
-235 0 obj
+239 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 565.644 275.842 579.603 ]/A  << /S /GoTo /D (subsubsection.2.3.3) >> >>
 endobj
-236 0 obj
+240 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 551.748 420.583 564.607 ]/A  << /S /GoTo /D (subsection.2.4) >> >>
 endobj
-237 0 obj
+241 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 539.502 232.337 550.162 ]/A  << /S /GoTo /D (subsubsection.2.4.1) >> >>
 endobj
-238 0 obj
+242 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 522.318 476.912 536.266 ]/A  << /S /GoTo /D (subsubsection.2.4.2) >> >>
 endobj
-239 0 obj
+243 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 508.41 213.938 521.15 ]/A  << /S /GoTo /D (subsection.2.5) >> >>
 endobj
-240 0 obj
+244 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 493.964 264.125 506.705 ]/A  << /S /GoTo /D (subsubsection.2.5.1) >> >>
 endobj
-241 0 obj
+245 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 479.518 310.81 492.378 ]/A  << /S /GoTo /D (subsubsection.2.5.2) >> >>
 endobj
-242 0 obj
+246 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 464.535 238.529 478.482 ]/A  << /S /GoTo /D (subsubsection.2.5.3) >> >>
 endobj
-243 0 obj
+247 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 450.089 357.507 464.037 ]/A  << /S /GoTo /D (subsubsection.2.5.4) >> >>
 endobj
-244 0 obj
+248 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 438.381 323.1 449.041 ]/A  << /S /GoTo /D (subsubsection.2.5.5) >> >>
 endobj
-245 0 obj
+249 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 421.735 274.491 434.475 ]/A  << /S /GoTo /D (subsubsection.2.5.6) >> >>
 endobj
-246 0 obj
+250 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 407.289 223.191 420.03 ]/A  << /S /GoTo /D (subsubsection.2.5.7) >> >>
 endobj
-247 0 obj
+251 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 392.975 371.041 405.703 ]/A  << /S /GoTo /D (subsubsection.2.5.8) >> >>
 endobj
-248 0 obj
+252 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 378.41 204.433 391.138 ]/A  << /S /GoTo /D (subsection.2.6) >> >>
 endobj
-249 0 obj
+253 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 363.964 314.756 376.692 ]/A  << /S /GoTo /D (subsection.2.7) >> >>
 endobj
-250 0 obj
+254 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 351.706 238.374 362.246 ]/A  << /S /GoTo /D (subsubsection.2.7.1) >> >>
 endobj
-251 0 obj
+255 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 335.06 433.554 347.92 ]/A  << /S /GoTo /D (subsubsection.2.7.2) >> >>
 endobj
-252 0 obj
+256 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 322.814 302.549 333.474 ]/A  << /S /GoTo /D (subsubsection.2.7.3) >> >>
 endobj
-253 0 obj
+257 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 296.521 224.267 307.025 ]/A  << /S /GoTo /D (section.3) >> >>
 endobj
-254 0 obj
+258 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 281.967 258.805 292.508 ]/A  << /S /GoTo /D (subsection.3.1) >> >>
 endobj
-255 0 obj
+259 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 265.322 335.558 278.181 ]/A  << /S /GoTo /D (subsection.3.2) >> >>
 endobj
-256 0 obj
+260 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 250.326 294.874 264.286 ]/A  << /S /GoTo /D (subsubsection.3.2.1) >> >>
 endobj
-257 0 obj
+261 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 235.88 291.61 249.84 ]/A  << /S /GoTo /D (subsubsection.3.2.2) >> >>
 endobj
-258 0 obj
+262 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 221.434 281.209 235.394 ]/A  << /S /GoTo /D (subsubsection.3.2.3) >> >>
 endobj
-259 0 obj
+263 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 206.988 308.993 220.948 ]/A  << /S /GoTo /D (subsubsection.3.2.4) >> >>
 endobj
-260 0 obj
+264 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 193.093 226.885 205.952 ]/A  << /S /GoTo /D (subsection.3.3) >> >>
 endobj
-261 0 obj
+265 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 178.778 273.319 191.626 ]/A  << /S /GoTo /D (subsubsection.3.3.1) >> >>
 endobj
-262 0 obj
+266 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 164.201 326.998 176.81 ]/A  << /S /GoTo /D (subsubsection.3.3.2) >> >>
 endobj
-263 0 obj
+267 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 149.887 287.378 162.615 ]/A  << /S /GoTo /D (subsubsection.3.3.3) >> >>
 endobj
-264 0 obj
+268 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 134.759 452.204 148.719 ]/A  << /S /GoTo /D (subsubsection.3.3.4) >> >>
 endobj
-265 0 obj
+269 0 obj
 << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 120.314 478.9 134.273 ]/A  << /S /GoTo /D (subsubsection.3.3.5) >> >>
 endobj
-266 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 106.549 302.824 119.158 ]/A  << /S /GoTo /D (subsubsection.3.3.6) >> >>
+270 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 106.549 286.195 119.158 ]/A  << /S /GoTo /D (subsubsection.3.3.6) >> >>
 endobj
-267 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 91.972 283.744 104.712 ]/A  << /S /GoTo /D (subsubsection.3.3.7) >> >>
+271 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 92.103 302.824 104.712 ]/A  << /S /GoTo /D (subsubsection.3.3.7) >> >>
 endobj
-268 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 77.657 278.233 90.386 ]/A  << /S /GoTo /D (subsubsection.3.3.8) >> >>
+272 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 77.526 283.744 90.266 ]/A  << /S /GoTo /D (subsubsection.3.3.8) >> >>
 endobj
-273 0 obj
-<< /D [ 271 0 R /XYZ 70.866 731.035 null ] >>
+277 0 obj
+<< /D [ 275 0 R /XYZ 70.866 731.035 null ] >>
 endobj
-270 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+274 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-289 0 obj
-<< /Filter /FlateDecode /Length 1219 >>       
+294 0 obj
+<< /Filter /FlateDecode /Length 1253 >>       
 stream
-x��Xˎ�6��W�F��70O�]gWtۮ���R�(��#ɴ)��Y�e��tΡH��A?�ɮoo?M�D�V������7rP��D7x��q��9��!�;KiCk����Tp��NRj�߈�غ�����]C�N���Zz���N3M��c�>[�U�^N��~�K`w�;{gm8�[)W�L�o�[�d!
-����|F?���^�=:؄Z<����ֈ�7�_�X}2�0eXS�
2FbL�/�p@�QeDcVbZ���� ���v
-�2��%��<�l�ƭ��I���:���Zl+o���@h[Ծ�Ծ�"8��\#�%��F�4�9�{��ϡo�[|@{NX��J��NK����jz�e�9f̔^�:E���;������.��(t=�Y���F[���c�Ȩ$��/e������$�Ō���)f��-,d�a�E��]sVB��Y(�!_X3���� �'�5�n��P�k&�j)< �s>���0�5�Q��m��Ks$��ٕ8�-eEv��0}��e�r�Hm
�i�>����޵����:�״�g�����gʨ�FU�� 2��.�'�"���KC���0��214%Fp�9)Ma�5o��W>D�b�1�%<��ڹ��3�8s���Q��]�<�P&���"?ʑ�G��h	�!�vl��8
�R�b�d�Y���ꑎ�U'�I�YG.
-/a��sՑ)��K�MP��r�S�
-���X�I'\�KM%͜��S�k�=nei�D�PL#�_��֑�l=�{V���1���Pv�ӴV���?�L���3�j[���~Om�6Ջ-�WO�
|����ψ�ظ#6���2/Jy�}�_f��~�O��ٿ��y��Yu7�d�]�MB<�{9�i3G݅o��N�ݣNhgV����[)zvM�(|��=��)W)�9>�H���vْ<�gc�#� �7���P�k��@{�T���:
m�A���ef�˚y��+��Wm�Zͬ�z���_��EV�E5���|��{l��V_5���w�/!]B�~��������^�����=�ݳA�f�C�JӅS����Ξe�0_��v����UP,�C+�5̇�#���̷��v6���b3U2w����\�e��xx�����ֿt'��.��CESD}D����|I�-���6%�\��=�w����3�_�,g�.SQ�[�,N7`�8{����������ei���_�9�R
+x��Xˎ�6��W�F�H=�A�x��:���v��ߖzP�d;�N.��Y���y��Cr��N�~}��u���r6�SJc��]�|S��)zf�s2�������)�AJ��U���JyK?A|���q�����,n9�'�w����N��T����W=I��W�+=�����[u��o��0�i���H��7�X���֢c �������w2�6h���7�,�ېp<g�p�(�Ê���Q��#�%^�/��y��gE�����ZFq��0p#���5F���$����eu3"�pN,0��kb���������ݱ0ȏ���³ĥ���{��ȸ������0���V�^Ŷ��
+����j�wRxKX��fi�0j�r�b��c�7t&V�1�X�Q�}�Y�e�icd1�c�<献B$�Ve�tŝ��9׵CIY�t�g,y+�6����Y��Fev�3~,�`'��e�X��E�{�Q	X�������+'!��<�ļ�W.9�s�;o�t�*��T�P��j�ߡ�ԫ��y.(Nٜjc��Dz��#���!����	�,���Tr��yʹ"^kN��)�9�R8��k/���/!��/i3�d�x-��#[���*|�L�����"?����x��'g�b����8�f�xw�=�Ga�5o��W��}5l<���w����z�����GN���(���A8	�m[ُRP��2��wHo;5Zww��� ��(J@z�?�V��k�7{z֑1�]��Pu���iou�ګQW��C��w�J�\��+���J�y^��W����s�6Sd
�s/�J��W-�u��>�:(�Xn���Tej0��{�6�ܠ?������Z�B�~��oI
�@�G����A���� ��7��|�@
Е�eG꧜޳�&?���8�q0�μ.ڱ�v��K���
+�y�C݄o	����ͫZ���~���gG��Wl��u9��-H��|Ktɚl-�sx4�hЂ
+������]���upB��BiA�>�~%���P����:.�s��m'�u���[�R��{CS�����w�/>I�1�O ��}���-�[ �����,�w�B�H=�Ӽ/!�G5�<ˆa:���g���9P(�C������<�pr��t2��炱�)iK`+�����K�p�z����h���~�K7Dڣ��1�%RC3>�E�ś3���؜��g����I/���'�iu��yNC�MW`ԧ������PH\��v�6�.o��?C�
 endstream
 endobj
-288 0 obj
-<< /Type /Page /Contents 289 0 R /Resources 287 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R /Annots 294 0 R >>
-endobj
-294 0 obj
-[ 269 0 R 275 0 R 276 0 R 277 0 R 278 0 R 279 0 R 280 0 R 291 0 R 281 0 R 292 0 R 282 0 R 283 0 R 284 0 R 285 0 R 293 0 R 286 0 R ]
-endobj
-269 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 705.732 274.491 718.472 ]/A  << /S /GoTo /D (subsubsection.3.3.9) >> >>
-endobj
-275 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 690.736 407.827 704.695 ]/A  << /S /GoTo /D (subsubsection.3.3.10) >> >>
-endobj
-276 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 664.347 502.476 678.294 ]/A  << /S /GoTo /D (section.4) >> >>
+293 0 obj
+<< /Type /Page /Contents 294 0 R /Resources 292 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R /Annots 299 0 R >>
 endobj
-277 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 650.57 267.306 663.179 ]/A  << /S /GoTo /D (subsection.4.1) >> >>
+299 0 obj
+[ 273 0 R 279 0 R 280 0 R 281 0 R 282 0 R 283 0 R 284 0 R 285 0 R 296 0 R 286 0 R 297 0 R 287 0 R 288 0 R 289 0 R 290 0 R 298 0 R 291 0 R ]
 endobj
-278 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 624.169 229.037 636.826 ]/A  << /S /GoTo /D (section.5) >> >>
+273 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 705.863 278.233 718.591 ]/A  << /S /GoTo /D (subsubsection.3.3.9) >> >>
 endobj
 279 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 611.792 222.569 622.332 ]/A  << /S /GoTo /D (subsection.5.1) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 691.286 274.491 704.026 ]/A  << /S /GoTo /D (subsubsection.3.3.10) >> >>
 endobj
 280 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 594.596 542.13 608.556 ]/A  << /S /GoTo /D (subsection.5.2) >> >>
-endobj
-291 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 580.151 313.166 594.11 ]/A  << /S /GoTo /D (subsection.5.2) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 115.3 676.29 407.827 690.25 ]/A  << /S /GoTo /D (subsubsection.3.3.11) >> >>
 endobj
 281 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 565.717 542.13 579.664 ]/A  << /S /GoTo /D (subsection.5.3) >> >>
-endobj
-292 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 551.952 216.915 564.549 ]/A  << /S /GoTo /D (subsection.5.3) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 649.901 502.476 663.849 ]/A  << /S /GoTo /D (section.4) >> >>
 endobj
 282 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 537.494 523.6 550.342 ]/A  << /S /GoTo /D (subsection.5.4) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 636.125 267.306 648.733 ]/A  << /S /GoTo /D (subsection.4.1) >> >>
 endobj
 283 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 523.049 417.578 535.896 ]/A  << /S /GoTo /D (subsection.5.5) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 609.724 229.037 622.38 ]/A  << /S /GoTo /D (section.5) >> >>
 endobj
 284 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 508.603 480.415 521.331 ]/A  << /S /GoTo /D (subsection.5.6) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 597.346 222.569 607.887 ]/A  << /S /GoTo /D (subsection.5.1) >> >>
 endobj
 285 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 494.157 542.13 506.885 ]/A  << /S /GoTo /D (subsection.5.7) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 580.151 542.13 594.11 ]/A  << /S /GoTo /D (subsection.5.2) >> >>
 endobj
-293 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 479.58 164.503 492.188 ]/A  << /S /GoTo /D (subsection.5.7) >> >>
+296 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 565.705 313.166 579.664 ]/A  << /S /GoTo /D (subsection.5.2) >> >>
 endobj
 286 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 465.146 367.095 477.994 ]/A  << /S /GoTo /D (subsection.5.8) >> >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 551.271 542.13 565.219 ]/A  << /S /GoTo /D (subsection.5.3) >> >>
 endobj
-290 0 obj
-<< /D [ 288 0 R /XYZ 69.866 758.996 null ] >>
+297 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 537.506 216.915 550.103 ]/A  << /S /GoTo /D (subsection.5.3) >> >>
 endobj
 287 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R >> /ProcSet [ /PDF /Text ] >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 523.049 523.6 535.896 ]/A  << /S /GoTo /D (subsection.5.4) >> >>
 endobj
-297 0 obj
-<< /Filter /FlateDecode /Length 684 >>        
-stream
-xڍVˎ�0��+��J���a`�$zέ�=���˧$�Y��@6Eq��P�[~/n�vq��˗{r�/k�)�����q-Kqk����Җǟ���\
-��w��c�5^w����М��ƆpG��C6c�M>���P����R��j�a�5j���]�dd#(��)F\!0P��lv��N�УG:Hx����E7|���p���ц��W|���!v�I��{��E��7c/�cZ�^�>|:����!Y�!�Y*�jQ�JHn�s?r]#�я�R :��E�R��	�+Å'h-F��'�	����
w����օ���|�w#lZ"_��tN���li�}F"���/�!�s�nG�� 1�0G��_u-�����Hp�F��S�F͜y�l����WT5I��c�{��Ί��;� ��τѵ�S��m,�\��쨯��3s�򔕩I���j�	꧲Ў1�1:gF�����o^iR�(۸��ƺ��qn�BO�_��oWߘH�s
�-W�a�ڭ����g)�<�i1&��~B3��~�0�a�3��a粲�ZIE,=���6�vE�>YM�p��Ȣ������7����t�BZ]��S��3�4lջ/�L{��Cl}�I��i�^HY��-��`Q��U?*E�R��89�UWi����8PKtk+=��~r�=.�b�
-endstream
+288 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 508.603 417.578 521.451 ]/A  << /S /GoTo /D (subsection.5.5) >> >>
 endobj
-296 0 obj
-<< /Type /Page /Contents 297 0 R /Resources 295 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
+289 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 494.157 480.415 506.885 ]/A  << /S /GoTo /D (subsection.5.6) >> >>
+endobj
+290 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 479.711 542.13 492.439 ]/A  << /S /GoTo /D (subsection.5.7) >> >>
 endobj
 298 0 obj
-<< /D [ 296 0 R /XYZ 69.866 758.996 null ] >>
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 69.87 465.134 164.503 477.743 ]/A  << /S /GoTo /D (subsection.5.7) >> >>
 endobj
-2 0 obj
-<< /D [ 296 0 R /XYZ 70.866 721.134 null ] >>
+291 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 87.803 450.7 367.095 463.548 ]/A  << /S /GoTo /D (subsection.5.8) >> >>
 endobj
 295 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+<< /D [ 293 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-301 0 obj
-<< /Filter /FlateDecode /Length 3233 >>       
-stream
-xڽ[Ko���Wtn61,��W�F�,�q|b��x�c�K��S�W�l���6|I��"������\~[���̿Ooo�t6r'�������'���[K�������w�����[�R��QRj�<J��
-��7�������-=���8Ii���݇��0;j�pv�W�y[��i%���j
-yNu�S�Uzĥљ'%y��������Om���p�����J7J��ôN�ϑ��
�]��h�F��흇Lcy�4�~�k��y]t��)Ned�6<�l�Ӝ�:���9_���W���4�\�>��Y~���z�{;n��u�I�\]�㎒��QGҧ��$���m�uz�y|�߲��:���c��,k~��>l嫵�d{����jS��t�3�q$k�)YN�P��N�C����"�V���{�i��Dp�#QZ8��U��2$�O���|D&�e~���VBi�L�tr��3��s� <���=i�0��T�
--MOe�yPa�<�`�R�F�--B��J"�r�/�e=�$?Y��Ӣ����.sF^eA����`gioB����{�AX�@ZF��\Q9�{� �ay��%*U��S�@�&CO�^"��E��_�����&k8��|�y�����>�,R�	�������П�^�z�w� y�DP�z�7��N�a��U�����kw<kn���0ͫ6��]d�(aؔ[�y}��%#����Ks`�hI8�,sF�c���n���/o�ڣ��d<J��`p�"���7�(K�Ǝ�Ʃ�pB��Ti�|�9�%7����&H���Ns� K�zJ,jt;�ٰ#W��U��c���l��ǔ��
-�U
-�~<Ȋ�kE!����e!
-���B��=<�E�Z�M�î
-�5�Z�/��(�)�^!�5(>Ni(�9=Ш�ſ;��j8Xiv��<z)��-��J;l��{=���6%$2���4T��6�Ui�N�0B
N���1��Y�=G��GE����G8����`�h�U���yK�WU�T�Ik��䅼-�3T	�x�o.�e��OpI�2U�]	�T/��9�0`�����N���!����+3qA�@K8�n�q�+;)��$0�<(s
Hq�qq�᪢VQ�"�4@U�vC���Qe��/S+)��Q�'�͛�/E�=��F�ڠ���F��a-SR��#*I�_�UHݤ]��OX�*Jz�c�B���^��Q�A���~:+T�q��.��vP�ul
-���`�g��z�;�v�5��T��`En��݊tk�ء�&��ܕ�C�v}eu�rQ����r5�B�~󏀇�m�x��\�^o�H�騦�i2��׻erIp�0P���F0�E��(�P�&�L<�ɢ�	�6��⨮畞�\���S�������dL[G�ԓ8i���1��-k�R`#��eL�cs�nMt��ԯX��[�p*y�z6�6ɜ
-�ٮP��x[w%'UN���y=V�n�+�^�Zh�<A;�d�f�hAmHZ�Q5zc&���8��S�d�稾L0��s*B$�=KSH�#l�H��V*��*�+�QY�����S���_�ҿNO�����~�=�,U>�lkaJ�:֕��ޮzz��G(�w�q�T����iB]��A���\ :�b#���C��kB�j��<rs&Ptc�3�$[_��4�2dQ���*��"$h�����������v�F>9h'?j�RQ�L�h��jr��Ѭg�Lk�r�a�y�H{N59��f�k���ӧ��C��l_a� O>4,���/�g�C�����":m�d�5��ѭ#E?�%�t�
-&���� +�Em3�к��B�Of��^�0ڪV��(t
-�,��Y���S���S�����0��v|gպ#QUqm94/Kx#�uj)�^#��V-���q�\8���J���p�#���85��&�*;$�W���d�$�{�L��r#�T�*��id6M�hm7�V�޺}ܔp����z`�_�?�����2%���SD�ӵR�3(!���ei�b'�wx��H��z*�kF�0������S���*�)����VZ�eSz%�ֻE�f����n�������\uO�+�yo;�{��[
>�+�ɐ �e�)�\��U�3�癑��ͦ�J@�[xA�[2-E�=�4e0]aH�#����lv��+l+�%ŉP<"ڣ̷`T��YsnM1�pe��Ď�8��PC�*�],��"�+��Z��v�g� [~w��.���|t��P�]z-�F	-!�Z�y�IAOꩪ��qiR�A���Sg%�����-Ó7�W1�E���&u,�\~=`<+���#�)�|��� ����wP���B��CRN��M����N�I�b�Q�X�.�s6$7�� Svd�b(�=r�c\�ˣ��L�I��?"wݓ7?7
l/�����t��ޏ|K�����l��m��s%ާ�����ޗ�9�zϒFP�~��Z�c��=�$��G:�*E�X�ێ��E�(��ٱ�]�[�l�&�T6��ɵƭ�m]�d=T�TC|#���S�C��ID���#��=y���'��fǎ�+�Ǜ�ǱWq�ho�(�}�1��(����(:	���Mp���f�t�-	�S�yw��5����p���i���ԑ��6͉��\�Q��A����*�2ؔ~H���s_��#�6k�3��!�v�ܶ��T�F�y�;*�p��(]D�u�2W�+ICS/GO�+�(�C�q4=�p�CG����I<�؞�/g��0e��O�<e�"�>��Mz%���
-�AbOPj�mI?��^,=�m#��R�˭�$Q+YY�i�nf<R����M���>6��z)).{�����R�y�U��R��5UNZ[6}g���ϡf�����P�G��*�1ƶѵ�i)��F�۞��A�s"ŗ��HŸTMq)�5�C�q�S�Ss��sBEBȝ�؉i$,Ɨ�nN״����у��d�r�{-����ad7�Y���E��]��˼�]	�k��PO��՚�t#8[�.��y�=�i�#��&,N9�Fխ4ơ
-�ݤ������~f�˱������e������ oR�"�z�Xv�
-ɖ
�Cx��B�"`��p�̦1�÷⃼�*E�ynb�OS�A��]߭�G�������РS��T;����a�;	��~�)��7;r8��~L���.��'@��~}&�(ѓ�"u�G�B:`�\�������Q#
+292 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+302 0 obj
+<< /Filter /FlateDecode /Length 693 >>        
+stream
+xڕV���0����U�7	�h �� mR��۝������oތ���o��_��,��ޥ������,֚3mu�-�?ˏ
 ���>F<��Ϲ����o,�7�xr=]����w���֖�+/����Z�E� ��K���A}�4�xρ|��f��Tġ�"t�\�C���\�Q���W>)p}������O|��;Gp��û����'�.by���8Q������*z+P��ʚ�ԵC��I�Y�[�܁\��u��������Ŧ�*0�@ZE-�� ��)b܀aAg��\�R�J��FjRM��$(^�����끏�k�ĳ�Yzy*���<���$��|?#�=wbAd��Z
+k�%��OQ�{�%y�'Q2�.�1`++8Ŀ5�c�vR��Q$�쁉�c+}Xo5���t��jR���6%v;cY�Qux�O��u�MU
�Q���D�����X�y�Z[9�c��xN���>න�N�l*����c��>��u�):�<Do^@<���LTH
+��[��ь<�ޔ�d�ef���˚-5�p�t��c��oKT�IjD�(u_�ӧZ�v����@� ����߈���lj[uk��!�Q�Y�abd}�ì��πW�ؓ7/C��ώmU[S#E[`�?���8�DX[�X������u�>
 endstream
 endobj
+301 0 obj
+<< /Type /Page /Contents 302 0 R /Resources 300 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
+endobj
+303 0 obj
+<< /D [ 301 0 R /XYZ 69.866 758.996 null ] >>
+endobj
+2 0 obj
+<< /D [ 301 0 R /XYZ 70.866 721.134 null ] >>
+endobj
 300 0 obj
-<< /Type /Page /Contents 301 0 R /Resources 299 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
+<< /Font << /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-302 0 obj
-<< /D [ 300 0 R /XYZ 69.866 758.996 null ] >>
+306 0 obj
+<< /Filter /FlateDecode /Length 3245 >>       
+stream
+xڽ�n����l bX,>���Y`����)�.�����S�W�l���6|I��b������"��od�}z{󧳑8��h�����
=Y��Z��������{%�=�݂���|���R��QJ�W��	��LU��7m�I�X_��IJ���>����Q�ƀ��c���J�O��K���t����J�3M�J�*Mo]����K��N9^!��n�.�Ä�H��H����䅮Bo$A+|y���C����5�����0N�<�E���TV����d���`�!S��5J5c���0�\�>������r��Ui�@]4ɜ��c�(�M\u�}ZmL����&Y���ׇ�-󚾩s���mg^�[��a+]��'��iU�=�[��+Y
+�|���v�
+��/I�RE������'�����I\�"�!�|� �hP����t�VBi�L�tr��3��s� <��=I���P�
+-M�<�0qD	�W	M�Ж�PĤ��@�\�K{aY�-�O��=}Z��r��e�ȫ,�^�_�8K�	=���u�AX�@RF��
.����=T�0?^���*��)H S��y/^ޢy�����?�(���o�}�7���/�h��߸��į3�翗���%��2�_6T$^��w�	� �r���x�!v�gͭ���y�F���d%�rk0���dd8��{i�-1�e��@x�s���
����-X{ؐ�GI�a0��:;"��4�,�;*�S��lg��x+���s�Kn��u�L�(	���@�@.����X��vX�aG�:P[����)>)�0Vi�)'+���x�I֊Bn����L(�����+yx*+
+#��@�]�k
+�_J!*Q.
+DS���@�kP|��P�sz�Q��I#$�w%��p��쾯h�R(�[^��v�$��z�dJHd<32Di�J#5�Ei�N�0B
N���1�Ԭ�#�⋣"��`�w8����`�h�U���yK�W;��jW�&�i�˛�4έJ��3}s�P.��|�K�����J%�bɝ�7TW2#�߉��c�}ch�	�}A߁�p0U�q�+;)���I���Z�k�'�G��*j�+�KTh7��{=���z4���{=���/\�yS���צx�:�6�����H>9�2%�_�TbD%� ��zG7i�:��A�B�
=�q�B���^��Q�A��^?5+T�q�ޢ�x�vP�ul
+��Um���A� ����� @�wj;(r�$u+���k���41_8��:dn�W���@.��^��<�кW�Q�a|�`[B�i<PH�b�7M$�tT�?�4���ԮwhrIp�0P����1�����]�=�u���`���L�N��9�ǴWu3��D�R�T��$��<��%cں���DI3%uЮ��n�QJ�
/Z�5�ͩ�5�ɲS��>9�T��R�lJl�9L3�P��x[���*'T]��y=��v�WT� �Дy:08j�(�QB���mZ�2P���3��8n��ɰ��'�=G�e��|��SQG�ܳ$��8�FP������l����Y�}E8
+�^\y*�d��k[���I�Q^�����R����L��g�ܜ����A�����eS-�ox*�	O�r���{4�iSۋО:�ML�&�X�(����3�"۞i$޺���t.�,��s�2�u��!K۵4*�k\'Qj䓃v�
+ �!��J�iA��&�~�-7���c�\�"5�6�iϩ&���w��#R�t�b���u��J�ɇ����ݗ
m
+���]z^F~�;m�d�5�1�ѭ#Da?�k�s�&���� +�El3���uM�*:�̍�{���V�L��@A�	��P�f�[�Oq��N�3�G
+ø���A�U�F"/����м��FX��RX�FzU�Z���D�pTI;�1%��1F���q4��M�UvH�5�(����I��Ιf5�Fԩ�9T�ie6͔h7�Q��>nJ8����������>�L��z���t��J�0��,�V����k;��T���aT	������B�mwU!m�գ��i��M�hZ�n�M%��N��}`_��?W�S��g^�ێtO�{� ��9��俢�)�\��U�"�H�f3HV�M�J��)A�}O���SJSδ:���v#-�z�m�иi�"��e������ι5q��W6�o����-���w��Q���u�5zŶ��Ln#S��v����\~w�&.���|ع��P�^�3�V	-!Og�}�MAo��$siS�A�u�Sw%�����-Ó7S�+��lNt�8��?.��UE��ᔔ�ᓇ� ���w�*��B��CN��M����N�J�"�Q�Y5�s�$��� S>��P�{��ȓ�\P=�yf��Ј�e��랼��y{�@
�P�=M0J��H��Эޯ��z�mcd�]�>�u�f���U�:�{�4����kڮ�,�@��X+�ciT�g8m�Pg��v	oL����Sل�'�:;�1_�z����
I��t�$����G��{��)O2��fǎ�ʏ�������ګ�U��0�m�~vC�AVQE&�e�c�	���ќ�ڒ������?�"�|p:�É4DQ��|�RG�+�X4'��r�H�U֒^7P,e�)�����羚*�
Fm��Uw
+IB�.T�m)�����;(�P�� ����~e�T�����>5d9��4.�Yip#��V���Q�ԋ?��0�b{�?����$�S�$��\27%��N,���F����JŽm��.�0K�V���4��r3"q�rV�4��͎�Z��6����渏�̻^|�h��s�Գԝw�a��"�d]S%ܲ�m�S>�uD`�P�����DQ�J�ôݑHKy7S-���/����)�xlF(�}մ������}��S�Ss���#v��>N{��孛<m��1��3k�d�Y�WZ��;��Hn"?�.���'���L�
+Hku�PO����Ht�8[?��$y�=�i�#'�bavʑk4����8�MF���'�Ƕ��	���<m]�)��rM�N�U$^ϗ׎�H�eC��aC����"`�Gq�0�5�÷v��El(��D6��6���vY|���/z���zלC�N��S�]L��
+�m@�4�H-��7���@q|�#��z��Ժ����� L
�\ޯ��|.���p��n,��t�bp�2(�2 O�?��e�
+endstream
+endobj
+305 0 obj
+<< /Type /Page /Contents 306 0 R /Resources 304 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
+endobj
+307 0 obj
+<< /D [ 305 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 6 0 obj
-<< /D [ 300 0 R /XYZ 70.866 721.134 null ] >>
+<< /D [ 305 0 R /XYZ 70.866 721.134 null ] >>
 endobj
-303 0 obj
-<< /D [ 300 0 R /XYZ 214.988 645.248 null ] >>
+308 0 obj
+<< /D [ 305 0 R /XYZ 214.988 645.248 null ] >>
 endobj
-314 0 obj
-<< /D [ 300 0 R /XYZ 217.32 535.286 null ] >>
+319 0 obj
+<< /D [ 305 0 R /XYZ 217.32 535.286 null ] >>
 endobj
 10 0 obj
-<< /D [ 300 0 R /XYZ 70.866 421.652 null ] >>
+<< /D [ 305 0 R /XYZ 70.866 421.652 null ] >>
 endobj
-316 0 obj
-<< /D [ 300 0 R /XYZ 70.866 359.864 null ] >>
+321 0 obj
+<< /D [ 305 0 R /XYZ 70.866 359.864 null ] >>
 endobj
-317 0 obj
-<< /D [ 300 0 R /XYZ 70.866 338.207 null ] >>
+322 0 obj
+<< /D [ 305 0 R /XYZ 70.866 338.207 null ] >>
 endobj
-320 0 obj
-<< /D [ 300 0 R /XYZ 70.866 188.148 null ] >>
+325 0 obj
+<< /D [ 305 0 R /XYZ 70.866 188.148 null ] >>
 endobj
-321 0 obj
-<< /D [ 300 0 R /XYZ 70.866 149.965 null ] >>
+326 0 obj
+<< /D [ 305 0 R /XYZ 70.866 149.965 null ] >>
 endobj
-299 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R /F56 304 0 R /F57 305 0 R /F71 306 0 R /F60 307 0 R /F72 308 0 R /F62 309 0 R /F73 310 0 R /F58 311 0 R /F59 312 0 R /F68 313 0 R /F54 315 0 R /F75 318 0 R /F61 319 0 R >> /ProcSet [ /PDF /Text ] >>
+304 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R /F56 309 0 R /F57 310 0 R /F71 311 0 R /F60 312 0 R /F72 313 0 R /F62 314 0 R /F73 315 0 R /F58 316 0 R /F59 317 0 R /F68 318 0 R /F54 320 0 R /F75 323 0 R /F61 324 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-324 0 obj
+329 0 obj
 << /Filter /FlateDecode /Length 2459 >>       
 stream
 x��[;��6��W�2p�%Y|�=�-�no2����xor��/���nNK͙@�JU�?�������?1�Ӂ
�k¥��Dk=����/�����y������������_���=s%V��U�׺G!Eޣ{ n����߇?�>V%�QryCr�����J�W)Pb�i��._$���_�.E��������J2=��1kh)l���r>�~~��R>R��x��ǋ�6x�����2x��%�����|��������vy����I�$p v!�Yx��X���-Z�f�9؊���ɢkJ���u,l`Z��L�$Tm0��2��h��Be�F�#
V��gU��*	TI2�0N�Q�0�����?���g~7�佣�f]u�.�O��'�z��R�(;(�p|�d�F#�(����x��Q�����D��;o� ���]�J��U�g#��7�x�����h��pꀆU���B�%�?��آ���+�,��֦����識U�f^N���
wΈNݨE7�n�ua�ϣ4�$Vk��&�U���ӑ~�}�D���K�D���j�v�Q�>�����&
@@ -718,19 +720,19 @@ R9
 ���)L�M	O�&� ��e���[��7s���f�/g�����|l����4��EHk/9Ic/��[{�I���÷�*Lݑ~����(�".�O�a:!
 endstream
 endobj
-323 0 obj
-<< /Type /Page /Contents 324 0 R /Resources 322 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
+328 0 obj
+<< /Type /Page /Contents 329 0 R /Resources 327 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
 endobj
-325 0 obj
-<< /D [ 323 0 R /XYZ 69.866 758.996 null ] >>
+330 0 obj
+<< /D [ 328 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-327 0 obj
-<< /D [ 323 0 R /XYZ 70.866 469.378 null ] >>
+332 0 obj
+<< /D [ 328 0 R /XYZ 70.866 469.378 null ] >>
 endobj
-322 0 obj
-<< /Font << /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+327 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-330 0 obj
+335 0 obj
 << /Filter /FlateDecode /Length 2437 >>       
 stream
 x��[K��6�ϯpo]��J�(�1Y`o��m���.�C{��/��-Ɏ�gЃ�D)��H�Ú?�||b���'�w����iA�F9��_�=}��O~��S�Z�l�����o�T�_�/O?�\[����?���s=�T2�H.Z���ߛ?�1F%gQr5-��e���J�S%Q`���Yvl}�7o���rx}��lT�9�IqӼ��+(�P�d�!h^�5_~bΌ�#�Ox^̷���x9|��wØ�g���������,���}l�������N�$�Ѻ�D��C�V�b���[�|�8	��*FR2S�Ь��Z�X���həJ�J�k�q�Zo���x��\��3Z�гj��*ITIq90N�ш�������<~�}SK>:�n�U'ui!��{R}�@����Ɖr�v�UF�4��bn����N?}4�G?�aje��O��S˰,R�'Ѫ`/.n^BA�6��$fM�P�Ā#�uTH?.��hF��d�uN�����Y�F�����H�Vj^P}�Ç@���$�˔$�}(���%��Hm��V�1x:���	�����K�S�Q�׭^2����t���\Jg�y��A�G%��;��!����č$��rߕ�`�։̣6��rN����C��B��}X����m:ԕݲ
@@ -747,408 +749,417 @@ vȫ
 ����y��T���Oŕ].��	Lzj�#�Z[��3��&+#o�"�Sr�V�ݲfG�u���Ѹw1
Ľ�w#��%��Lt1j�Y���
 endstream
 endobj
-329 0 obj
-<< /Type /Page /Contents 330 0 R /Resources 328 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
-endobj
-331 0 obj
-<< /D [ 329 0 R /XYZ 69.866 758.996 null ] >>
-endobj
-332 0 obj
-<< /D [ 329 0 R /XYZ 70.866 599.391 null ] >>
-endobj
-333 0 obj
-<< /D [ 329 0 R /XYZ 70.866 365.606 null ] >>
-endobj
 334 0 obj
-<< /D [ 329 0 R /XYZ 70.866 188.608 null ] >>
+<< /Type /Page /Contents 335 0 R /Resources 333 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
 endobj
-328 0 obj
-<< /Font << /F75 318 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R /F84 326 0 R >> /ProcSet [ /PDF /Text ] >>
+336 0 obj
+<< /D [ 334 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 337 0 obj
-<< /Filter /FlateDecode /Length 2680 >>       
-stream
-x��[Ks#���WLnvU4ƣ�-U���U�����Sb��=ė��t7�x#����F$g����Rj�cQˏwjx����W��E���0~�-��r�˯��x����nq��~��l���-�Z�~���u�[�ϟ,Ý���A;#=mWu���ߖ��9��UFn�E�aU!����Rؾ��.�����󏣬����A�������іN����j�W��%�Ce���e��Rꄗ��3^^)
�>���
-��UC�����u����zDTOx�Ps$�����ͯ.x����f���6���a����f=pF�'7 �4���-i��Z��;0�Q����VGO�C�y�=Dr���v������i�aF��}��y4�d�r^��0eT��v�4B�Κ,gg�]�X�O�+`��K�>�}X}���t&��؇&��ȾN~�|�f���k�v��	o�4��2�q��D��k�X�彍k�J���m6&�D��D��H�vk��U#���>��+���j!\M?���A�����Q�`�oJ#��GsVX9'v#ۛg�ې�+�6�j�棊�����XTm�ۋl�U���.�.b���O�l6�5a��ɗ���6���D ��Tߜ�6@Ǳ����9��>���2�1���H����5��L�H�;�͆����VZ���Q��W��D ���ҏtЃ���ܛ���!�@�K֛�pINÙqnC-�k����M)[��T���3�|ԶMGż�k�:}�ќ�=���EZ�5�I'����,cK�=�����㻿~�����@c�~�_������V���α�\>�(=x��/��,��q��5�W:���/iFzt��)��ݶ�v`$�����Ct'�2y�z�Ak�ֺK
-
*�4���m�Q���_�=�{/��[� �jX��(
-���\_�����1��cHI6SO��2���S.�'�ѩ�<�c\�3�l�`;�B�,��lN��.j�B��e�|�5+��fKkfpkd�4��7�%r�΍z3�k���s�ތ%���������Z��#��ۨ��Ţ��=���/&�fԥ���Ί#�ϛVEPش{�m5�t�M�"�>�Sb�-�6�W���1�	[C��mp40&)zJ�$IR!��Q�"���-�t 2S��!�*���N�0]����Tg��.��4�)�D��
�\B.w� T	%u���ǈ$�cM�ǖ9*6���ೞ�;d|�g�'�[q�Uٚ[fg��H��ICx�W��̪��gk��h/���oVok�d☔Z���
�F�ie�POHyz�&	t~r��O�;���K���FJn��fa�	I���`s�[���`c'���ˉC'�Iw;�� v��#���o�"���e�J����m��gr���{2D���c�$�Q퓼d�9$�x���m�AR3L�|
-�X�V�Ҏ�E僘-�`�<�=��K�(��;o*"��|K�x-Y��:5u����.��v����G�R�Z�,��83T��7��>���1��-�#ې���i\$�݃�K^�g�p��I�؇�4��d�L�*�i�00_9ET�*,�id��ɶėCf�0��q����d6��]}�M(9%�ҩ|�%�n��8�S̒\�)IV�[��&�H�M����)r�;�_4�!Xgf���^����FUV� I���Ɍe�y��H���l\�4���rȍ&F�N�6Ѯ��2���&�Vt���Ҫm��K�q�.�b>C/�*����gڶ�=SҷL�2�X󍵩�M+����q���{MI���=�n�^��OӇ&^�t�a�:�xE<�.un�R檔���`��2��������~�&��`ն"7��Ze�m_��-)�V���ܝ�?���}O�g-��ֶ'?�DQ������Դ�*?^�2��R��up�&Is(�5�el�IQ�G��f^��}�m)x`ɽ��i�
-Wt�׈
���4�n��B
>;c/�c�Jx���"٭��,�qci.|C�XT��G��j��KX���>M��[����K��"gJ59�"W�58�ǆ��6=NP�r�E�ZMv�����2�m�$
-�wu��kᥭ"Eg1G�Q\F
�A�K��'��&���IU\�ۣ��@�!;�R�=H����q�N��]}�q�ul��mTM���5܎��L�)]n���;KuK�=ͪ\!�Y]���e>��h;������Q�5=Y>��Ц'��Ir<�8��w�?s�/;�/ualkۖ��~xkMH����i�Q�E��;|��Y�[7���E�F���޼H�9=��9CJ��ݔ��*���!�=ї�$k��,��zfWy���-I��3 �|�@���̘�2Ck���yC�L�t���QB�g�!
-�W�b�v d	�ZN`�tGJ0=R�$o���$]ȇ�q՜��B�.�I�&�R�U�<əz�S�̌�
�uA{�rءȸ��x��?A0�s�M�d��mT<�^vB�-�p�B4ݭ��~�OU�;��I�k�����_���W�d�%*�J�S�X�k���]��;��Rz�L�й;b
�!�ұ)sQ+��Ii+�YF�Ғ��]����e�,
-)���4�&�ʬ�4�WK�M��Uj1�a���6��1�Pf1�w��*���P$����??��OUfy
-endstream
-endobj
-336 0 obj
-<< /Type /Page /Contents 337 0 R /Resources 335 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
+<< /D [ 334 0 R /XYZ 70.866 599.391 null ] >>
 endobj
 338 0 obj
-<< /D [ 336 0 R /XYZ 69.866 758.996 null ] >>
-endobj
-14 0 obj
-<< /D [ 336 0 R /XYZ 70.866 416.518 null ] >>
+<< /D [ 334 0 R /XYZ 70.866 365.606 null ] >>
 endobj
 339 0 obj
-<< /D [ 336 0 R /XYZ 70.866 229.618 null ] >>
+<< /D [ 334 0 R /XYZ 70.866 188.608 null ] >>
 endobj
-340 0 obj
-<< /D [ 336 0 R /XYZ 70.866 165.294 null ] >>
+333 0 obj
+<< /Font << /F75 323 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R /F84 331 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+342 0 obj
+<< /Filter /FlateDecode /Length 2419 >>       
+stream
+x���n�6잯po}@�j_� @f&S����=�S�Kߡ���KJ�,i<�g�q҃�D�(�;E�|��w���� !��ZZ�U~�z��|�
��T�ÿq�Wxd��3�5�r�������?�n��~Gmt�#~�B1�E��߇?�=&1焹ԯ"��f�y��C��р�ZJ���G$G3���\A?ƹy��^�?:3�4�
/�D��	������������˹Џ_^~����wب��\�������y������6�2��ˌ��fB$�	����4ӰU��6#4k��Fr�7��#$��H
U*���!Q[	�:pB�
+�+�C8�?�D���.��S���s*m@�l����my�%~�fy<�����pG
+j�V�Y�d�lԢ�9��vڢ@>�o �<b{��Y�k�'��p�xy(��������Wc]02�9�k������-d�Q#L�VR�f�>��V�*�\(�*E���h&C�KFT�U�F�^����������)�.6?O��4�̎��H�	@�ı�؜8�(�('* ���i�y��>�Sm�Y˩vg\A�G���F]�d�fB�\������8���ײ�}N������͵	��ɿa��+�/�s�֕�}Y�c�9��(��W8�|�9��q�1%2�Gxϓ�R�~��F_y�*�������#2:��p0�#�У �.�jӢ�HF��lcZ4��Oݦ4��,�x��� 3R���	ϣ�n��&n�@�)h��Y_f˜�'�����h�-CH��]_dh�����>�%8>�D�\��O�ڣ�w+�@q�}�9��(1��E^dۥ�t<�QR�@�p|����H�bS9Va�u�
2YEA5��(�g��gK���Ŕ2sMt�4|	��kG�ۤ,��z���M��5R@���NR�������1�v�y_2�'�l$Ҍ�k��MU�{r�ǸF{���z�=�F��ʹ����V�Y�Y�5��6���f�k�����|#4+�lv�&4��Ii:H�c3r�u6����B��Sg��/%�M�}����,g����èm
��i�L��_赫g�j�\�R/9cX�
��i����$*ޔ�݀�U��^:�c:(e�v�iR�8�
���^I��8j@�4ry�GO��,X����V�(��髒y-zsM�*m����%�0	��+��x1�e�O䣊���`F�U�q8���_&
+�ª���Q���޿Y-E[n�.�^mM�\�J��:GS&֒�0�J�`�丼L��/�ί���A�K�<�����o�١�t@n�]�Jh ��@��g�G#�C���r'��J
+2�|*YF��b����9���(f^�W�7掔򵐯��������䶏tw���(�9�%�mr�M��%4�$}�K�6A�pL�Ҕ䒋|G�4��i�!�2`d{>/�E+�u���"Il0,��}&��$x�T�F��=E:�H�昪s\W7�|Y�u��+$�� Y��,�(_!���L�R�@^$�-��,L��8�����L`G�y���V[Zl�G8�������̹���P-F�P-���P�6.B��ٴ5u�����7	Jx�Tۜ�{]�P���{�{wR�.�
˼x���[�z��[�f�
�/�Lk���h�����R�`+zO�M}���[m���`V�6�PS��ظ��lj��r�Ǫ�D��c����m�dOhpF$�&&0��	DGWĶ��ݙ�$]�P��z�LS�,�A��ͪ�u��
�%�����w��,�wwZ�J�FC�zw�тzww�w�ww�|�z��Y�W�w�W��ns�`�j����2`���Y�N��v�%���Z��:׻{�l[�:���������au�"��Ke��e��\{֥l֞����z_�gQ��}����u�K�[/��^��?�0"�����p
+�\�NҮ������͒o>�ڥ�m�.����v�X'�r���.��w)�[���~�G=�>
�4us��:Im&��^�����,�sKH��P�V�]�-�6�it��tcMFV@n�K��bψE�c��E���5������s�dj_I����*���Y�hix�	KL2���<s���0)���HKp=ގ��2w���O�j�&��H56���h\n���C�W8�rN��9=��KS�gr�=�s�^�C�)�$F<�o�Q�5����ꋻ��8���;?��L��F2o��[�(/�C���i���]�d�S�(����^�v$q��#�Ay�s�z
�!�1�b%�১�n�C^1J��)�j����S�IB��A��֜�B|�2m����r�8b��8���(#�hY��ʏ4�R��jC�Z�AU��8�jj�4.�C�K�Y9b�sJ�\��B�͈���\Q ��������_���_�	
+endstream
 endobj
 341 0 obj
-<< /D [ 336 0 R /XYZ 70.866 114.745 null ] >>
+<< /Type /Page /Contents 342 0 R /Resources 340 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
 endobj
-335 0 obj
-<< /Font << /F75 318 0 R /F58 311 0 R /F61 319 0 R /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+343 0 obj
+<< /D [ 341 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 344 0 obj
-<< /Filter /FlateDecode /Length 3325 >>       
-stream
-xڽ��#�1߯P�L�G���ʀG��N���{�z�,V��f�xF�.�:��N����ׯo�;s�zK��p���7�ߨ�>E�yNA�M�t����og�lR���?������+�ɫ�J�J�����^����n�h
-\~��m>^>��� �겛O��)�e�!�F��� ~����lS��J�ˆN�o�L��p匴!0���B�M�����b����+~c�LT�$^i�s��ʑח�jOZ�Ϭ�`֒^�Z)�T�ɏ�I�]� m�!����U�(����j�(o��N!f��|@ٌ�묿D�v,tյ
-!���4�P��FOM��o�2hm3}��Z\Q��h��3��-\c �
�zMs3�<��ŧn6�0�K+�gi�~0k�M/<�j��Q�ܔ�*��~G~���ܚ�R��*��D��M�6<�%���{ŭ�m&�8�#���ФN�j�ޟ�ӛM�����Ǉ�6_��#�
-|���o�t����*d���[h�6D�-�\��B�����H)Ef����ed��Y=��%=#�w�R������NY`��_��u�Қ�[WڐD1W�<���m�LW� 0�L�=�J������o�p;��.Ǟ�|R�X��){�����N��o�x��̅}��6OoX@	iӣJ�(~�LK�5L{�Y�Ș�)�u�BI�?59-�p�n����a����wA.�-�n4E�e���GƠ0%�S���~���BoU~�2t�X]1���}���&Um���r��@1�̂�*��#y����8V�����4���WdE4Nq�����R� ��uo�<W4��D�h�*��H�õ���*�M��}�����UW�R��Gl8�}�c^j�%Y7��^2#1�����d��<çg��=r2��_�#���'��t�_�@���!aT3oD��o��bb����W��y'��N��ʞ*���P�+@7˘�V�94htВ�f�VzW'cȾ���*�
-�b|i�U�������-�sϗ5�&^'*��U-w�M�&n�D'y�i9Ymd�*�b+M�<���h?w��g
-5������'��׍���+ff:����cj(�"�\��NRܝ;�!*[Bu��P���~��F��5�K���s�D���Z�HZ�Cyǥs��H�L��j��f�)oԑ'�]kӠg�vi6PGE�>CJq3FP�ۄVv G�t7��}���ū�Վ�t?ă��a��� �
��,&����ŧ��*�R��ϛ���=��J]�S�v턑�u�]]�h�U�����[/�dy���N=^K��ع�T�[
-\���1*����JW#Å\PI
-&�E4�^�f���f�����/�!��
-s�=g�
-32��=�2�E\u��r1{l�%��Ee�_����L�c;9~�ʉ��m����@����t�)���E��V�8M̨y��i���GF�kh��sN�u�O����{�����r�Ij�K�ָ�H��q�^�`��;�S��<{�EP��
=5w�g�>��2~����-����=�ជ�JJ��y�f��4�k��U�-Ҩ��&���|�m��y���a�S�0�:�C�;a�u�P�\�'����;�2����(���Q]�+���F�O�qD��q��25Ϥ��(�*U4喥��1�R�ћN�xbq4�)�@��Gh�t�R'���Ȭ��8�C�I��'�\���A��8���G�{��*_��m�̥ͩA�&s�|D��˾2"�z!o�FTݖ3,kKL;�������|+iR0��I{#�v'�
-vݷݥ8��t��4�@�4�o�յO�Y�3Qv��iTɪ�9P��+�{�Oj���J����ZFY�����=z)9�V/X4�
�M�5%�1(ь�k����|���a�P@�Vs8_P�Q:�zxhʂ�#%O �8p��Q/��(^qm=H�=K� z:��8���P�=u,�T���p|"��+��
-4 M׬"��J<�q���]Z4ͨ�#CH��"��e�e�h��'s�F���(}��5�߻����H���=�{�l�?�[Y��,7�W���f��褼LxH�A�_�=��i%�P��p0� ��qAQ�"��:	o�z?��@
4�ݏ��M�&l���
-35/�
-�y"�'*D����'��ڦԢE#�ǰk���ܰէ�0��J��s=��VL�0�QP�6�Q���Jq�l��`���2�dp�)_��otȎ���'Q�Z��Rn��CO�h���-c�قF��?�N��H^?)'ă<y�K�/���y�eD�DPq���:�7PF漪�$�����f�0־�ےBSL�����KDO�5��hk�X�cd�5TF��ES�G���1,�Ƕ~��ܧ�em7q�͚G�������r�ݒ�\��<�o6�#r����W� �!�8>}t��/X�D3�	9+�^�,�9.�1�x�GZw�p<�}�M���������RC�u
���^���sYOuS�8����>�S�DX�{=�7�^��N��kF���Gc�!&ёƂyԏS��U���ٛ7��۵��I���s<��w����O}��=Q�ú!����!�~���'*�{<�	̊+t�=_��-=�ޫ�[=N��7��8х���e�������;�&���FGq�n�{�3���̋�R���k��u��Ł�9j�uTk�<��Xf�~�r�yZ1��glr� ����n>�O� B��{�D�	#ݫq�9+x�j�#'h�GI������	#R��t#7K)�Q6�ظWꉹ��n����x}�-x|=N;�y���-W�6����{Ϯ �xKO���O�V]F7����ѽx��֓�[p�Ox����mT8��5�4S>�������/8ƹ���_��,(���k�6�
�+�vnX�nP�04Y��2�
-��UM�7������fB������\�z�q�n\�<4Ec�&�"�;���v�^$��P;��d��_H
-ruiBzKR�즭�%U'��*���V�z�
|S<��
ϏKʯF\�/�x<��3��:r���NeMM<~w�e��֕�,m��������)l)(��%Ao�{�q*����q��IX�pD�U�,�?����UzK�D;`��c���L(̊�s45��K6p��$-��ʠ܇pξ#piK���'xW����`s޽�Ois��xK�ųJ��R�t�o�s���7�L�@u5Png�����\��8���
-���GA?������5��ky��c}����>J�uaUl�]����\x�kV?���n�J9��&�&k�__��'�4�
-endstream
+<< /D [ 341 0 R /XYZ 70.866 426.041 null ] >>
 endobj
-343 0 obj
-<< /Type /Page /Contents 344 0 R /Resources 342 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
-endobj
-345 0 obj
-<< /D [ 343 0 R /XYZ 69.866 758.996 null ] >>
+14 0 obj
+<< /D [ 341 0 R /XYZ 70.866 153.841 null ] >>
 endobj
-346 0 obj
-<< /D [ 343 0 R /XYZ 70.866 721.134 null ] >>
+340 0 obj
+<< /Font << /F75 323 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
 347 0 obj
-<< /D [ 343 0 R /XYZ 70.866 691.732 null ] >>
+<< /Filter /FlateDecode /Length 3114 >>       
+stream
+xڭɎ�:��_�(?��l P�t�s�s�9��_G"E���8կA*�DQܷ2��^��?L������b���`���??l~c.��̲Ƙ��Ŧ����˿��Dc����7c<l�~�>\L�-Л�*>��{��o�)�e�3�ʋ\��5��dhƤ�qw�1?1�����YZ��sܓO��0|��*��mde����A㝀���|x
����\7�Ĳ�Td��/߀��z�
A=Z�&�A�[�$.�?+�p���?G���/�9�X��"\��	��C-��b8�,���s> �������1���r2�[�����H�-p������� �"�N.8+ؕb���C`�TN1���S�@ۉr҃�.kP�i��G�Y`M�#"k�A�C���R}8��ę���úD��х���Jm�ӽ�"X�*�|&�.tBO���C��Ͷ�X�߸Do�5���^�am
+���۴ex�b���tj���0���٤��j���q��nk�����`lis��o�T~n��w��3v���|\]V����U��(O�%3�,�x!�w]J���U��NU�/ΠJ��Z�*�K}��`}�����8�US�����k���i�]�0
+��3w�.4��{ʪp%Y{q��̺d�C
+�(�'�@�X�j*:�L�bAؑ6������s�J2ۛ�#�	�_ݔ��i��B����=e�\H��WQ�@G�>pȨ�D�p(�<Z�c��5.n��/���qCFaI��;ܧ2�ީ('�ft=��
+�*څ~;�9��:mMVF�����\�֜���)�Z���|��׽iO��	kR��D��Q��5b
v����.@X�	�*S�4kHc<5=��^�����Hm�����څ�}qc�;��Q��Yf΁���xo��R�l��y��P����_5�����Tz�kK0L�E=�Z��ĩc�k���@sx��ƸV��♬)?@�l��h��3�80�l��f{۔�~n��P�!ᥙ���\��'گ�)�TT7
+�*�}�H"�.��'L8���y��7ZA0ݼ� q�'+N�͔�Eu{LcJaM�����"��
+������� �x4w�GJ�+��� �$���Қ��u����s5���B�9�Pc�5���x��rZ��#�f�.�����u��~`"W,E:���LΜ��o����̹�G'��1K҆*K1���$�mn�0*���k-gŗ��A�<5(�߹���G�q����u�*Of�e석(4h���B�$r�f�`Ը!QѾm��T��~h/|�Qw{!v)j�#!���<�JYB�r����/i�~���\��Ҕ��S�u~IFRJ���n���l6~J,���fO��ShiGY5-a�#����\D�[�v����:tWǃ�.��L��-�s�Ƒ~�OUG��En�:f��T��,�0M��*f�j���*�Z:�S&Y��t��n��>�gTM������u����è,r��VU�?�zN|�R�RAUH�T�9�����d~���q8W3��K����T͆�ܼQM������L蔍*ٛf
�}_��Ir�/�L΂�G�C�@lb��*�Ȼ�*�K�.����LZ���-����������V�5w�K��7��>pvX[^c����l��W�)[f���2
�Ͷ�
+as�Ϙ����l��o���b+��xeGti��&	��V�l���6$d��C�uR�lD pRH�� �+��-a=m!^����@>4�,�,1n����}���(�I��Z-t����ȯ�i=U��X]nJ�^�>걕Y�z�|�_�/�pזw�-.�>�/A��#�`�m~�E�t5���*����1���z�9�����eIa̗SMyw�X��E�5L8S��vc
+3c�dD��EpE�bT�����*C)�W�:�x�T��;�U��c[�;k�>�Ȫ@-�F�ZV��
+h��3��k=#<gg����7�H���@�-[i�8=՗-�(�3a=Nm��~B��3<�@����ʀ�LU�ū��DP����Z�o�748cW�<9��8��Y�$ٳ�(91J��X�AIlߟ��O�B��7V�Xm4:�K@�xݛFUZMBGƩ�-�ss��I7�����"��<,�\�@�|ZV:}��`sCm�Y-
��0}��ON}V=������2#�t�n�����y�mlQ5LL*ĠW�;�m71�w����&�E.��-J������Jγ��\�Ex�4��z�_��H�j7&��Q@GM{��-'bQ�Lz��:jH�f
+�s�E�*[;�����Z��gW5����$�������G�qo��T˼��L�@�ۮ�n_����.��I�����֙�h�G�|��˷펍���1�x=�IƮ�[�="�{X�0�ͦ	��Ę��}Ø��FF���m��)�����{q���]`j��$�r���]��R&Jj�E~��=�3��hV����w>ՈG�&^�7�li�Ǹ����3�VfL<j`�8򌆺X�q(6�\T���OzOF:�]d��.������U7 [�����Lk�V(����w�
+�S�`ISs�yUס�cX&O������\
az�]#�
+��[*�<� �3��ċC���5�[�ٺ�p*�`^c�Ø3�8�S7&�D�Zi��³���±w���Fv��+� �_Pc�m���
+6g�1<��78r�3������4c�ϩ2ؗ��i)�B��X�oN��ګ6?NՐ�wZ��ɧD#�Do��;i}T�3υ�z�jSQ��� ��L�9.���T(�s�j��$���k}_ͳ�N��d�ؾ��6Ne�D�������t�)v8u���c�7�Cu���|m��+;��p���,�.(��F:��x�	_�����F���M��f��.i�7��u�M���Mo���,��6���>[��aP�Չf�^0�J:��j�}��X��
��
�TŸ�34�I�G
+͍�j�d���wm������C�7�=��MDў��Kɼ�4�qm7��$�.)��h�Ցn��6���Eu�Ru�?�&F֨
+ߘh`�}
=���Q������a�gƦm��K��śeKm��u�}��?���
+endstream
 endobj
-18 0 obj
-<< /D [ 343 0 R /XYZ 70.866 612.861 null ] >>
+346 0 obj
+<< /Type /Page /Contents 347 0 R /Resources 345 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
 endobj
-22 0 obj
-<< /D [ 343 0 R /XYZ 70.866 170.861 null ] >>
+348 0 obj
+<< /D [ 346 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-342 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F59 312 0 R /F71 306 0 R /F56 304 0 R /F57 305 0 R >> /ProcSet [ /PDF /Text ] >>
+349 0 obj
+<< /D [ 346 0 R /XYZ 70.866 605.057 null ] >>
 endobj
 350 0 obj
-<< /Filter /FlateDecode /Length 3971 >>       
-stream
-xڵˎ$���_��Y�H��@{�i�L��C�'�!�]$9ğR��TU��à��D������ϋ����̟����f࢔�(w���;Eo�E]��Z�B�p�����UJ����Q����^�7���z)��O�n��ҿ�a���?z��3���7���]�k��Q2ڗI��d���'�S����-<?1�	���	�e�o<Rw��qL�Qi����Z�`����Ƥ�a��o�5�L�t��`��sí���o���A�X��,^g�g	y\?-��'6�1-.2�I76�#��G�a-
-�U�o/��}Ο��G"C����kx΢�3��h�.�d>�e!L&�d&�Pl�V�3�u�[��it�{�F�'X�<b�:��%<@��
-VY�*M�\�-O�#�A���а�?�J�BD)�#[����E�Z�Hi2�_ԍ�2~S\��UZ&��g7�(cy���ҷe��8���l�9�jCYq��1��0�PU�'���S�W<��7��FTVx�����>����I�:i�'#����64BB�|���0�6z�0+7	3^(IHP	0�<�@l9^�qBY&
�W�J<���%��{�#��8Rw�=Taێ�@��w�Hk�'
-�2��k�E"��5�,�,�:;�,rf��^��6�G��ě�=<9����>����T�}k��N��Y�sŗ�U����P�}QE�d`+E�|�I��n˜e��Bf4��������h��
-��Yv�(uVT�!y�m�Ѓ���=|T�F���V͔&�PC��G��w�o{�E0˸�� �il���bu���m2��%��ʖ�'����ʖ�U�|��5͇f�'!Qu�}��=E�����?���xj����O�4��F�]ӄ�J����v���!G�-�5��F*Ɉ~� ���{cj%$����w��Ho�A�^$�_��������w)�w�I����|��"�L0LXC����Q�Yć��Ա�Ձ򇁿���:��L���p����M����Z����&)H�6���.ʚr��w�,[�)%�ӖK �r�m�.����q�l�Ȍ�ا�*�T��ƶ]?gY������5��4����m����mٯ���k��]����Ocl�7���~�ÚɖY�BtU��DDw�-tI�ܘ�ŵf�֏�R�ݦ��$ņ
-:��]�Z�Y駘�.�R#v+i}�v����L�@���Z��^��]�Q82]��\�O�.0d�CXq�M�r�t���t�-:7
-ϡ��bP�#�O#KlãcE#
-վ2M�!Z��:�PZ�K�Xd�י�Da����Q�8G[���Ѳ�Z׉���������-l�E�b#�٪��Sm ��Q��.Iw}2ƹ/�WR(�:'�*��k$�>�Sn �&���y;��P"p��=5����A��E�L�
- ,��~�+QJ�zR���Į�E�nyYR���6p���ܳ
Fz
-S�]�b�lz^r�a��>yU��ql���6���Ac�끶����v�4��Z���Zge������PbZ�r���"�U(:��Zq!o���!���d,�<�`�i;`�3R]X5�T���&���+V�=B[C��80�SY):lZzrO�#�?�?�<cR�Ǝ5�e|a�]g��L�_X���\���X��Rސ�W���ƛ���I��Q�H&ɧS1�}^���q�d������#������=?����[�m%�YI��Ů��	�m���K�#��[g%���� ��n{*Nݢk�yvpAtea���yup�Tp�^������ap! x7a0�9�H�k6�`y?m{.W޴f���ݭ����5�Vc1\GJ˸j�7�q�
�V��h��*����ƥ�0n���BV!;R.�U>�������?�]���)m�oY���^��Qs�!+�ue!v����S}PN���>[���8��夺l�7�Zw�J�M4P�~���#�9@����[�y�Na;I�連�(�bGS�%é�e���<;�Q�okz�{o&��Q
-&UG��P�)��p���N���Q9,�9����Z�b&���/���� ��<h'�',�m&�A-����ǧ[�2d:��	;�/(��Ԭ�a���Wp�$g�#\`)�?u`^DՃ��DtUH�;9ɬ�L2#�_'��!�x����1a��l��ZMd'�9�GvP�	��v����V�-�������N���|s�x�"!"e�2D2�n*�u��2qϺ�'����G@�5����1�f�
-"j+�6=�d���kpO?͎魉���F��1���X;�1�N��pP�%����\`g���z��!J�
�q�HJ������s �([B���&/�T7��K�X��^�Ra��i-̰Xr�|�?���F��=�˜�l03v sK�E�J�X>~�p�`l��6Vy�'�1���U���15��͋�>qϤ2�m�4dڜ���ZAv��:q�Q��yB*sdcAӐ�[I&.�8A�NX�:�#��9܍+(�ow�v7�
-z�OT|�c�~u,��#�g�7�97)�C}ꔪLڲsvLE��ְb�LH�B0=��^A�B{��j��N�W6���>�<���P��p)�<ϝLq��ކ"�ϼ
%x֒!�ĳI!ek��2�	=��hz�INH���yO�,)d\)��М�4�%����ҟ˞�߇]M2����=���b�^��Lr
���-�T��T.-�S7��2Z���=CO��3���� ɵa�ݢ��Ak�R5������d�0�A�s�$��@}7X�D2,c�k��9��̟�r���=�tz@5�4$ŪQ'� �.��7a+٫��;p�u�I�-�YVK�۹;Չ���-��C�C}ڑ+� ()t`���b���U�3��j�;ἐrl����I���c��\�Y_TH�ϥJ�� 'R%���f�%e��T�P4I���2�O>p?�*�X5�@��
�����T������4W�iwZ4s�ɳ��D�D��3`x�+�x ��r~�\)��u>�x2W�&����y^.r�:>ϕ����I�D97lt��f7���\)Oڱs�+9a�^�S��Cn0@SjDsK�tȳ_��t�YZ&����
-�A�Kg>)sUv��ʾ����pv#L�[G�{�s֩�x�4>�O��������JWJ�m+�¶��z|k��>e��DG{��70�l��G�4]x
-{l[{DvO٥'�����4����-Mt��,��=�mk��#M�X(�I���F����.�o:�Kпc��5'����~�l`���ǩ$��%Tݐ�tEJ}S.Q-W�r�X��S:�J��r�m����2��[���"��f׹��d�ȳ���^���P��"��Ͷ��Z�3�v'�ȗ���e�a�
-WA�����}d�}����L=V�k߰��kC��-Vjk�~޹�g�E�ZM�2yJ�ICSZ��[OP

}Qt�ȶg7=90y_��a�"����K�*�z�R77��[�Ov�q�
-����q��{��70z��j��q�[7?��e�n%�W'�ը���mh�������(i��S��+ݏm�m��"j�f��j���o�dö�<�0�te��M�[����O����D?����(mw����b���
�)�y�.��ӗ+�mWwKjj��~���B7nM�梶���6��Sw|i{c4u�6m��}�㏪�l�H�Y\��o�u�z/D�~=y����x�Jl�R�`�-}�m�!���]��m�����]ۿ]��[U�`��<����)��s۲�$#��&���`*����ݒ�o��6`��tݻ���m�vu�������6��`�*[~.=���p�R��O���]�{��Uv�N_��U���8��& �Oڀ|��ơ����Nr�;�zi"ΡK�R��=t���+J�2i-)��'���L�����
-endstream
-endobj
-349 0 obj
-<< /Type /Page /Contents 350 0 R /Resources 348 0 R /MediaBox [ 0 0 612 792 ] /Parent 227 0 R >>
+<< /D [ 346 0 R /XYZ 70.866 540.732 null ] >>
 endobj
 351 0 obj
-<< /D [ 349 0 R /XYZ 69.866 758.996 null ] >>
-endobj
-26 0 obj
-<< /D [ 349 0 R /XYZ 70.866 682.209 null ] >>
-endobj
-30 0 obj
-<< /D [ 349 0 R /XYZ 70.866 499.379 null ] >>
+<< /D [ 346 0 R /XYZ 70.866 490.184 null ] >>
 endobj
 352 0 obj
-<< /D [ 349 0 R /XYZ 70.866 441.102 null ] >>
+<< /D [ 346 0 R /XYZ 70.866 453.376 null ] >>
 endobj
 353 0 obj
-<< /D [ 349 0 R /XYZ 70.866 403.526 null ] >>
+<< /D [ 346 0 R /XYZ 70.866 417.261 null ] >>
 endobj
-354 0 obj
-<< /D [ 349 0 R /XYZ 70.866 365.95 null ] >>
+18 0 obj
+<< /D [ 346 0 R /XYZ 70.866 338.39 null ] >>
+endobj
+345 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+356 0 obj
+<< /Filter /FlateDecode /Length 3862 >>       
+stream
+xڽɎ\���C���H|D��O��mIY�>U,.E>��'�Aw�Wdk_8��������,��O�~���R"9��������7uRD��#	���O����T若/R��ou�O��Y㧇����|���C�ᅬ�N��Pyx<}�D�����q�茂�K��޴
+����H:��̽� $�m��wĝ,���H�>C��O���D��&�S�++�@���[�����0����$e>��Y�>Y�Y���*B�l�_G"$!lOL9�i@���D]d�P�th���!I��Z���!�L�j��x���2�^��u���۩ԕs�����xi\=ʽ*>����¡�l}�'"���&c��&3��!
3;����pqFn�+Rba��o��Zt$�D�c�Ȋx�g�<�O��v;y?ˈ�1���|)�K�y'�UUpN1��-ٯ�Y3�1E�W��,A��cjO�S̿��P��=1(����R
��5/�aኌT��tI>��������M?�."3'gK��77���+��T�慈��uȝ1D+�:%LrU�����$�n���~4��S�L��{�|4�YGl�OIO�FZl����M� ���	j ͱ�YN׀{��l��1	�rp�I�U]��bCa��`)�f"��3�?�J���}�n\T�k�R���z�j������M� +fY��n6K�mNqg�#�Қk^-o6�{Z��_p��p_s�|�$��O�q%��F>�6¾���O���X<���=�
+�~d����]GM���o|O��f�>����h����Lc}_
:?�����vUĜ��d�o�Iq�����Y,<�][_�a7��d�I��^��
+����C���z�/|��}
3(f~�}��fU5�]dgg��1��s��Cҭ��=����ʧ�e&�="�l���CO�BK>��恐�j���EEOK�Af�\5�Ț���t�\��TܻzQ�(��j�1���$�{#o�r��� �ݙ��vկ�!�'BX�u�<���JB�Vt��c���J��c��Tw�Z��ʠ�H{%�k^7���qX���E�q��,��&Y��9�'�f�����XZ�Z+���d���|�\�F���\�M3�s�Op�����8�V��|bX�J2A��稘����6п��٨�����7s�������n�j�j\��*�嗪i�Zy��0���8�Z�z�v��ԛCU�F	��̒\�ҵ��Kl� ���:��e���
�����3B=s*'N6k�.	�7L��7䓾�ҝ�;�z��>
+�|��&{�IU��|m{��sJ�#��l��[��.���*����*��-��RÒ�����_���1��E��u�� e҈�_'h�J�GЀ��̀���jf���Y�GeG���r���:˕��C&X��ΙG�`])��u�Z�J�!4�
+��C"�!	g�Chb����MI�h����q�WX%际&aS[u�V����ݡ����
+gj���Vż
+����+��y���Pj�Fވ�k[�%n��9�����)/�*�.�����ݬ�e@���~������\�N�w9hHϽt�H�o��ք(���Sk3H�)o>���\�q>�UM��~jK�
+׹~�]|<$�8m�Y�km��B@lm2˺�;������^�]J�G*�1�@d{vJ��4�Ӌj��=w�~�^�s�#�6^�
�i�Г����Ղs�d�ʐͪh+^���0q�p��^�e�M;�hh�NU����5��S�u��8�?7�bi�`\����	��Б���/u�:<�P�3��T|��*���9o��øl�����~���TK{�b�%N�P�����1�ӈ����@�`�]���4s�h�Z�V͙�� �G�L���d��츂U�;x��W�p��B��ݤ��e�C�*r����<����*�fI�^=w�cSmeA����G6!NEiy�)d��}�ߡ���t��G��P����3������^�8�\ZYS��ƴ:���v�J��(��g��6TnW����W�Ө���YnO�����Q��o�O�˹Z��S�{8�ݖ�jtɺ7����Fq��[��fAq8����� i����߿��ÿ���[�_���ߎݧuJx(k�-��N����@�ѰZM5W�9X�s��P�n"t������_[K���}�ӫ���7���nP��x�G��	d.����W��VA/	�-�}k#�+�"}�P\	�f֨78�~�~�ӆS)'��l4
+�F��O|��%��-�
+8n�-'e�� r	"���qS�`�/�0>�GFgn��?�\aSŹ����m�}
�3�G��.�������x�8�:/��{_�˨�W��i��l��`�d�C.��hh��¼��g顖�v@��I{�wic��q�Sߥ�	�R�⺴�"�a�a��u]}g
���\���N�,��K�K;�3m���K����l���f�9�ڰl�H�	rӼ�*�oJ'��xL�C���>����@����xi�woG�I(P�@�8��ע�W�2vp�޴�D��i��87e�:J��[~��<��ܗ�WR(�Ϻ�*	��kQ�hͥ �uC3'�s;��ǩ���_Dp
K*��䅩��Uɝf:A��u:�F�H�Wg
+���<����"�k}����S'��#bp2B��R�FB/0��������P���s�60��䐒
:���Uהֶ6<��[���ފ���]�r��Xh3
+��U[t��nq�Uh��nS��K1)	=��;L�K��u]K_��U��c�����`�\���-�}�s���Kz������sEו�P;K̝c��[��M����ܐ�W�;���ˀ���OF_�s�
*'��6��,!�h᜹����A���W��U������ڷ[�������/S�i����n#���7��g�~��V���S9���Ż�袏*�s�?7���b���l�H�5�-��r�vu-*��n�����UZ�l̈́->n����������_��Z���W/T
����e\b}�Yau�~tS�+��L;˯���}�Q{)Z��%�K`���N�o�qb��.���[��f�8~�K��%��q)��VaD'�7e����Dׯ��`+��Wp�E�m�y�i;hB�:�(���WAǻ4�Q^
+�G���9*ham״�{YT���������y*B�9a�t]��sPBF*�s]�
�tn�
�T���
�Da�1}��A;U�����c�
�����-��P+8vZ Q@y?�RgM�pKq��kx
��Cyvif���1j\r~�$E��U�4�S���z��aS�p��'�������#/�|LY4BA�=,�R�]�!������A��Of�x�Ϋ�w?��dJ��H�;��Lي\WV���?���U<n��m�o���+�Y^c�MKNU�X��W���8
�?���ٴ޻<t���f�$��cs"}|iY�/Ki�&68�
+t�ښ�^d��1Z����	�1z\���gP��B�RÞ�������&��X��5ǳB������P�0�[�J]Ql���iɹ��m�U����JfE�ۡW\,+&�[
+B�6Љ�U	���Կ�#����'�Y��� ��gI�$�p$��U/��V]�2j�ҫ#���F���Y}�^ް��\۞�%`��ݤ�!����ws�����	*��M<��X�^M6#CW�lp�܄��Kc��)g�٠
+�k���g������e'�
+�g6�}T{�v�f���7�gS	���-q��:�~%����G=l�
+endstream
 endobj
 355 0 obj
-<< /D [ 349 0 R /XYZ 70.866 328.374 null ] >>
+<< /Type /Page /Contents 356 0 R /Resources 354 0 R /MediaBox [ 0 0 612 792 ] /Parent 231 0 R >>
 endobj
-34 0 obj
-<< /D [ 349 0 R /XYZ 70.866 217.763 null ] >>
+357 0 obj
+<< /D [ 355 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-38 0 obj
-<< /D [ 349 0 R /XYZ 70.866 192.207 null ] >>
+22 0 obj
+<< /D [ 355 0 R /XYZ 70.866 538.3 null ] >>
 endobj
-348 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F56 304 0 R /F57 305 0 R /F71 306 0 R /F60 307 0 R /F58 311 0 R /F72 308 0 R /F59 312 0 R /F62 309 0 R >> /ProcSet [ /PDF /Text ] >>
+26 0 obj
+<< /D [ 355 0 R /XYZ 70.866 412.155 null ] >>
 endobj
-358 0 obj
-<< /Filter /FlateDecode /Length 3959 >>       
-stream
-x��\9�d���Wt8c���
�VoP`��f�"r"A���W�W�I��EO�G�u�G���?/����~^�~��n�"�������2�yq���ƏpH._�|�*��B�{�o���
-!������)�8�&����ƿ�O�h6�l_���p�oA<������F7t�#=o`��7p��Y�z�Iٞ� ��mo��]�yP&�DY��|��Ġ�<)����&�ش=+P|qĕ�ҋ:��6U6�����Fm٠�j+I7O��}����EW����-1U�9h��1#3uV�L}��-�
��`�u��	���O&$A�+�R▁sq�~���z/;F�&�W�\�M�m�m��oke�my4K&��ԕx���%Nٯ�Esh�o�4�J[@�'̵֒�ٗ߼,T�Y'��{5P�T6=kQ�'�翙��>	2?IБmU�YO4�8]�Y#��v��z���$Ӥļ�K
Zsx-���u��h*����x|
-%nitE�Q�c\�㓜���6 ���֬��i���&6�G܊�����_($�aZȦ��J�������V�p�*��V�<T0��vzu7f<Z�V`
�h8������c�ޜ(:vϓQ��
4��Q�b{9��CoIƱm��%e/Ԧ2�����\�}�lC,ׁIr�V*S�h�Y���f�,"o�ռY	7���Zf�r�31���������}��U-���	}���>��8>ɡD�"ymq.}k2��D�P)�Ot+FhT/�RXv����"�� ���!6���'\	�R,�Y~��j:,釅��g5}�OW����&.�S
-´�'�ͻ}s���ՓK5	�MQ�c��>�o&���@>�:�}k3n��\.�,/��}��t5.��d��JZ
-��$v�<�]��/��_u.I�����^��-�όb�L�a%�2)����v^�Op��6vV-�y9����ƁQ�b���;��4b�%�]�����U�)�8L1yF�N���cW�r������j����uc���tI��YG6��"�!��;�HC߉:/n�*�o�.q@cf6)��:�ap�;;�_�2u�i�VCRc�]�ž���ɘ�в��ox�6�ܘUԕEo�?'y�R�d+�0�P-��wl��8I��ژ
-76I:,��G��J���'�U1T����
�U���ۣ݊���L
-m�$���"��T�U�$���b�tj�b����ӼA�cvI�B�`hP���������b����!?��*�߹��D�;lg��7XaQ;\�7�¦!>�c��%G���p�i=�T�F=!�UU4�.e�=����3�)�ҘƇ���,_�aF�C���^W�Λ�MY�� (��T����:�o�46=�*stk��Ue��М:�HXV�%-6�y��զ��\k�jNݩJ���t���� 씙m{Ì�y�"ZU��w��Zfg��]�ʠ���c���/�jr�����kc&7bGl�8�(u��`�F���uk�E���%Y�R�7X�q
-��N�$UEZ��Ū��H�Rn����h�O�H��VUv!����p-پ4ԧ�<Q2�UOZE-������3F��Q�M�a�>A-{Sk���,w��Bg���v�����E�d�d,5��lY�+
-��XCh�pڰ4����%��\�3Wj"�\d�R���ݸ�Jo�1|Ro��6. �B6<�)G�9T����ٝ���-M��W>��,��Cq���}��/.�5��.�:v��N�a�,.u��@���"���K����Z��u�R�tb���=��y�9|!���쳖K�.r�y`���3+5��D�BoD[�w(�3�Z֩�%��8nQO7�C�]0*;(��]�zW�����`*�Py�����9|�vw	yg_#�Z�"�U�
L�qk%a�h3H�f�/���=[���Z�7w#]�PN�ڴyf�ʧ�Ek$*vN��#P�aVG���R�82(r�㓩��an*�0J�OI��۽�>����vx��NH�E<���Z�U��2���Ejo熵����1�p3(�	�ʜM��pR�f2�2o8�D&Y-��;qb����fA�=
�x3�m]�gQ7��D�UQ���.�l3��co E8yx�0������$"ST_𯔱��x.%��R~�i.P�����c��E���n����>����,oL���$��q�������ۥ�@�5=����d��A�\61�mm�ӑj��KwC	�LW��4�y�f��[�έ�Br���i8�?�\���<[UTy����g,�;0��tͯ�4Q �b���!� 4�m�F�4��,��9�'#�M�5�h���Bm�Xyڮ5�P�R�ᱲ��À�n���l�Z4n9�VZk~��L��u_5R1S���'���Q�������).�W�\D�B�"8���ݵ�ȣ��h����%¹���`�i�\���tX�ǈ��}�B:
-�)���Kl��(h�)oӢ�?�>MxB}�nWiޠ���
-�F��u��=��"�������rmX�E+ج�ۍ�le��Z(�ti���/�%3P�S)f���%',�i�l�Jݬ�}����������@餜�u���=}f�Ȕ����f9~�X�:�̜��x�GS�*+����|7�|�� ^��Q*�{���k�\`���Ü��v�ɔR�X�OP����0t�����ۺa��q��9|ӷūr��Ig6��+]*{x�.�N���ʔ��}2�ne�8�_e���ZN~��R�,��d�,�`�Z1�`ִ�Z2�xvQ�vo�$y<�Qʞ9��]�H#T��J�{w�1�tw�n�Dl��9���!��ۺ0ޤȹ�aI�àx�@5�ӳy�i���bo+�q�v�eD;�.7�Fwz�Q���x^F@��q�#��0�>�\�7�����,kJ�7#�$��H�*�����x����������Cz�E�߾��Q�Ҁ�����O�Fu���⩡���J;�~�0��B�
��;(�~ma�Q+�v񛻱K�-��ac��"y���:�]�������!�+��aj�}��Ǉ��2��F��_��Lq�aY���ڿ0����mٍ�8y�v`6+,5�)K5�~[�?ƜT�tp�V9+/?}�$�}���'��_�wU��K)Iz?�H�P� ����0^V�/�s	5bS$6>���)�[ɗ��� ������|�������wqh�/��cIR!M���-�tt�f8F����p�2OD#}�g=�FF�j���/򱡃��������z9�)&� ~�;߿�0�7�Ѳ��.�0��w��<w&�����z�b�W�h;�����?3�t�g�j���������e�2L���<@@?���c���"�qZ󵍗{=���^'���}���ĸ�$�?�� �,�L�!5�})Gw�?�ã�ZH�Uq�����;l�5�\��K�=<~��䱗ǝ8�F�ә8��CAu��)U!��7��iK�~�\����)&���0RE�I�d���#b�;��Q�xhr4��S���!b��<w�X��?Lz��>z-�~��_J��/!�P],ʤ�����K�։W�ح�7�&3�����XE���|�'��}�L�&�Dߗ�%ߗ����TRx��
-���>ʓ�ȏ�K�R�+ò+*��ݲܭ�[~^��
@��h��f��*�w�\�f�1�민��9#õ�U��HS�W��ve
O"l�*����gP��a�&�Y����8�
��] _��9ů[�r����#Q��|3��Қ�-ch?�1x���u<\ N����	JA��1�S)���M�W�Md�zx���C�G��B2�戧�M2[�y!V�����-��R��-tW����tZ��k��M�:	�-����ߍ�+���m����O�*�[�[�~2��iw���:Z���r@V��@�3")��y�"*�����M������
-endstream
+30 0 obj
+<< /D [ 355 0 R /XYZ 70.866 229.325 null ] >>
 endobj
-357 0 obj
-<< /Type /Page /Contents 358 0 R /Resources 356 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+358 0 obj
+<< /D [ 355 0 R /XYZ 70.866 171.048 null ] >>
 endobj
 359 0 obj
-<< /D [ 357 0 R /XYZ 69.866 758.996 null ] >>
+<< /D [ 355 0 R /XYZ 70.866 133.472 null ] >>
 endobj
-42 0 obj
-<< /D [ 357 0 R /XYZ 70.866 393.854 null ] >>
+354 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R /F59 317 0 R /F71 311 0 R /F56 309 0 R /F57 310 0 R /F60 312 0 R /F58 316 0 R /F72 313 0 R /F62 314 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-356 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F56 304 0 R /F57 305 0 R /F71 306 0 R /F58 311 0 R /F72 308 0 R /F59 312 0 R >> /ProcSet [ /PDF /Text ] >>
+362 0 obj
+<< /Filter /FlateDecode /Length 3452 >>       
+stream
+xڵ[K�$9�����Vc�~H=-Mmu!������f�?��#�a��2{Y��ʴO�v��_/���'9}_ߞ~sw���HΩpy�˓�7�.�	��%H#�q��/������t�姷�i�����y�~���QJ+�^>��<�#�+k7~ɍr���m�)R�J
S�	LQ�XU���M	jM�'�T>}x�YT���a���^r�P1���Β.��̷>j������.���Iי�kS�\N�VȨ�)?����MVxg)$��2��0	�VZ(�T��y�:\|����)_s���ޏS>�ǆ�&
+��ڰ;�Y+bp�a���vJ�d~�ǿ���^�4y�����#|f/�hɬ�*����l�����%���釟�͟���E
+���e� ����?>�����uM���jr�=����r�]_t��W����!��D� ^0t����C_4�;�����-`�6v4�c�	�Z
+�F�*P�zg*��7�p{�h�ϡ_���R` ��
���X�u�G:	��#PYaS3�!���|�tfӌ-]��~��5�r'��80�7"xw��)�d>H�p*�ԂI�3 �<�Ӕc��Xs��P���jw$TR�+��\�� ����qʧ���qK�
+?�6A�a�q�'s��%L��1�B<7�����A�z�D� ��* Ak*����ɤ��C��r���	"��8�q��QmݨЕ6q�	m��}Q�N}��A�VۨS��P�Ca
+@��D��=*5��"�ʑJiܯ�Ҿ�y�����^��C�V�
�04c�uue�p���B��y�&X�d�&@b�K����U�����w��v���������<*?5���FE�'���i�R�	��8�~�[e���y={#��6����ʟ:��a��*B�e�@sdC{��4�g�UbmFj+A5�U�͸.ӻ5��[_��8Y�o�%ķyl�#?	U�h&t�"���x�HydV_VC~[�p�%�����'*�u?�]��?����d��߮�O�8���ʥ�$�0�R��pb�!���N���������f�����$5��LXT[����/g�l_�'�Eȸ��M��(��k��S�'�ny7��$����X�������������R �����\q���Zx^��A��Xpa|�M���o�ݩ�Z�wo~��@�b!sd��$])6��ٓA+�$�����@G*�t��Fu�AU����ܰ�˯2(��B�`�#�S�!��Ǒ'�g��YQyƋطZW^�v���+=����9�VU�* g���ɃY}ﯮH$����~`{���0"�I�&���n��9�U�Õ��O�`V�}�9�H{΅��u��:r�����~�;�XG�)��C�K$9��">BSa�Z��ݓ����[K���e�!Y@5K����%D����嵮�� ӱ*㒫j�K/l�
�T[�=���Ւ��_Kߧu�<n��R�uN�������/QaD��n�mtP�S(&Nǐ�\�'�E7G&�g�=�Ô�U2��P��؞��ʬ�5�ך�;S��v/�mu/��:��E�E*nE�V��+��ׇ�l���)�)����!�b���T)���,�$!���g�MP���E!��X�S��9�Sy�g0Īs�_,	�	D��ۦ���5qƞ�yA��*�9��c7�[��vNs}@��v�ubK��V&s�|��j�iv��Tm<��F᯾&^4�7��#-��L%����'T�)]�Z��fnO�b���Hm������7[7
�cMګRZ��s����F}�.�R0c�G���<'�t�.1�j�zL�yl�����ʨj��v��,�\�\��dS�WTR�01@33A3�)Z���$��%*��n`P}W➁s��(�>ҧwuMB��nc�	{���S��
+IK[I/��@B$N��Qq71mɱd]�����ZRĚT��M���{-��&{<J�����S��k���x)s7ͭ44�(��	$�²�8|s�-���ԏJz<>G���#ٔ��g<,��,j=��
+xg$D�Xj���#6�sG�^*���e��4D��hp��n)�m�?�Z���]�Z%a��=��YÍ��U�
�G�gt
�;�Gz��Е���Bâ�f���~L��Kĩ�-�2�ZyRل֝mUZ���7�P�ae2��q*�֎����UX&�M�W��:��k�5+���X��Z��S*f��}�]l�F�@@�ypϏb4��z�����z�K�7-����jBn�ne$�ӆ��'XOF�!ze�ګ�wY<S�&e��t�����~Xm|bO�ʟ�"k�ch*���!�x^!m�=
+:����K3+�H���|C��gL���@U>�<���3$
+<W��U�
��6l����~����Ndmbr�j}<Vo{T�)I��FM���D�
+-ҜbeL��7����c;�
+Mq���Bu�\8��j�ʲ�wN�i��k;q�c����Tz�N��JN鸞i�}��/�)��PU�M g��i�)ʜ�m�#
���yr[��@c�8�1[6�-�"?y۝�ݎ_�2m��VQ�w��2�V�fgbJ+���(��l���џ
;���s�g�U�oM_�vF5;"˞[�Nά��N��H�i��<
+U*C�$�*���k���������]��b+�-�B��t�8^���4>�����3��V 6X^!�1{�M�I�nP]��l'@�Cy�iU�N�NhW�����v�M�^�Q�^_�'7{�M����Y�i��1����y�O�7)|%5�	�]eEsץ�=c�:�P���	���Ŵ��Z�ne1+�
+�6<��vtj��l�$�-������
+~��T�,ZV���t������jՉC|#7��2i!pNi[�~�{k����OÔ㭬�����;��N���-ORdOk������x��F�%O
�D����Sr�������
��. �E�C��u�͚�ݸ �R�����)��v�/,Q��%Hw�jZ�I�Sn�l�W��T=�b=��Z�cnhOzZ��Q=�-g
+�����C�;j����̣U����w�t��3\��~ѷ�~���ө#��x�A��Tyދg�!+`g�;j7O�
K�)d-��#s{϶RWQ?�
K�gwgˮ��0�w��S�%Ϯ ��"Q�8����~�[�{[h�dkU�䕏=�Kn��x�D����&������P�R.�-��
�8FF<4zyb��aߙ�Kڲ��x��Z�{����y�H+~�v��2a���;,q�!{��:�@�&Խ���P���ZVg�C�i�/rIvs5���F��S��45^>gN@�$�����I�u�9Y�(�?�����}��-�C�ܛi7���YK�ͬ�tp{c��gz�;�XF��T�_�u�i�����_�O�u
+endstream
+endobj
+361 0 obj
+<< /Type /Page /Contents 362 0 R /Resources 360 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
+endobj
+363 0 obj
+<< /D [ 361 0 R /XYZ 69.866 758.996 null ] >>
+endobj
+364 0 obj
+<< /D [ 361 0 R /XYZ 70.866 721.134 null ] >>
 endobj
 365 0 obj
-<< /Filter /FlateDecode /Length 3944 >>       
-stream
-xڭ[K����W�qci��Vh��>�C�+���������W��=����f�z~U��_.��W<�^?�����"�Z{����'�".�3g�x&��|�|��su�\��_����G	��ܼpn�r.��x/>uO4�s��(��O�_���-P?iqt��Ζq_��"�<���t�9��������<�]�Յ}���#z{�wL�
-�����w���W�JIꊌ_��dd���?�����5�䭻Y���nZOe���\�IK`4��s�.�V�u
3^�_oWפ� wd���P"-iYTk�q6�~��,����Mua%�F�.�yv���Wy��H�53�C���Wi�@Y�
-�`���;Ҥ?�3xݩ��(�/&��^�mԫ@A���꘰���ݙ�D|b!�WS�ee$�ֹ��<�
?ggf���ɶ��h&����+(˘iJ;f��A.�D��'�Ӫ�JRD�LP�54L�o	����J!�.�
�tG���]�ҝ���آ�xE>�H�r#��t�E�0�!�4CH��-U'���_g{��[.DGb�)��0:��A$�D$�``䨤��$�C�E	��'�c|�چ�(*܎����w�DF�fRH{���Qp����G����o�S�1�;�8��5ߕ��
-��u̍�q�[
-��w),R�V;�?��τ�e�?>���'|�O������8�s؎����^�5!��t�8�BsR��l�/!Ќ�lF�� Vx�&��}�J3meǤ��z�uX2����΂�}�2��T�!��1d�:�)��OXP1�vc��4�4/iH\S�QR~E�IT�6%}�;5��J)�J�+��$��c#��_�IV��uZ�f�8�ՑV��lM�h�k2|��5vd�5JR|H�"L@&f"��Ygj���-��1�,�PMP��op�46$�F�3Q��j�����mL5��]ɵzPI���I����p)��8����m��k�LP���a����rB�3�i/1�M�ۃP����(O�ˁ�����K�zG()�4���Ҩ���5�Y~�I�i�\���q�"(s�g^�����"1����y�y�������,V��K�![�PĔ�̇������&�h�j|Q�1���Kܒ�P�l��If@2#�vg�}X"2�P
@.��#^_�t���.�Tv�\v{��6ϑ���I�+[�a��s
-<[�q�},��{ �k��d}Gt�FK��v��0�C��3���)�k	�
-[�,���-�3��9\'�F�	�	��?"�هv��;���d� �jBS�{O��k=�+�]LGM~|�I�y|���2�r7��x��zђ�6]r����f�Vs4]��l'��īY�(�T���֦`dg\��pg�^��QPLH��uM��hl��M#N0mQ>B�8v�N\.�
-lX�r�Z6<P�X��n�А�c���!B�H�q�B��u��u1�bk�bj�´&�u1"��q��w6��Z/�mZ�p��m/�L>�`�u��[�+X,�*�j|ۚ��1�V�~���������˭ֹb����2�_�?Owڪ�:C�=,�����jCV�3F2U7�Kۺ��%��
��~v�������F�,�=5�������U��Cn����6
��ה�~�N�bV�4HrP����m
P�5�Zl�*�(�a�d?ƭtw��HC�E�bKf�{�����V��25� ��=�4*���n_���U��2�>��`ԇ
-��AN��{=PiC=W��]��J[�j���B��#N��>Sۿ��S�Y�>?q-���Ua�55D��_�oȗ�.��M�DK�G�3�m;f��>�9ަ���%�tx�g���u��iE�'�O){M>h)=��7Bt�]O�ڃ�~��
�fQ���ٙ��I=8T>�nR��D�|���x�m�K�J��y�b�CPZ\�*��U��mBM�	�.�IIL/��tE骵ʼ�#� �\��A-��f�k��t�";39��h��ߣ0ZXU�=����5\�_T~��zj:Ť�]�����a̝��LUrZbvʛks�H�NS�$v�>0��
-(�W#u],9X�RU�m�6����$���)��u8���۱j�I,�`-���iL_,Hs�	��P1P�i|��F5O#��o$
-�����+Q�qE��ԃ��?�P�U�fh�%�3���O�E�A{�����bN��8in��G�Y�e%L�~��	#�RWҵ`��h��	p|���>�Ĝ��(UYO*TS@V�K,c�=ڷGz�qP�S6��Es �Z�3M��j�i��v~�o��7��[2;����(��K��[����z�g`I�yԥU7kyŶѭ��	y J��a�{}�̛��V���z�eM��=:�oB�E]�6WEպ:�A��Y����WC�А%N�g�	�����ş�֧`5�<����({ŏ��8	i��l,6I"}i��1C����]�4�2���ғ�}?��g�kN��󯋅�o�|b��8�K���ls��S�Am3��@}
-l���LF}H�[ۺ��1��I��.[zJ�M���R,'6�o�ؘUY�BuܠQ�o�ɪRAO��k#'	�75�����IZh��mu������-�q�͛�3C0���{''�ҬfJ���-ez��»d8�w|�`�yf��'�f]�����_w&«j��ř�@U(�G�q~pR�I���ě����WT(g�\M�%��T&&g\�ͮľԍ����AϤ0���1�����LՑ��?kg�L;R�y��yV�����̽q�\ё�����+� <4]Ko�����i�d���C%�@2k����l��g��ħ��3E����T�q]\�T�A������C`�/�9�eh*(����d�����2vs2/�g�d�1�̯�+�``.���"X��S~�?�/U)�(U���w����V;�6�2OK%v�kQ
5�9�* фA���S�3�
-�g�DK��Kcjq�����Jn�)���8��]���4V���rH�}�C:�*�J��� E��!��:$�K\���3�h1{��G�^f��%7LH���2
-�&9���(�W��x �)y�~?x*���E]�K^��,��B���d@%��TtÛ?3T���D�`"��>Fˆ����A��Р��.��7nirE:��j��	O��Āi����»TZn�I�iѺ�,��:�!�|܍2�+�l���7[���j����%fp;�ð�c��IL�,�;��B��ݓ����,���bQ̳�B�
-m1yM����&9�Vݕ��4��N�`�A73��P'�
PLKr���F�
-%�t;suj0��v?t���=�^�D����BWڞz��C��	�����i<:���d���+[��B�����
������K����:�	/�Tn���\��(�1�㏦O�>��-�#��Sm;�Iն$�`A`x�mX�~�3{��"TƹM}��Cm�0��a�2�V���C`)����b_ߟ��F�d)�X��uC�!��f�|?�h��3�t��#����󒝮���{��N�u��W����P�?k�Ĥ6���/"$4Ƿ�,���O�	��*!1s�	�(�a�,�O�,_��ګ5���Ta�(м�EdB�Hdr�MȄv\dzgZ�|Ĕ�����ɩMcԢ:���_�(f	وҗUf��%QՏ�I9�3��=k�Ü��ha�#�D����g������1cYђ|:j �p�#��`�JmG�&�U���ZK��`"��~;���0��|�_�Q��3��M4s��%փ!�!g���W�����͇M��z��kS�3k�]���c�]:���\��!!\�KB�衧#��R��^�4l�����^
+<< /D [ 361 0 R /XYZ 70.866 691.777 null ] >>
+endobj
+34 0 obj
+<< /D [ 361 0 R /XYZ 70.866 581.166 null ] >>
+endobj
+38 0 obj
+<< /D [ 361 0 R /XYZ 70.866 555.61 null ] >>
+endobj
+360 0 obj
+<< /Font << /F52 230 0 R /F56 309 0 R /F57 310 0 R /F71 311 0 R /F72 313 0 R /F60 312 0 R /F62 314 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+370 0 obj
+<< /Filter /FlateDecode /Length 4178 >>       
+stream
+x��M�,��b��dm�oi����&������S"�b�J.��P4t����jv�����b��?���D��}y��Èg)�`�t�_~~��M|���歍a�.<����7!�M�P?���U
+!ҫ���W����W�L|*~�ᾍo �'X��6=	�T���QFAZ���d������77�(���m�>I�U]���*W�=�D7fm�L'|����2�{��N��-�/�'L�&�w[�P1��=���jo�Yi�wH�;��
w�oA���Ge[�����r�1}G ���� B�IHP����w�����
+I=h;i�%d�X���W�U�N�JX�Ѱ�K����1R��Y>Wމ��v���\$��"�IG
+:،�D��7���Γv
+�3��ox����Ȧy	�N����AT��?c��i_���"Y!d�t��F7z���P�z�o��7��||�$:j�����l��oGZ�!�3�t���t\�ݔf�oR�,�+�X�d侶�;�]:���7h\�qݒWi>QǺ;�4�߃�d��g�4H8e���Y��*��#�2���'�W��l�?q�-$*{[��S0�0������˟V�����U��,܈�i{�6Wpl�
+F�4�Wv��Z�=i�Y��/�����çYꊲ&�P7.6u7\WvsUF!B�\]G%��(�roڸٴ�Ҝ�3�@�d�y	��6��
L�D�A�m�	?�r�8(�
+����ꨦvԍ���V
~G|7�.�Ge��5�)��4}I��#MVA�3q9y����<0!@a��|RoΙk�-����\[�iI��u'4�T���������(J�~����$[o�:�^�d���F/��K"�v�'p�Љ�pkt�k�~�ţ,hQ<�1��T����I�b��nn�BtN��'�:�4�*|�O�����P�I�C.r��g��yi���H�'�i�ά�*�Y�)��e�-ќ}�g�҄�ƛ�{�������;���o�u��Y0�F=��0�!%W�@1��;����#���D����u�бh!��7Go�@f�t���\E����h�䦕#Z&7C�)��=.�+u����q�4�QRSm�������3\*�B�,��2����Q����vڞ�EN���+��Ż���ޙ
W��hFAc�d� i��v��	%L}r��rG�u�VW���Y�{�N"���� =k/8*�N3�(mM�RF�'��)�l�DSƀ�v�,ArU�dNqj���?J���6�!u� aޤ�Jʩ��8J%��R��9-��Lz	����`�2#�OWL2c�Pk颡�i�L{@�6������㦡�Q�Y��2�1�*����B�^�����&��d��4�e}��nr����S���������&�+#�&<f�^���0���޴�#�縸�i�,K|w����	s���L#l�JU��	�"w͈�逾M�c.K��Z4�|+/?z�¾�������]��D\�>� ~���� ����T~����x�wS$l6��qA
�K>y���[��_�~�)��G���Yl����䡿���������b:��|�a3�D��Ÿ��H��;�ȈX-_�I�"����ĎW�%�6���yN����b��\�ܣ��sfF;��ط���.-M4�c�`�vgnI~"�G"}��g��*K�҃ˠFZPYZ����a�B���9?Ɲ3�tn��Ӛ�-���(8����x�\��2.MT�q��l�K"2�R�)&N�k'�:��_Kģ�Z���.�&��6u͈�$��d�2?9s+6'�ȼ�9SYP/���堠F����*9�.��b��_-r�*���s�~��*Z����pɏ��oF�3?��H�D�4*\~�NN7�\�}J���$߇t�>&�R�,b˅����"�|D��WD_B��:�5*+0K�o�E���+���[�;NmZ�K�/S�%�3Q�epI�e�WD_ڨoL�C�Ԅ�c�)�Ɋ��k�l�d��* �<X���Ŋ<R�Ep|�����q�#��6Ѷ=�q�@fT(�T�&��FH����[�*�T͝p�c��B�D�����I��9����Lй�I��x6�}���P�/��T�ˑ8���'ů#T����͸Όj�+��vT34:��m�-�4����	aA�i��"L����ֳ�Yd�z8K�aM�(�6��ak�8ey:O+�`����D��)Rn倲���Xu5���� ��^�2	Z��W��Q����t`[pEB���ر�T�tE��0$j�I�����
i�y�ы�9W!��©
+��/�a�U��<�*Jd<6�Pd���V�Dx}]�Ma���j��i���~��������^�;Ӱ��3��8�q����������K���g�e����ڀ��d�
'��Ɵ��%@W�4xY�`�旈]�=>�����#�y��Q����PC
+�DN��q��ؕ.T6����BE�jb���ZJ�q6ӂ�P�_�惚�}R���:X�`V�c�<��|Y�M���%�p���A�D"�Y��!|�Pz.��;L��wO�(�cLn�T+�B\1=8,g�a˝�+��y�[��I�D��Ysf�:Rĩ�#߸#]�pEqR�H1����J6-|�b6�.�0R�k�H�� �/x�v���D��71K~,�DL�"���*Dl��!�gDt�ns�R�@J��h앲���Am7�VvA�_Gv1p�v��.>�͈��X���?�2�M���Y��٧����^�د�Ɯ��:'�/\.�P<�n��92O��|��ମl��.��VK�*�xy~���S�%ZO�4r���wR��5��Sv�Ȳ�yF�v���)0!5Y��
+��E����Z��γ�veF��%��ݷd�I��:[u�j.�Eߏ�2�����سL�<��Ze��k�~�<�rޤ��x�WV��ҵ�F����2����m��,-܉�����kqSd���\r�C^i�
�#᨝��N�ђ���w��e����EA��T�{�|rʼ�.��Vfsn3@�-�1#���k���3*�����?r�8$,"Q�'o���*e��(1C��m�0�.�����&@��x��w�W'��srʊ�C*��>�W[ָP:&R�����ϵ�nʥ�Q���Y�գ�����W���녗�#��-:b�Gq��z�{W9B�v���U�L��"ju،�+GzO�R9r!W��ǥ�Qtm�ީ�o.��u��65�3�!���-<KaVf?�ٙ}avf_���$�,2s��KI!�I�L㪰��>=��j5�&�~�z�E��wC!7��Ю�u�/vLb��'#Ո��8J$Zn�h,�]׶���TZ1:V���4���PC���2~�8z���1w��q���W"��j.�V�<�u��u9uq���ץ�&?����j�v�����׽�]�[s����za{�Y��!�6�Q,�B����E~�3��h��Z���#�s���;쉣F�'�W��T#A&����M��]����[*��p���l�3��-��.}�|4�.���>���t��*�^�g���c���81
�]i1��u�gU��خj���?p�I�B���ɑ@^nz_IM�D/럅H:(�pD?j-|zk�3�M�4K|�=eK�ә	u�줿��R�du��Sp�I���6�F0@Y�L�
���f詡ӡ�]P�)�����1���qH�۵�I� ��\���ς�?BjZ�z�X=�뿩!��5M9�քP�n���4vi�-s���L�@�Lu�oj�;��%�T=��iζ֥ߒ(u��m]�x�q�D�������R��L_���Y�E#p�ψ��z!>j�w�\�A�ZA�ѵ��3�B�h���+U6�����۲e*!�ޗ���o�'�]�CΠ�U��xŸ<�찈������N�����78f4K�Ц��Yr��ЭK.�0��4��q��d:����m�l���l2u����

�[�z:cЯ�4:�6�I��������q����ߤy�ژ��~�������0�ز�J�CiK5>�5u�(]�R9�/y���c����,�lkM�TJ�Q��Ley�
j��s�����8�֗���MS��B�)3�9����Xt����h���{EZl������/�����<�vvR
 endstream
 endobj
-364 0 obj
-<< /Type /Page /Contents 365 0 R /Resources 363 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R /Annots 371 0 R >>
+369 0 obj
+<< /Type /Page /Contents 370 0 R /Resources 368 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R /Annots 372 0 R >>
 endobj
-371 0 obj
-[ 361 0 R 362 0 R ]
+372 0 obj
+[ 367 0 R ]
 endobj
-361 0 obj
-<< /Type /Annot /Border[0 0 0]/H/I/C[0 1 1] /Rect [ 484.223 488.494 507.687 502.454 ]
+367 0 obj
+<< /Type /Annot /Border[0 0 0]/H/I/C[0 1 1] /Rect [ 484.223 177.403 507.687 191.362 ]
  /Subtype/Link/A<</Type/Action/S/URI/URI(https://github.com/DTolm/VkFFT/issues/58#issuecomment-1007205682)>>
  >>
 endobj
-362 0 obj
-<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 204.121 262.719 211.971 277.175 ]/A  << /S /GoTo /D (equation.2.1) >> >>
+371 0 obj
+<< /D [ 369 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-366 0 obj
-<< /D [ 364 0 R /XYZ 69.866 758.996 null ] >>
+42 0 obj
+<< /D [ 369 0 R /XYZ 70.866 721.134 null ] >>
 endobj
 46 0 obj
-<< /D [ 364 0 R /XYZ 70.866 479.967 null ] >>
+<< /D [ 369 0 R /XYZ 70.866 168.876 null ] >>
 endobj
-50 0 obj
-<< /D [ 364 0 R /XYZ 70.866 339.32 null ] >>
+368 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R /F56 309 0 R /F57 310 0 R /F71 311 0 R /F58 316 0 R /F72 313 0 R /F59 317 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-367 0 obj
-<< /D [ 364 0 R /XYZ 526.166 227.933 null ] >>
+376 0 obj
+<< /Filter /FlateDecode /Length 3909 >>       
+stream
+xڭˮ�m�8KE�z7rr|da���,��	�4�j?�ԃ��<�����̈�7)����M���F����7_�Z�)%����>��F�yS7/Ep�P>�>�v��EJk�tRJ�w�R㯃w_e�{��ɥ��O���4�)�otH߁W��!�<�hwO�m~(+�{Y+���/�d���&}����0�R^��Ғ�˲�m"o���[P߶1���Q�Ӹ�ȓ7Q�N+�o*�K��'���\�H�W��/f��?|�n攋Fx��S*P�)$��J��_��*��[M�>�ںN����ioWx�'>�S��yYڠqeh��=
WLt�m�&�\Ľ�V�\�Q���\ik���l���++�F�<:�k��
+'�,
+R@�ĢL_ڄd]�u�V~+e�pW%�f��
�sy���D!�m�eJ�,��Z\�"+ٳ�?:y�$p�^]���K�%M�'"�*I��
:Nꌲ�,��&W4�)�K��lK�ؖ%�����p�YY�%�!I!��/��������=�a�y�0O6��J��
q�����dT�K�D;����]_M��Y�u�w�pi�_F65ɣ���P
W3^̜�!�:魑
vǳ�����S5i{�l��dl�n��@�a6`������Ҽ��Ŧ9N'��L�ٽ��7)�[*�UV q�����ȫ5�!!�@_��C�G�UZ��!6�
+����p���K�8D@$ʜ�]1�o���G�6,�c��T��i;�"�$5;�ٸ^�a���&��x�㳴V_X{A�t@o����RV;/u"c�I!���JS�D�<%�E��[���r��W�ikW��ƙ��J�5A��B)?CXX�V�i�����S�G���,Pb�����5�pH<�%���A(0�2_W�C��L�
Y4=Q��L���.�q��Jr���и�q�ۅ�.�R|7Ci�����`����âE�H:t;B*?�P�A�@QA����{;�	������7���_���ݤЈ����nA�$o����͟jN���Fb�I��5Sx��} ����c�j-�LPHU������Fn
a
�Q��Nv��%��*��炦�F	,�{^pT��p�Lp���	b�Ġ�Up��N������ �ң�:Y�a�S	>"�������������5�ӡFb�)��($�|I!=��+
+�8����	�͎B�Q\�KR�6�y����V<^sKC&�%�l���5R
+2�|0�PfI3���F(2�IlV Ȼ�¸�<YȢ��ֽA���!�Y�N�A�D���Ϯ$��sA��mc���`:8��ճ,��XA{�m�+��F�'�ӡ����ncc����8��s��A�Oq�u�)_.�<�Ì��z�Z��'�X����'�����'��B��������cԴ֛����N�Z��*8T%���~��)�� ��ˠ'�)�\ng�O�<��ݰ�Z�(�����J@[�Q��
+�S����O	G�<���|����')�	k�v��!�e�
`�R���W��hP����e�U��x��A�x3h��WD�xkB*�o�e��v9�T��A��ى֮KŘ��`j�$\��)��!c��Ԯ��3')r�Sl�kj����|}�&)��$)�I�Tu3�7 4��ߏ�emseѕ?�M�H�'^Y����U�6C�
+w��Js��R}3���ˏ�0vz��/\l�:�]E��G�l�rzTx^UI�Y���g��e�T¢�f�M*�n��N��Q�,@a�=�x��8�3(����c:��ؐ�i�c�~�Yyz��4�`��+�N^P�~��e"	�g�8w&�PE(��e�V����򆠈�����e�\L��UA����aș��Z�-�3�����b- ;-f��g�)L�#̏'븘c���ͽ�J"�짓���-�r��0�B}��;�:���gȭs)�=En�b�Fϐ[��������NV�Ժ�6f"f!湍���9�3�nq��3��`(J�{�Q������N��e�@�~s����4�u����g�F�qA�`/^���Vj��U砐6��g�#b�y�sXb��s���tN�{�H��jGǏrhO}&��ghйq��wvj���@�N�+��N�WҚB�m��c�"�P��&������2�'�5'���CQ�z�{g٘ƫ�É�ǄJ�NC0T�~S��
+q���un�g����{��b �,�CN��R�q'�ʚ*�R�(��%]�.4���M���^PB�Jtp�P�A@Dgb��&��	�A��fy#�v)*[S�¹?ĳ���*
cI�J�M*���oh��~�چ���҅⥞���T�%Z�҉,�tEA���0��n��9�o�y�Zt/c�:7��������4��oȨ��I�������J+����f��r�V�ٱԳ�h#���R*(��MGYy�G�m�jM�z�\�w,o̝�5�-����iXæ��l��d��I$��-�$ח;�
+.����s)����,��r�	7�c��J��h���{��ݧe��ъv�����^kn���u
+ :v
r�$�M~_�Yb3�޹E�1�ܣ/�����~}tr�9[L�f�u��0��fZ5�.Զl�M��z�k�~�7-��T8�.+C?t׽]���4�yyCt���=Y4��/j(�mv�2a( v�����8k�M��Û�f���!Je�?u9�����7�J'4X�Z{����:��D���ݻܚ��Q�PSg7���k�T������nU�#���*�F�F����·�S����DHx�ƚ{Q'KAп�!k����m��]_�ǾK���?-��*�Nlym�v�c1������<������Q].nf�/��ͥ|ڛWs(�r��n5���_I6�B)�|s7U.XȰ@_�l.n02�1.�mQ
��S��/[S|��[$�f�YpM6)=�
k��L�Ab�j�\��.�_�ސ)~ ZWe��s��Ź#B�U�n�;�쇺�����9��_q�?�JU��h��8�e���o�:�Pqt���|Ʌݸ"}�������\�����)D��=3�XY�_�'x�c_��wW�'}����JL�?����
+ԖqK���ې����B|�cln&��1xx��&݁��4뙎�QDj�H]�R�f�L[
+}f[\F�G_'�.Dr�+���@8f"ˬ�:���63H�d,�mW�������������W��8���-e���b�;h9�ƞ�j>�$9���;�զ-j�*h�k��51P��8'�Ń���v�C\�
XT"yj���sl�m�q�<ec;���0�����Mn=͙��l�iP��tPmj~~�懸��Wqj
�U�ܣd9�����u�q�Q��-�����l�S����&���&�k+�i'�r��S�m�w�����Nƽ��w/��!N�;ό�K�����3��U��ڨZ�)X@9�3)=n�5�d�4Ƽ/��8n�X�
��b؎Y"٪	CbN�"���:Q���{��8+�fy=��=���C�}��B��������E��
+�ݬ�
+�g7SK�vS�n�����?Q�?[�v����5����ֻS`[#�՝�r/���(��7��Ħ��R�,P��$J.1B<(u��ƴ�mcXw����=x޷)\��p!�7�k�fI�H�0����r���Ծ�G�_e�o3s�B�k-q�jn��:�Վ�=����4'Б��9̞�C��mf�����Ĵ�0_���o��O���+;�m][�J~cd�����|:�� �S��~���0�M�+�M���e�M��f
�s���&��"Kܛ��-2�|8�e>Y��m�aQ��I��ؑ�UI�g6'��7�D��ڪshv�}��nn���na)�z�����?��V������}�
+endstream
 endobj
-368 0 obj
-<< /D [ 364 0 R /XYZ 526.166 198.892 null ] >>
+375 0 obj
+<< /Type /Page /Contents 376 0 R /Resources 374 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R /Annots 382 0 R >>
 endobj
-369 0 obj
-<< /D [ 364 0 R /XYZ 526.166 176.017 null ] >>
+382 0 obj
+[ 373 0 R ]
 endobj
-370 0 obj
-<< /D [ 364 0 R /XYZ 245.304 88.844 null ] >>
+373 0 obj
+<< /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] /Rect [ 204.121 594.043 211.971 608.5 ]/A  << /S /GoTo /D (equation.2.1) >> >>
 endobj
-363 0 obj
-<< /Font << /F52 226 0 R /F56 304 0 R /F57 305 0 R /F71 306 0 R /F58 311 0 R /F72 308 0 R /F59 312 0 R /F50 224 0 R /F62 309 0 R /F60 307 0 R /F73 310 0 R /F61 319 0 R >> /ProcSet [ /PDF /Text ] >>
+377 0 obj
+<< /D [ 375 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-374 0 obj
-<< /Filter /FlateDecode /Length 3998 >>       
-stream
-xڥ��#�-߯�p_�m�G�j��V3�r�h3ہؑ���I�8xtKcS��I@����7���o��}�ç7oZ�{�~��.oԛ~��H!��|��~���/7��Q*��&���;������\y�J����~���[���C7�����������|��˔x��a�p�~ڥ�j=�b\�4�O�,$�i����XFx���er�B�u5�Y���|}^(l˟������r��q�K�����ķ�S�m��uU?]yo�.��jN���+z��wZ��(���7��YA��T��6W��::~����4�M��;��H�]�c��ͩS�-�7	O+	l�_Ő�n�=m��E}��|�ľ�߮�.��xÜ?㜼�c��X;��_�~�D�}��i�Z^�5�K�����R�����@�E;$��v`�%Ѭ���~���a�>J�2�ٽtVmH�S~޻c�<%�M�P&~Il
-GJ_�r���W�i{�����颲l^�U�x��4h8��}�����N�M=�6vE��"�a6�_Hۣ�T�A$,�!,�z
�j���^�����#*;.��"�����%JOd�`qz�؋�h�:ºsI_�Y��(t��(��V���;3-��j�����}�^[<�?����D2%��F��#��)Z=g���jj�s��g�	�HN#�l;d���T{rF*Xk���G�Ė��	�)�Viel���S#��1��<�::�$�<!9�2�Rػ��� 	�a�ep�jg�t��D{}���f�T��^�U�A�Uo���#T�A��}_ƎfJ�OJ������G�,jv�܃�&����٭�oL�
-�|�
-)vH;�p0��hH�+T��wf��m�x.�	���װ]xĢ�JS28�m�K�/��� ��ܪЌ�L�|�����k�}<8uE{_ d&Ye��R���������^9�`��P_
-n�&����x�g}8cb�]7��3S����a����>}/U�iF��
-8Z����A�4���PBMG������.�N�:���8��l(�[��S�ƺZ��i�}��*����|^���~��(�;�`X(_Y����ǡ%��n�7�;�"{��s�j�M���kƼkC����i�ޢ6=)�<Y�.�cؠ�IKu�2�+`sޢ9��/�������E�o$nU��4{+|k�I�7�0:��h���Ň�rl��&:�	MHK�T~k<'��(!\�V��cv�]��㐶8Yٰ�	;~��;nݳj�%	���V�e���
-q��C��ߎu�Y���\�1�z�m���i�-���8-��gZ�8��a:�y�LM�(�%���OWgG_�t8�
xt��c0�}�ʷ-���sa0���2)�f[j	;�l�IX!�I����QH� H�ߢǔf�&�-z>�*�Q���ԣk2Z�~sh3�АY'6Fh}�$�,.�ǳ�����h�e��t��T8�����ƋAzb�m���xQ�.��z0UΚ5߮�xMi���%��7a��&A�����i��x�~\:E�Đ@��(؆s7hǜT|�H�q�������x��4-��.���N�5�f��h���S��HJ���d�2
-pP|䚈��Ci���1�Ve\�A������!v?���_!�oͨ�v��І���)��1lܫ�}�
-�(ݫn��ANٻ�ȢG�_�z���%�ݓ:�E]�DƮFǄ�ͬc�Dn��b�0�C��r�"��\��c�(v!tF5��TM�H����'�"�Q����T����}�n���9�<�9�ɸ&�3�x6=m�$r�-���������w؅�֡��NS�L�5E.�;�2�evgǵZ鵔UZ�\�4��_O�dp�E���GQۢYO����=dܷ$.�x��P����Ҍ�CI�b�6�Q�<}V�9ּ��:+w|3+��<:��r��vkI��<�C�;��|���Q�EF^
ЫVk�����]�%}1�1p���O�G]D?N���ԎLB�@�KIiO�⌺(���L|p!�)�]��t	��톜��9W:~w|3x�.I_�������&P<����b.������HK���(��Ib�c�i��'��#��Ѫs&h��<̞Z:����R�4	�h�����h�����IFwM�f�Lh5�K]%��d�����"AZX�&��Q�]8|(���������l�~]#��
;����s������U�_i�Ud�up��"X���F��I��RS7�(�-,�
-a�A�8|�`�sD�	��k޺���D{�l˻P"nE��`\�QmߥD��rόʏ�*�yn*����32�#��$!�T�oc�k�8�|M�"��fѡ"������<7ac�)S�ͣ�Nz�L�f�����>�ڿ�����1Ju*�Ju��.�vv�=�R��,�^��Pw�	�����
-)�9O�!�V7�A�
-���>�Ox�S�%�oN�-!��S6qHm�A�!N�<ŉ���|V����A$o������E�����G����jEr�T�	��A����Ҟ�)ia�!���ǄШn���c�)�vH�.۞2�S/_ ��6�(;؅���y�>M�^\w#r���Nq�bo����MS
��	�ΰ@�I�Q����Tf�۴���y�U	��t�!�~58��@�E2�X^�2xSs�k�;��h���j�R,��a�FGl�E�i�"�R4��鷝�-=�g�z��������>�</�
�*׏�	S�¹W��a����k�e��y�ߞDɎ���>�	�y1
-�$弖�<i�s�=���N����мAމ��D�{Ƕh���kۄ[���sAw[T����l����ϑ�GS�v�diOb�O"%ȗ�z�6k2A$rk��"��wX����_�o�lu��Y����Ye|*懴WM���Ζb{A-x�ԋ`����n��/Ȃ	h!ټ��Ё�"��^x{K�~?F�����`�e����Ƃ��3�O+�mV�>��Ch���c�s��A�m7",5�&��"
�j/!V�9�Q�>������~{���f2�rޡ��{UZlx����Y���
���)�D!����O��ݱ��#F�ԱM���_�W}s����=BY�y3�����ue��!8U�G�"x*M{r*3��K2S��Q�J�Q�.w����5N:�щ��WQE��y�<�^�J:�2R�q�Cˑ/i53;�>��*;K0
	�"�ړ��W
7�ގK��T�2-�[:�s:u��t-d�2���hr�o�"o��uln9k��A�z�js[��0��Ā�hK�@�Ecyہ�W������_��Rx(4��^�^l��W����T�l�!��|��i���;
��z�c�E����kbmg���>�1�z���!0������}{1�\q����,����[�nmI5���}�X�!q�u�Z�=i!�i[_{%�����&��j9�l6O�r�C;�v�^��IŽ�%�S��^j?���	�^����⋬�Pג���t^��]�H�=Y�y�M�]���$����n�;�"5�
��;�P�j 5+%	5n%䮁��6�-�����/��y�'W�L���F�V1��[��BMø��%5�t�'��L���t��c3�B� #�l�y����R���.��2j�����D�o�m�Xm����=u�'�b�ne��y����;掞��ck<�ʇ�-��g' �ZhL�[h�Kb1�2n����T82w��+��c�h�����?�!�:E�*��s�|f�S	,��Vn��'�o��<��g�]��K�:.]|�;�O���/����iߞK��_����R�Q/���̳�ƟW�a�]���lr��l�J�5Դ^:M2ٺ����oEΗ�)��1���x=@O�;����)l�ZzRF����tV�^r�R�������ks��C�ܡ�xڙ7����[��0��׷���
-endstream
+50 0 obj
+<< /D [ 375 0 R /XYZ 70.866 670.644 null ] >>
 endobj
-373 0 obj
-<< /Type /Page /Contents 374 0 R /Resources 372 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+378 0 obj
+<< /D [ 375 0 R /XYZ 526.166 559.257 null ] >>
 endobj
-375 0 obj
-<< /D [ 373 0 R /XYZ 69.866 758.996 null ] >>
+379 0 obj
+<< /D [ 375 0 R /XYZ 526.166 530.216 null ] >>
+endobj
+380 0 obj
+<< /D [ 375 0 R /XYZ 526.166 507.342 null ] >>
+endobj
+381 0 obj
+<< /D [ 375 0 R /XYZ 245.304 420.169 null ] >>
 endobj
 54 0 obj
-<< /D [ 373 0 R /XYZ 70.866 610.09 null ] >>
+<< /D [ 375 0 R /XYZ 70.866 278.38 null ] >>
 endobj
-372 0 obj
-<< /Font << /F52 226 0 R /F56 304 0 R /F57 305 0 R /F71 306 0 R /F59 312 0 R /F50 224 0 R >> /ProcSet [ /PDF /Text ] >>
+374 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R /F56 309 0 R /F71 311 0 R /F72 313 0 R /F59 317 0 R /F57 310 0 R /F62 314 0 R /F60 312 0 R /F58 316 0 R /F73 315 0 R /F61 324 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-378 0 obj
-<< /Filter /FlateDecode /Length 2881 >>       
-stream
-x��[9���߯�����4��~m����ٛڑ�j���*R�ZcL�`�ţX�W9���o���?T{�����͛�[�^Ƿ_������ߢ�R�7�ۯ���㦔{(e
��'=�7ٕ��{L-��Q��+�ui�O�T(�Cƞ��L�^�
���Ls��+�*���ù����,Z��Vb���H�L��`$���k��m
�r1��m���F=+jO�h�q��h_��qg`�{]���_���ۢ�$�:<�FƝ7_7mM���,Y��˪�ncC�7��O�>]Ώw��;�
-��}�7�v��2meX�Vf�]��A�5�j	ϰ�k�?�d\��
כ��|�\"�2Mu~�n"a{_�/�ͤH�#�![#��]�5ͣR�ob=��0�aM%�d��[���¦(�wW��z�^x�ZlD�+��ζ@Q�c���v�z�̓�=s�
kH�#�ob����F-m���V�~KN����5�Y�/J�$��.EV��|���qь,c0��!Q��m
5���3����V�����i�m�2��l����3g��r��P�S�,�3|��^&�z�g�Q��=k����G� �"��m����u���
-Y��é�pZ5�֯�<
-�����ؓz�h��@m3"�y��`��s?��W�6m�&���H&H;�:Ǿ�ZSk��B<+���B�G ��[�6�sڬ�]C�(��B�5�؀kH��N��`����������]Cb��c�
���̇�����"o��;�>���Ɂ�0�Ȑ&�Mď
Y�Y+�$�e�C��h���t&{t��g����;"*�����y�h&a}P�؆!Z�7���X6C��p��8����5�ya�J0!o.��$(���\�h),�E�B�7�f��#�2��7��c���4�f�C��1�s�'�k'����;;���<w���,��D�u��߈f�o������~S:��앗�8d�m�c�%F-��W��Y*���� q�ț��Y[L��dH�ٴi3m{�Xӿ4p�E�\�r
-��h,��@vNm>�]4�%*�f�Ψev�|�,�qc�n	R� ,;��B�bˏ[��~uJ�_T�%�{t�m�/��r6�a ��e%"6m8ܬ$	øu��!|��4vM=`�"��X롹�.��os)N�-䚨�AR&�U+8�\���jQs!�Qs5����v�_��M�+!�`=$�t���S��1�\N<��[��(���G$���s��R��r��t%�q>m��mxO�&�#�CϬ�}'��M�L7XQ�r"��q��wJ�y���(���jH��0�5y[��DBX	1L(cfO��H���Bɝs��,��}V?gV��])��Ɯ���0��\0zZ���T�����H�(��z@�Ո*�FA�v~�G
-m����nv��Ϝ�n�	p-�����n��I�tV�4y��Ak*I��9���QcA���I]���Ҋ�&��Sv<U�17�s��<j̡�Ŕ�3��3̷�gw���q%*�7��UG�viH�+'�⪤�v��G	>�QkW����y*hk��QDH���*������*����hA��x�.�_P�\�H��vz U�������r����9����L�e����0\��j@��~:t-�)>c�.�ᰥ��!�bq�S2�&��!���(T]��ZKT�u�@`�.�:��/��S�ln�kwy�S�lN#�&>X�z�˷r�R�3�㶦D�������}��H���Ln�����9ϋ�ڭ�}=V�|�a-�>���|r���9�Ck��O4��",?0eb3���L#:z��V�<�^
�����[psgV���~H��t�4:��K�iwa���vW��Cpd�^��x�Y�����q�1�5��1No:�q��iL^��n�jZ矅������r< o�`6+&Qv�#�h���f?����+I>���|��o�e7�39n�s=������
O�E,�ldu�l��>(�ǐ��h�:��+/7�7�zq����(���k��Q~!,^�%��9��ʽ��΃��{I�?8~����	ҕ�n�]D���
-��j:�>� ~��TQ�w
-���:8z=��߁��%'��2�u6?�5�m�@��f�����9���<�����l��2~�*d�YO �ϔ@��&���=��xȮ�^��;�WB���s�%Wrl���!C��3½����j�[750p
- ��ֲ(iᨹ��ܟ^�Z8�2�`w�&H�p
-M���I��G�塮�v�谨7���B_u��\U߱���_�41��y���y.��1�m�d�
S�xT$���V'0�*��s���m��E8]�(�󳄖@���;�����Ŋ�Z8w�mW����Kp��pz�3��dJ���1�����r�Hے��tx���"+.��cX�Fmă��V�ˊ���\C_U��v�Q�M|.[�*Ȑ�$��W����0�B�ĝōꆤPhɹ]����j���]���E.���g�;K���pz�=�NQ-gX��X��rޢO2\�UTn��G��=$����;_���)+��f�-�r:�5Աu�"�RP�+\����%h�
-���
-�af��K�}��Q�����$^F�wKy��G!zŇ���/KEA���״�
-]\N헬��|�H����e��Vb/�Z���ut�ړ{xp\i��a{;�y}�"X{7}zI�����2��۳�y{v�����^K	D�]��!���� `2�@U�1��A/��f�{ӯ�ZyU�>�y�]����ٜ�ڠ��Ԗٝz�借�~����
+385 0 obj
+<< /Filter /FlateDecode /Length 3439 >>       
+stream
+xڥɎ��>_�?0zdq����� ��&���5bk!Eɞ��p�K���������p�����_��/[J�|����ϸ�Qܲ�l�/����|���\�΅���R�����9�˹��Pg�}��Oȷ:���ٍ�fG+���3��W�}4�#��>��1,x*�
+��Lq�-
�
+�n���O\�`$���(��������;�A���W�n���V�e�X�w[?W��c��?q�3`Zw��E���N^'|���<��W����ￏ��[\JB<~4������Qq������"֏F��oOoK h}��0L�J�Э�=�H<��v��Hí1�Ӣ�����;�����5�ݞ�S��l�w�E�^�/��)M��PA��ӓMi��e��K���o9"4F��2�R@iד�8�D���w߿!d�Ƹ2T~�LX���Iu�����xq��R�cG�����	�Nn�agP��PON�=A���bw��f(k�(�$�iY�g����
�ʦ1�&��Q��hD�!c+�/�3�����Ly�w5R)�B\���)��=��wU(���j$��)U�_�*TsWn�F�N��d���C���6��T]����W��))���L~�6?������{�Qs���?ܠ�!��{|*$��Z���ʝ�b�ϵ��weŏZ�1��Z��0�E)h�	H�B�a�ߊ�O�(�P���aZ9���ۏ�A[s#61��S�e�����OF����Īߛ�!ze��:,t�AO��c�h}6܆�VU�1�#��\|�a�)���(82S�mm�uc?��`�����H�6V���㈩e���ϳ&*Bnh����薊,��6Oe����ی8n��W�Y��1ݬ>W���K��1���&��l��chzo��vi?_������.;��>T�*�?�,��W,}�J6>kIxv����Ċ=:���.�9�&Ս��;�Pe�v�	�Қ��n֬I(挳��H\4&�*��}3E�^���ђ}N���?Dl�͆U>fG.��	����F=U������c? ��t\�Y�^�]��
+uW���7_�G�,�Je��FQ�%6���$��E��k�=l��[�p�ߑ%�!�x��e}���uz��Lb)&#�Ů�>Tl)�
+��(X��6#�0fi�p����[#�8�,q+Vf:��p�Y��!?y���[���Ŧ1X������)�W��̺�>���
z�-Jș: <Oԁ�jtk�l��z�����y����bX�
������"`�J�j��_׏�P=�A����\u��A��#�Ѩ�9e�}�iv�I�5��a���&7���%�l�������B���z�]��`.To�F����h5�����������!���J�;�@g5_NQ�����1���+���6��w1j������W�7���"qź�&][kF_/��փ�g
�S�y���m�$rp�e��_�O]�;K����<Y��s�&Z���Õ^�ۜ�	>Bw���w�U����4��[����_ܶ�x���f���<��{��� M9B�C�c����p�v��Y˺��P��v�4ш�������tW�Z��/l�L�m� ��eCa���<�otق*��f�;�����RѰK�Ѧ�t��44�����	�9,��ҋ��TF@ �,x�IqfH��N�F�w�Yo=Y�;b��H��@���#�5w���#�c�r��,��	���Ǉ<���60-���Q-n�O���]�s�L��[������r�BIӺwd��(����F��
~3��a3�A	Z�#�H�ES���BJC��m�� c��,��p���u���N9bF���֩ԉ�8����͎W�/,�����΍�<bd�ʆ�+t��g@kG	h��I��.�΢��C�܍j<�����@�0�\9����YA�Y�v	[�*�*�{�|f�ö.i�H���i�)82��u����n@��AߌJ9�������i�g�T��|�)eL���O�h�	vg���vw�t�B	�rh�u�u�����DN\����5J��t�6�ʾ�F���J�/�S�cJ���׃4e����o�p�m--)��TC{f���-W����4�QoM��-!ﱁ���x�]@Ag��'yCf��o�`�Qs��{�Q�h�U(��*$�T蠵^[����3�o�|I��>M������S�RZ �CLYSN�V5o�A�y���
l��np]�y뤥π����nL�Jc�R�����.���<��G���ܬn�h�͢����l0Q3�����9��z��	��o���љA�z/<��;���7k���,���2�6\)�;�&��E߬��aLh�2}JK�+��Ȭ���[Cv_f	YB�����)A���z�o�P��i\&�d���|`j��S ΰ�*-�܌4��kgˎ�Wr��z&w�>
+[Z|QG[��sW��_�?w����4����>�rL���QH��p��
+�M+D��=œZ�A��1s�Qs{�K��W�WFб8�Jm��N��W�)u9��y�T̽?`���Q����n��"?�����?d�g2i��"$�-��6��u�v���ra�f;�/���g������kiP\PO�.�m6��/���\�Z�Y�^g���V�h6{	v7�6lh�ǚ�&�_
+ʍۍRWҹzY�=��hD���6\Ar�ΓĄ��<zlSgm��x�v3�*�WJ@��|�D�XЙ�ouV�ؖ���Jv��O2�!�W���8u-��!C�9�zyi2~�ҋ�w~�;��b��b����ỵ���u�2kk�p��﮾)ۼ2���q���ڦ8:���?�j��i����]&�ܨ�����E�F����?|�\74����^���;xkζ�Oi��R�d?���J�3ߦ�i?���@�6�����ijZ��hhVri�qW�,�H��+�M͒���w���y*]�_�e��מ8P_�I�e�?#a>��d������4�^^�bmš�17���/�_�A�����D[����5
Ѱ��ZD��	�����Q�
��)3"���ؠ�fL.[��_��o~�����u�v�k��'��sڑ��8����K�5��^��]Ԕ9��`Bؿ�(3��ϲ��ISE~�>���	[/�U�#v���:��GO��GOt�KJ"J�V��f5s�$t�uˠ�r��..NS�N��U��1�
+	N*Չb.6��N���棥�I�ܑ�	�ؑ�"q���Ҋ{�:
+"FÎ��6	F���mbu\����Y�3�۵��6��dwu&�\wۋ��l�-�^�/�#�0�p3>�xڅ��*��-0{��ۖ��f�O{�ߛ�4�=X���c����)�o�������k;u���/qdr8
 endstream
 endobj
-377 0 obj
-<< /Type /Page /Contents 378 0 R /Resources 376 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+384 0 obj
+<< /Type /Page /Contents 385 0 R /Resources 383 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
 endobj
-379 0 obj
-<< /D [ 377 0 R /XYZ 69.866 758.996 null ] >>
+386 0 obj
+<< /D [ 384 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 58 0 obj
-<< /D [ 377 0 R /XYZ 70.866 654.194 null ] >>
+<< /D [ 384 0 R /XYZ 70.866 314.39 null ] >>
 endobj
 62 0 obj
-<< /D [ 377 0 R /XYZ 70.866 502.233 null ] >>
+<< /D [ 384 0 R /XYZ 70.866 162.102 null ] >>
 endobj
-66 0 obj
-<< /D [ 377 0 R /XYZ 70.866 388.671 null ] >>
+383 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-70 0 obj
-<< /D [ 377 0 R /XYZ 70.866 155.046 null ] >>
+389 0 obj
+<< /Filter /FlateDecode /Length 3107 >>       
+stream
+x��[;�#�
��WL�l	��TS5�\����K}�]���I�A6������Jj6H�x|@J����^��E���/���h�%�tx��/:�Q/�%�-z��ҦCz��\���R�C){{�f�/-�����@i*�ஔӅH��-��Sn�Sw|�t��ic�Rk�z{�q�����<�Wy���Q�����F���3�kCIn��Ҟ���kт���u�����Ӫµ�9)������?�>��'�cH/ei@r���2b��FP4h�5�T���M��$jq�q���{��}к�_5I�H�N-,��T�Q&*��)�dV����1�3c����HB���G�~��YRbŕ��
+{߉!\�����޻
+x��6F�����>���E�����z�m��n��]�7����������U���7Ќ�Їw9b��1@���ae7�ՙN��8�q�N��YD�H`p�e���o6�aZ�
\�i��<0����ɇ+'E��yL�>+τ��
��m5و�K8���.Q�_�K��Lzb��&6�f�籑ZG7�)T��pUz����Lb�u����¢�g�r��aI]�;wSAnmF!c��ѧ6J��ۧ�D�b�O�`�h���dG�݃^��1n3f\�Wxm��}26��>�S�������4�?�D_���~�)��F0�f��U�FG��$T���8��}�;k5��2�>pNP{muH$���Fzv�;��L
0�z���	�Sa�{jE��7��S/��Fm�Z1Ej;�����vk�����6����	a~ǐ����F"��r(C!MUC=�s���h���DywZ�%ے���B�Cݤ���'��h�콆݀a�FjW��"�u-9�:�ܦ��P7�����M���q����#t��ϣ��3:�:�It�-��n6|_d_ǹR�~�N�
+3�'1cetVe�b,�É����"1[���Ō6�	f4
+2~b��'���3�1�h<���+���40<'��c���u�I����P������4�q���M��ɿ�)8Ğ�X�`�?
'H.�Y�p�-N��\�DE�Ӗ|1xh;J4�"�W��l�~�"6��=�؉�����Q�����Y"M_�_���q���)^<��dx���x��O�:��_����ӕ�B�P��U���4�FڡF򏠶R��Mn��?��?��i��OIf�ӕ2|���]�"����E֨
�;�y7ԋ*n]e��*'T�ɋ�iF��Mu�jũ�}T�*n��T����!>I)���E��8�e#�ͥӑ&Hk�%U�6��5?�
���V��><�V-��OSJ���f
+� �9���#�K'�ڐIf�9�䪮bU�n��N!�
+�,�C�s��*#���[�u�k.�c�`Fu��K�A�K�ϵI;K�����J;r�]X�>�0~���6{��#g��蓮��
+�!T�pcic8���7��YE/i��10'��xO"��>c.���k��P�5C�f(-�"����0S]�� T���ͥK�h���ˮ��-�!�:���Քkˠ`gΖ玗Go9色��Ɏpۡ�QO�72��$�醫�`�9��Ŧ��e~�kovk���l����@1J��/*g��7��e��Ή���*��")u}gZ�ܑvt�6�1Z� Qߚw�������u#k�Z�i�(���t��?�5�`Y�[��h��Q����``ǰ�Lv���4�
+G��'&�	n;�����(y�ʣ�q�Z��s�1U�<��	i��yG�z:�39�N�8��� �Ms�gi�a;.�"lQ/#8�d����N���v|�r���4��q���@�}`AH׏{1Z�Vc��AD���D�K�ق�S�v���("���f��[K�y�m���Z~�!zc����L`�-\��
+������>Fm�r������,�<-�9�P'&6�[�Ȃ�6���y��r�IS�z���e6����lI�],�"-JS���	K�{
+m��T;��ͩZ��q8����7�ilJg�A��@:��5Z����0��ױu��a�[�K������8f@�)���Jg&m6�BO�Ǯk�����0Վ��C-Jl|6��{��a�睸Ҭ��tCd��AF���@UÅ`�8^4����!�=jV�MAx)@�truɅ@�C!7�)
+��ߊ#+��ug'"��4���l�#3��O���[��є�x���'�c�8,Bp���(ވ!X��"��k߫�WS���h��Xp��QW�ǷrtBVz�[|�.�V�UlCgۃ-sї�\r5��p�l��0�Cdl)�����;���՘�
+���#������c.��RuH[���>��(��e��
+$�L�HN+wj�A;�5�Sk��G疁*VT�J?��
+�PY��9�&���Y��8�2E���^~�>t8��n�+{i��F�!\��@m0"aߎ�"�<Z��,��(�v~Mq/�c���l*9y����#Kz������Q��z��^VX��F4_�� ˾5ݽ�����Hԭ�����obg0�
2�O�B|5r;+�BܼM?�Vpn���l�.O����_�Hg�����������o���C���A<\Ĕ��id�}�����O���s��8;���c-���v^��z�#�7��m`K��Kx�+�2?!��>>I�ye[3����q���}P�\٤�(+K�thN�� ����
+�t����yB9�8�&U,��Q=��#��q��ZT���Q��^�O���0_�T2.7����qR�ND#3�}]�8	2��iqyAHu��-�r,���&��8��JB�]�Ow�(��.c:�h�-r����͖F�4���> (�6N�e8Ƙ_��n���KK�`^�/f��g���@�����*/����؛[�%�O%kmR�I�r-���ϲX�:��2�R���������1��+�S��N�]<m��2�b��R���՟�`�g�vu�4l��|0��2'�9��g�g42�?�H��M�$c�p���l�����c���:�X�&���Ϳ��M[x�*o���,*
����?�a(
+endstream
 endobj
-376 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F71 306 0 R /F56 304 0 R /F59 312 0 R >> /ProcSet [ /PDF /Text ] >>
+388 0 obj
+<< /Type /Page /Contents 389 0 R /Resources 387 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
 endobj
-382 0 obj
-<< /Filter /FlateDecode /Length 3424 >>       
-stream
-xڭ��#�1߯P�l�,�
�V;8��p����g&Y�l���`�i6�b�W��s��C��__���b���`��׿����\�%�-�X~�ͦ���?�\��?��wc����wPF�)��;����^����S�Lqe�A�:��|�8��<�������w�eȚ
-L�/�e��0�������yu�����%x�[�-�Sf��!�F��`W�*
-�`�	�"�%:������L���-9Ǆ�7��
������1�8�ъ_���r7��s믑]k�J8��"��z��&;Ud�`I�N��SoS����*3rCCaX�act[��Rw
-w����j9�T�_I�R��x�~eBfD��P^���+�L�D��P8�����/��}FR0!;�*�y-8B|�<�q�>�D&綦��3<�����p��u鯿N�F�#���HZ�Y!�4�	��Q�Y ��gpWTM&��Bi���Gl�`%%���jٻ��YwUrW�҂⅊	�+�e��<��
��Hw�����Y���BO֢<�2�X75rbR�H�"���Yf�
�e0XW"8N�&>!��.��GT |G�&V��1I��V�����r��=#�"6
J@� R�nO���\`�f��HJiUg�J�ZQJ�iPemDK/�s���9L���9`�Ց�k�^�сk���HSv8�j����3�p (no��W4{�����S��m-�	��@i4��ִiM/�f�V}��k�@Tg�4Ts��*6�>�z��m�
-�8o��<�5�u��4PCDRj��*N.������!cpa�f��b�SO��kj��K6�*CYр35p�af���f��#�#�Ayr��48i����W�a���,#Ll�U�M�(^y��F�L�4��=$*v��v��y㪁��?TM3yW+"�⇻4�Ch.)��v&:�$�0���4����\GK�Ck`�e�C�޻5�Ҕ�6a�ҁ*�HY
?�I��a��릫�������
��9�/{���m���KuN��JQ��,6�����Ţ{�UN��3b=�V6~��N,jU�Aa�Ȁ��*�$��hʼ�K��QO�y��W��G���އ)����l~uJʡj�܍$�FEحQ\��gwp�-[�)2� ��R��qFZl�vL	g@�0��w��������0kx|?ڵ1���]�u�X<��2��y�ȵE�q[�{�!W���]p/���9A��aP�k�۸���X��������%��K��!o��ÿ��[Qt�뿿}���?�D�����7�A>����h���f:WZ4�#S�\5v�?�������W�5	.E�'c������B��n`Q~��p�mٸ�����RIw���y�'E�}.b��aL�)�Y�d*_Oh�Eh]�����@����ݵ�e�
-�Yl��%q��/�XA�QA7�I��{ߵ���WK@�Ǆ��n���hѤ!�P�<G�ǬC���c�CBn��1&[.��Is�����8ڕn�r���oss��Ѕ��N���!�&i�N�������w�4�^���c�+�5�ҝ���ز]�W�Cʔ���=���qT��Q��h?�u&��'�q��|ɖ�T���]l�y�i�i+�@璴4����4����_�ш�Ɔ~��f��rQ��8vF4>��1VD�}3{XI!����B��sP٦EAG�r�G��cu�w�<V�Pw}�v��U�-��>�R��o�!Q8�%�"ǲ��Za�!�h�o�GX:�#ؼ�s��y��W�*��d����W�,W+��H&ث��~�{Wi�6�-i�2z
-5꽠|�.����c�����e��3(UD�dI&F>�!�d���=v�#ƿ�9�1��=�����ԥ���S���"�q���ʮ��ϝK!���0��B�*�9�&��|��R�fI�b��稥j�e)d�"-��9��j���iI�w$i���a��۸z>��A�ڞ�걳�sosӽ�;dvUX8�ݝ^s~�}G�B?�m7�o�=~N�8������-�1�Z4N�;���J���^I���zZ���gLm/��F�	۷�Jl4��b�
�oRF'R�sj�����N�6f
�zLV4Pۊw\1k7�����SQ�c������G�Q�0j����o��zVF�	�'����ϟ�W>%�1��R�n�7�ͦj1px��4�����Wp�I`<c�F��y�q��5w�����W�A��`���qOU����V��+䰅�_Ҵ�6ƍ�����Mk�tt�&-=�6�yv�ˉ]���Uj�=�yҢS�jP'���19������&��d�#�|B%UT��~*��)���o��ʹ�Q��|[�Q�ҟs+%
�5g{��%�cR�I��̋�FxX�
V����"�^{������-���+ԼPtb(1'�ӭL<3V�sV�E�ƦR�	�B�b
-���N"���Xa��IӍ�7�Ժ4%S=L](NgO���XFB2����*U߯�M���9�w��~C��� �a�H"E�0�<�	v��f^YN]5�͗�����uL`���+~;��J&TD�.q�JJ
-]�d��I;�"�O ��B�R4i'�]�4�`8��㦖O�_]_ǁ�sZ�J��Q�/M<�9���t�z-�VZ)�*-=�1&$54�������fWU=�e�C�]�̲�%>�Z8��M�}}a���S��~�/�b����*��5����vpu�����A97���.GF4e�4���b���xW�%��A�7~���d�
-��豒r�a�+v@�1��n�kƪǱ���z��쮨G�_P�l��׷��(�oE������*6?h���}؅�~ɠ��!�j��s����'�6('쮓/�{	O]F�!�����BK2.�=+�P�j����{!���י�ޤ`(��%����aK��yÅ�{�%	�#�e�>OX���0;v���27������R�s1��U�h�|�4̘�%�0���:E�yќ�L6'X�Zv�?���ї�m�22�����BI�a��'����N����5��Ur��i��kݒ(��y_�?��GK#��!��?0�)�����X��jNs֊�Ը�g���?��q� ����A���O�ք�%�P����֫
-Do��d�]}�"�fu@������5�~g��-�����Mu�"�+L/0�|k�<�)��>��:��K萤��\�?A���q���R1:^�-m}[��[m*$�J�RH�M��g��C�C�^����J��8IŽų���/Y�0�����ϩ��G�-VN:�8B��n��8<�ŗ����54��3�����h�$D��M����\���xЪ��UR<,g,S:��28}��_�J�1��p�f�{�VB�?}���2R
-endstream
+390 0 obj
+<< /D [ 388 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-381 0 obj
-<< /Type /Page /Contents 382 0 R /Resources 380 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+66 0 obj
+<< /D [ 388 0 R /XYZ 70.866 682.547 null ] >>
 endobj
-383 0 obj
-<< /D [ 381 0 R /XYZ 69.866 758.996 null ] >>
+70 0 obj
+<< /D [ 388 0 R /XYZ 70.866 448.923 null ] >>
 endobj
 74 0 obj
-<< /D [ 381 0 R /XYZ 70.866 682.759 null ] >>
+<< /D [ 388 0 R /XYZ 70.866 324.203 null ] >>
 endobj
 78 0 obj
-<< /D [ 381 0 R /XYZ 70.866 557.163 null ] >>
-endobj
-82 0 obj
-<< /D [ 381 0 R /XYZ 70.866 417.121 null ] >>
+<< /D [ 388 0 R /XYZ 70.866 198.934 null ] >>
 endobj
-380 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F56 304 0 R /F72 308 0 R >> /ProcSet [ /PDF /Text ] >>
+387 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R /F71 311 0 R /F56 309 0 R /F59 317 0 R /F72 313 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-386 0 obj
-<< /Filter /FlateDecode /Length 2969 >>       
-stream
-xڽ[K�$��ϯ�cϡ���rK�]S��`�r�v��V�X���@�dV�x�C+;I��������".��$�s��������K0F����>��E\�ŉ�[a�.\>�]~�
-a�V�@X���i�F����Uy�z�VP�G*�&
-��
-�
��Z�-���n8��}�5�6���q� ��z{͍i2+*;�
���k[T����^�9��J����u4@Z�"�*4I@YPQ(Z@Yn�Wg�!���
-s_�����FM٠�T�TY��iy���=��²�n(���T�sR�֯m�6�ci̤�3�V��f�r-̅2�=Q��uќ�|�md�^�X۸�2�٨�詅�I[�I�}�V.�pY���͋�#�Aa���)	\x�0I�h,������V�:Q�2��I�����дA�+�q��2zhK�_��]ێ�Cɻ%b���#KQn�u��~���e�g��<B�h�35Z�x-I��Py�S�{�����d�B�l������`Z�K�
-R��
���j�xwqZG/�8��j{(�y(Zr�I�\i�Y���v]�k@]u��Ӵ�q/�:n���j�{
-4Š�(T��*��B��F���q_6�eF
-���QRT�L9�e+k'pӀ�U�p��7?z�����j��'?����_��h`߼��mH$��<�\a N��5衸12A�=�#�"�4uS�--&��[0w�9i"�W�؃��QV��Q`v@�S���"�M��x*�d������nĄ���� s�3>�{)���oԅ^��0�|M��V4�����d�8b�3W2��^��1�z�L��sҨd
-�(�,�0�7�/�A�&ף��o�VhBg�f�ǀH�ľ�����H���_��"h���1M@����[F�-���mfC���`L�d�x(rT�剓��M���L��h���|f��
�j}+��e����Xeo�����1f/�c�B��(P~���>C�\Y�myBזN.�A{���c3�DWk���fϒ�K��:�
-��d��ε�%�h��_x�g��g���>2��# �-���@E�.���!�vv���h�Ԝ�%s��ǷF�Jk [�;����e���#SW@���Zy�d�������"M��Bp.�x�9�|D�+�ě�mc�BӬs�=��1Uߓ�fhTz��$z��Uvv2%J�h�3�g���52���$��ݴ�7�T��9)�(�A���[�>��Af��T��D�30c#��U�]t$y6���f�Wz5��ke�c���C��9ޜ|�y��V���4�� rJG����.���gGr�4���c���f�<��v��6�q��
-�;�W�.N�s�:�?�_��ą�_j���Z�5l�$���R�M���=.����b�X�*�ߣ*s/����D兯�y�Bg�hN'U�w��Pkw`�Q�Z����m���Wu�0���a��h?@$r@r̲����U&�L��'�#��P��ʇ%g-�K�BI;��:-X�����L���{W�$a8����Kh�����B�.â��
�#����h���
Z���h�l�����(}M�=�J�R�4���Z[v$�+U&M-95�H~�.Ћ������
-=�^�in�vg�2� ,>�ӭ0�k���j6�:��L6�ڱ���!<��D�z���t�7z$O�&�i�E�-������U>�����J�T�l5ɦ5�e4�V�ٸa:i���ɴܠ�����~�g2�^i�,����/dO�eSFެ�nM�Ͱ�"m�,f{�8������@0�	��������Ϟξ���*�^�m�V{�Lx9��Ai�]P�,mB��{V�|�b�R�4.ex�,������hXX]�"�U*��n
-{ˊ	�f��sW�7���0	��ea"�r�1�[Ԗ9���s6t;���N���Fd�lRS7�-�M�O4���nq^��J9��J9(���$�V�.[Y�_(&�h+:����R)~F��ƚ���4��՘��#�[����:�=g�jQ�q
�pI�+O��w�{Ŧjw��v�ǝ-�M����C�V �Y`��V��i��i(�l����ab� �v��"�Z�Snc�'²��e��5��SR�rH��4�p*L8u��C����ڇ���X�����6{e���Q�k%ޞ�Ch����rd=�#��3e�ez�LL�*h3^�R�������z���v��yo�1~�e�s�q҄�}��ۊ���ؙ<��xC�����ݛ��,{<8n3��>N¤�<�26@�++O��p���L�!�������8Zl�vt�'�V��I�!&�$�l��VjY�������>Q�\�~�w��C♮L�[]{��iG)�u��X'�P�c�Cc�G���i����� �v�T`S��d�8������K����&��#G�	�H�$�EY�M��Ԅ	�1S�-��X����vB#,��j��F:��=�g���g�n��g|�ê��>�?W�,��S�D�.`C'��?}.4�����=�[@'i�E��/�,$NΧqv��i~���5�����1kA�#C)�bf�/?	%?�*�������.>�*_�",��aYjN���A�!X�x�L���ӿ&C��\���ӏ?�/�?\�M��;��v�F,.�����ʯv�M�IZ�Z�+j����}�cڑ�1n�F�e��^�ϯ2
-5?��Ǳ����S���1����Qp���&Z�5OYg:SBn���iC�6?J�%sǋb1�}�R�Y��@����b5��>%V���sb5*�Bϊ���w���{Y^?(��E� ��f+��S����e��HE69�����E�]!��o��[1/���xs}��Wu��1�iԟq�Yr�4��'Q�����!�R~�T�W��y�vs�P����g��B��&�a�v���^L���^��ԙ����� ��g��%>{�.�V�U�3����k��uܻ��F'��A�V��;&�
+393 0 obj
+<< /Filter /FlateDecode /Length 2971 >>       
+stream
+xڵ[�n$���W�:�ܙ� ���ˀ�}3|�O��됱��,���PR3닅���������v������G��\�B��������E{�V����>�͕����������勇�������=�=�}�����PR����wß|o?�>+��_���w�o�mGJ�,�ϵl��l_?�o�ɵ=�k��F+x����<�X�s����S�a}JH)��F-�b�]r[�����]߫s�J��F�	e�y�N��2��+Y�N+�o^�i�{7�
�2���#ֿʊ��w�*_h�QF7�l�ⷔ*���S���2yW����W�PMg�|m�����{�=x������v��N����הL?p}���G��7N�4P��s�
���@�\������'��ׅH����+_�m � �g[��w:�{xn�O�p�=����z������"X�D9ٰ��X�f�7~g�)�Z��K>۩��K�ǰ�����\&�wf��]�o+��y-���]��}��>^k�׳˚��Q|pʢ��j@j
 =WV~�إƗU��>
+���=�L�ug~��?��}�#�'��<�Lht!�ؼw!�8�C:�,8k:2yX�7~�3�a$=-.|q��|�٭���<z")@f��l`�D���lUd�U@���3�>�zj�V����L�!�D�U�O�`��xH���|���9��������zI뿭������+,#����>� �'P^�u߷क़T��\���rgu��Y�[+�٘yu&�$/�~R+�~�@��t�V��b�o�+�K��X��}�q�����wa��	��0V�q�!o|
+K���Z���=�::�g���,.�^���/3��o���焛��{�����᠏$Z�(�0�u3M����
+^�<�z�6�w~�$7�ԡX�@�@[z�h[����(�z��,�-pE�F;1Xq[E$�n�X#'i��Y��M^�!%���X��i�� B�Ij��P�!'C�+Պ�����4e�"��6X��v����j���s���b��a���!P���y��h<N��%d>-CR��q8k�ghxb�u���ۀT\ձ�3۾����#W��ο
:(�R���v�!J��d�<Д�����`CDm��JHԇ{d��u0�wW����R�c\��iRJ����*f�����@��@K��Sв�^kt��ݷϢ`�V%U���}��Z�V�4�`m$r�����eu>����`��ߐD�e��7L��R�X�*�&��m�z*���E7�+�^�p�-ahDp�Q�2K��<����M����������\�%�o��m�H����M:�'A\��kR$��z$��zg�g�_K�Ҙpa�x����y��"2��Pw�d��a�0��VD�'͜<���P�����u���/���e*6�.t�B�57L�<G��n�_�:*�.��t�>)��B�����X�K�R��*�A�wYUXz�kpɨ��ϨD�쎊��D)�BoW�f�fǏsU�
+���b�b��u:��5E���۫!m����Wr:��Y���1K��KI�H���q�J2����`ǯ$�pgJC���W0���-h��N�G�����Cv��^��[���Q��e��i��HC*=ƾ�\��4�¾|���m=�-����Cl��T�\_s(�6�o݄9��f��9��x\�xZlή�lC�\���MIB���Q7��G(�UɄ��/���j���������SH���k;����Z�t���F���#P,�Ō*�m�T�/�a)�B�<����Q�m�X���M���*�x&�Y�DA��m(L:lQ�>�D8��Ns��{��*"�c�&�SH'��yz|�MPvƩ<y�9X�\&M"��s�=`��2A�����1//�=�C;h�(4и2^i͇��d�Z��p,`����������uf�
+O��w��޲��k�o�7�/����Q�V-&�M��]�P8��ի=]����D	���R�u%سL�X\����`�����ZU�*0W�c	]*�Doam:?�3˹�ǬX���+���7szI��o��S̼s'j��lY�-�n�ˈ�,G�ƍ�8����{{l�������Z���ګ����0����*z����d�/����y���q#e}��
+\ �W�u��B���N��ϗ'ly�Ŝ5�C/nF��ͤS;�|]��:	�����|Yuh�\��pLJZ���_��D�>%�u�4é8C�4�KK�Pَq.+]�נּs\����zzV�0�?Z7a.�\�����fO�;�H��l�R��]o�͜+�圫�gu�%�̩��T�&&E
��+�Q��i�pDk	sW������-gp|;Hj)�ul�5�ۖ#N7�NFes`��'5Q��Eq�|����a�'�h}��5��{���=ڇN_�焼�`���s���/�w咮�i˹��:��y���iX8�]���|&�+qc\�+W]O}u`Zf�BL|���KOm���Ҷ/hr%�o�r��ظ�I�Kէl�fBs���3��;��_Q1��I�󖩢��\���&FȲHcU�rŀ2Y�/�+�}#Z�����6t*���^0��2�pla�R���v�ɋ�
t�b
+��ñ��v�Cp8�_�ΐo
+;�sdp9mI�)��g�~Cu�C*��:�J��(��9_l�ǿ���üc֓ԅ�vLxGz,i�{-ؑ�q��zG��ǩ�^��˥�ⵦXO5�y��xp:�~���U��ϴ�:��M��A���ok<wJ����w��_if����CƔ?�rqM�U㼸��0�B����&��.v�iŮuF�{�C�տ�;�~���gƘO��^(��4�)�k��kW��~��9����.��X�� W>�z�t��'������Rx��G���
+m�V��};����&^�����s�
 endstream
 endobj
-385 0 obj
-<< /Type /Page /Contents 386 0 R /Resources 384 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+392 0 obj
+<< /Type /Page /Contents 393 0 R /Resources 391 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
 endobj
-387 0 obj
-<< /D [ 385 0 R /XYZ 69.866 758.996 null ] >>
+394 0 obj
+<< /D [ 392 0 R /XYZ 69.866 758.996 null ] >>
+endobj
+82 0 obj
+<< /D [ 392 0 R /XYZ 70.866 721.134 null ] >>
 endobj
 86 0 obj
-<< /D [ 385 0 R /XYZ 70.866 603.307 null ] >>
+<< /D [ 392 0 R /XYZ 70.866 270.547 null ] >>
 endobj
 90 0 obj
-<< /D [ 385 0 R /XYZ 70.866 517.573 null ] >>
+<< /D [ 392 0 R /XYZ 70.866 184.813 null ] >>
 endobj
-94 0 obj
-<< /D [ 385 0 R /XYZ 70.866 283.072 null ] >>
+391 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-98 0 obj
-<< /D [ 385 0 R /XYZ 70.866 143.03 null ] >>
+397 0 obj
+<< /Filter /FlateDecode /Length 2469 >>       
+stream
+xڭɎ+��>_����1�@�i���A�f��$6��)���E���0��X�}c��o'y�ˋ���ϗ?ݜ>)%�s*�>}Q��<�S�"z�I��N��O?����?��pt������ܤTRz����_���M��:�ˊ���ߦ��SH��Wڗ�n�j�/{�_�[����b^ͻ�z��mA燅͛�lh���l.g�L�����F��,<�?�2�k��p�	t�sYp�R��0D�E�_�s���B�p�1
��+Z���jV���)5I�v���&���#��s@v���qu%$E���HA��!�����a
+N.4XN��r
+�on��Y9��-i�$�.(iB�Y���1�![S�d�E��*	#�ydgN	�\#/��Y�����E<�J!�<
#X��f���Ν��7(xKH[��^	Qu:8�^M��G\�c���
+��*�a�S%z�݉�	\ߪ{����C9���/�&����֒_��xѵE�l2Β��N�0�L̛��N�`��j�,��%&.�g���yR���Ԍa�!~L��Jp,J��5^l��(Į���j	p�c�S2,oՈe�8���C�{���.��ԗy��`�|��l`/�璶���8Ǽ�A6�#����ɞ�
#г��ᶉ�9�*�k��lCE�Cʁ�u�ndY�-�EF�*�[��T
+�S�BLb.xx���Y���g��|/2�JyO����(*W�;��T+-\�tW8@ZI��"�ª��2��S�0j7���KV��1���V�C��
A8R�j{.S��<��a��#�3,* ��Nq{�\���^��R�����ok�)*�_sd�`R�,��ɱ<����e`˴	m[JB��o+��|�qL7��(�kë{�eIw��/���m<�*I�
+U�$f]��J��������d镼�BP�p͍;o`�%�
+����s�!�DH�k�UK&�y���|��+.5�3T����/��[6�
+	W��WY}�O���Z��d���2"E:j+;嚟�mr*Y�H��.������v;�,[j9W�Z�aI�^m
+����
N�p��j(�5�m"l0�׵�jt�[�.��v{!ʫ���X��mO�!��\{�l'��7����IP5b6��^E�qz.��ʽL
��~�TZK�C��M��1]�����!�
+�r͵���Q�����h��z��&���[N��}܃٨������X��*xǅ��r�z���O�`��Cn���m�Q#;�`�E<cc	Q=o�ȡQ"��πz����{{e˵���Y��̬���sy���i��H��u�74X�ʩ�@���?�jR��0�����tht ��p����a�KXv�M5�B%��g�C��x�dMn���m����IWc�A� Z�'u�AH+�(����M�U���i�Ԩ�3��O��;��+����eM:���ր׌B������e��_J�x-�O�h^y�{�)��R㖐;�"M-�C?~�O�����xki�c��vLZ���8���1iɊ,��%	YS��OҨ�/ƽ�N{��彡��^+��1?�FO\��� b�y����«�-�>_������|�������eb:�`���ن��ߧ�����-�k������T-����
�2"�0����H�f�zT����2���<V��^3:`֋��`�ʂ���u�м{�8�ϋRm�Sw����&i�~$�����*�*�;bu٠�zZ��M�)�:#E�ω���~V��������l�wF#�sNJ����v���%.#E��)o�Π&�lq�a��*�=T��ֿ<��;�Z�f>�+�yw���%�q�uJx
+tJ�Qř�D�C�UhL��;�rTG4n�@M{�b����%�cb(M��.�bk��%&���IO]��6ώ�|Z��]�/6@�A�z5Z��c�s]�((a��i�dx�Xs��	���nppA�N\_H�W/�o�r�p�7.��W��َ��˔��Ia\���eJrB�qK��i�eN�Ɏ{~�Ҽ��J���m���.7a6p
+���
+�T�q]|�FFf\��0EZ��a�����A�q��ہ>�ow&�EQ�&�C�!1\kr�7�5�^�T�l��O/f����G�%��z�!�u��^R��j�r�F��5/�Τ:�ԓ��6�9θ�i�����fR�v�2A��2as�	+��Vý�(���E��=A�Nj�����?�~ԇ>w����a���y��7�4F��4�r�3]�7;����O6�B�n�yt�\���ݘ�9v[b�5B�?�[�w=�B�3q���TRa���!�cWY�-�fNG�A�p�8�:͚a+w�!g/�U2����w�?Y)E�M������X�W�';-�ݥ���Tb�1��65�|���9��n���A�hV�wl�P98��㛃��&����R1KU����.�K���
�}���iį
+endstream
 endobj
-384 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F56 304 0 R /F57 305 0 R /F71 306 0 R /F58 311 0 R /F73 310 0 R >> /ProcSet [ /PDF /Text ] >>
+396 0 obj
+<< /Type /Page /Contents 397 0 R /Resources 395 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
 endobj
-390 0 obj
-<< /Filter /FlateDecode /Length 762 >>        
-stream
-xڍV��1��[r�k�?�(�5�"QP�Z�����c{7{!�*�=�s�l��{�˗�n��~��9X�Q�9����)7z1K�*z_>�2!-�?������n��b�����d��nʓ���g�f�⑚?y`�ڙr�ٲzm�{��β5lsF���rR>�G%��,V�Ws���qz�DՈm�1ɝxr�����k�[0�sØ���xa��}�S��.��%��XUHv��L���Y0J����T���/TG*�ZF�����2�J��@_��ڠ6x{�p��"�ɨ�u�b�dU@�j�n�W��BH�\����g8���D�SJǄQ��n?яm�ߢ���V"��x3��Y������X���hʿvSk)ױ4A��zx(܍����2k1y�[�}O��-����8E� ���u�0�h�t�
-�Z�GN�<iG�~Yj`�V��ܝ�(VH
-�Ŧ��C�V^h��J��W��e�M���P��6�Wu��LK���
-
-M���RY�Ya�EY�:� :\��Q��杊�̪�P0�go#
�� l ��a:�����ۄ?�gD���i���(I��mpw��2��i	'�m��$�*yOۓ��Mv�]󂽅����r�3������hZ&���!��N�CB�<n)�
-Y<4����9���&:�"�]U�hd���#
-?�2�u���{��D[���0)yS�$��ѩ�<���L�n��u��^
�����}j���)ݨ����o�'>}T7jA�4�?-Z+c��Q)D��Q(�s�|��#�#3
-endstream
+398 0 obj
+<< /D [ 396 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-389 0 obj
-<< /Type /Page /Contents 390 0 R /Resources 388 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+94 0 obj
+<< /D [ 396 0 R /XYZ 70.866 595.534 null ] >>
 endobj
-391 0 obj
-<< /D [ 389 0 R /XYZ 69.866 758.996 null ] >>
+98 0 obj
+<< /D [ 396 0 R /XYZ 70.866 455.492 null ] >>
 endobj
-388 0 obj
-<< /Font << /F52 226 0 R /F71 306 0 R /F56 304 0 R /F59 312 0 R >> /ProcSet [ /PDF /Text ] >>
+395 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R /F56 309 0 R /F57 310 0 R /F71 311 0 R /F58 316 0 R /F73 315 0 R /F59 317 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-394 0 obj
+401 0 obj
 << /Filter /FlateDecode /Length 2694 >>       
 stream
 x��\Ko$�
��W�޲@���tOw�[&�-��d��9d/���$�D��{�k����Q�"%��H=<r�}��ߞ$���O?]��TZ9k��ߞ|��)H��W2������')���YI)�)������˳	������WE%�
O�o�^��$�>��_~}�9�'�DrN�c|�"D?A2[�[�/�:�\�k۹]��W���\C�_�z�~�E�Oͪ��\"����埋�Ʈe�:��fl��O�˽i�8����n���wnt.��(f4�4���e5�W�K$S�,Y{�K�Bj����x�e%܍x��q}��:�r(o�hC�s�"y4e�����jޚ�ѳג'����<��O���~�Z#�Q�z��=��ˤ��f�f�7g���6f'�7�c�/6�Qֆ���.5ac?�W觻�':a$<K��T���gg�1Q�e[]?6�O�Z��>Fa�"I���bmk��2V�j=*�6��Z&�lo�n�E��R��yb�
贕�Dp:R��E�k�U�{&����lt�=�:s��z�A�i�Q�E���mJ��2j!������pjSU���n��H��0�:��qB��]r4!��
@@ -1160,22 +1171,22 @@ x
 ty?�ޝ�p5ћ1r�!�>7����u	���A�ۓ&��e�5�|�L�J�q���S�T����O�cH��q@��"V7���`m����M(w��
�rU��v���������i������x3�1"��W?�5\9?�p��Y��(���j�����:�c-��������u�W�%D�┏��f�J$3�m@��D�^@���K!����w�Wv������*w��H2�w1��[�s�N�v����9"�\��a�U��]'�x�"�w��sZO�]���^�}��k�@wk;s�A�r�ތ����.%��q`���#�W�y\b#�,8�z��G�gv!��綥�r��FW��7�z�=(olz�� �;(�ю�w?������6���X
��\�v���ܼ���!+a��~�
�ht��b%��+�Ayw���@ɭ�'Uۊy��T��{�I�j�:�m�S��/���v�����4�#U�1��w`�����Ha)����!k!K�-����>΂?��������%�9X��[����};3�����xvcۊ�b�iwv���x�O��S��#������]�o���7����.�|��T	03�j�o���}^K?�mlNWr�Xu*Ňm�.��?M��zS���YvA�;��nD���Nyϟ�>��Ɵ���S��w,o˟�y|��)�p��[���Vف?ٜ��6.��?ٰ���%n����f_(�m�?���@=�>����
 endstream
 endobj
-393 0 obj
-<< /Type /Page /Contents 394 0 R /Resources 392 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+400 0 obj
+<< /Type /Page /Contents 401 0 R /Resources 399 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
 endobj
-395 0 obj
-<< /D [ 393 0 R /XYZ 69.866 758.996 null ] >>
+402 0 obj
+<< /D [ 400 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 102 0 obj
-<< /D [ 393 0 R /XYZ 70.866 721.134 null ] >>
+<< /D [ 400 0 R /XYZ 70.866 721.134 null ] >>
 endobj
 106 0 obj
-<< /D [ 393 0 R /XYZ 70.866 656.498 null ] >>
-endobj
-392 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R >> /ProcSet [ /PDF /Text ] >>
+<< /D [ 400 0 R /XYZ 70.866 656.498 null ] >>
 endobj
 399 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+406 0 obj
 << /Filter /FlateDecode /Length 2293 >>       
 stream
 x��\Mo�6��W(�hi~S�k�@nM}rj�\�Cs���"��vM+�=Ȼ�5���i��w_;��zǋ��|�NtN0nM�9�\t|�������CǙ�}�o��\V	&����~��m~&�|��-}�����L;�|����5�Ym4<Y��������~p�l5�uO_����l'I�=}�>�}�s���.a�z8�w�U�x���p
QNڒ������?~����שn��KZ>��e,�?½��M���d��w��Z�w��-ӲP��	��t9jHT�0!T)��
@@ -1190,16 +1201,16 @@ r;u[
 ��f[�������K�fr��$'m��g7�P���^3�e�K�xv�&�.0�K�\%�a�:�D��ԁ%.1�����L���pj#������O:=���c��
 endstream
 endobj
-398 0 obj
-<< /Type /Page /Contents 399 0 R /Resources 397 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+405 0 obj
+<< /Type /Page /Contents 406 0 R /Resources 404 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
 endobj
-400 0 obj
-<< /D [ 398 0 R /XYZ 69.866 758.996 null ] >>
+407 0 obj
+<< /D [ 405 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-397 0 obj
-<< /Font << /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+404 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-403 0 obj
+410 0 obj
 << /Filter /FlateDecode /Length 2316 >>       
 stream
 x��\Ko�6��W��hi�@`��-�[S߂��&���\��;���yQ�A���p��ȏ3C���q��ow�z�{'��18��5�3�3�����{(��7g*��X�<VC-��?�~��d������m�O}c���i'���J
Ozm4������~x���:3}5�
�����F�rx�4���s�8��q�#Wg����
@@ -1215,16 +1226,16 @@ m
 �Y��J6 �%�t��5G�xߕ(���x�#��n���+�m9?��O�`5����+yQ��P�_���S�>���3���X;���х���^K2n���?`�z;�D9����S� ����/�r΄���fo�ؚ��<����E�
 endstream
 endobj
-402 0 obj
-<< /Type /Page /Contents 403 0 R /Resources 401 0 R /MediaBox [ 0 0 612 792 ] /Parent 360 0 R >>
+409 0 obj
+<< /Type /Page /Contents 410 0 R /Resources 408 0 R /MediaBox [ 0 0 612 792 ] /Parent 366 0 R >>
 endobj
-404 0 obj
-<< /D [ 402 0 R /XYZ 69.866 758.996 null ] >>
+411 0 obj
+<< /D [ 409 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-401 0 obj
-<< /Font << /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+408 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-407 0 obj
+414 0 obj
 << /Filter /FlateDecode /Length 2649 >>       
 stream
 x��[I���ϯ��l O&�;0`�{:�o��f��ؾ��K�~�dq)�ԞM|s�h$�6~UEV�$��wb8��N�Y���EXsPF/ ��z�˯x��x�x�
@@ -1245,22 +1256,22 @@ u:
 �VTR�i�}kZ櫾Ҟ���Lqy���/����(��&�Z�A.�4Ae��c���VE+�@�L��T�����R!��hG�VjSo�j{�/�SC"�p�U�̥G��s~��>q�^���v[�z&;I)���6,�����z.����+e�5ܻm̵���+�ف7!�݂��s>�lW�����O�I��]s��ƻo��Qc˭�@�!!����k?�<Zg.�����(V��c#8��=�rw�%�*�UíҼ��&!�~H֤�j������kf� ���z�a��ps�v��
 endstream
 endobj
-406 0 obj
-<< /Type /Page /Contents 407 0 R /Resources 405 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+413 0 obj
+<< /Type /Page /Contents 414 0 R /Resources 412 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-408 0 obj
-<< /D [ 406 0 R /XYZ 69.866 758.996 null ] >>
+415 0 obj
+<< /D [ 413 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 110 0 obj
-<< /D [ 406 0 R /XYZ 70.866 344.289 null ] >>
+<< /D [ 413 0 R /XYZ 70.866 344.289 null ] >>
 endobj
 114 0 obj
-<< /D [ 406 0 R /XYZ 70.866 179.4 null ] >>
-endobj
-405 0 obj
-<< /Font << /F75 318 0 R /F50 224 0 R /F52 226 0 R /F58 311 0 R /F61 319 0 R >> /ProcSet [ /PDF /Text ] >>
+<< /D [ 413 0 R /XYZ 70.866 179.4 null ] >>
 endobj
 412 0 obj
+<< /Font << /F75 323 0 R /F50 228 0 R /F52 230 0 R /F58 316 0 R /F61 324 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+419 0 obj
 << /Filter /FlateDecode /Length 3073 >>       
 stream
 xڭ[Ko$���Wtn1�����h�[7ç$�%{�/��!Y��٭A��V��(ֻ>r���"��?��?�x7�H�c�[���X���ꭍ��J���˯!�B�Va�:>u|�7|ڀO����1p����3xg��f�o��7��17nQ�KJMy�N�,w��/۾i�5�%�+�k\>m���Fڲ~���-�H?�$m�F��y�iT������K{��G+�7�oi�
@@ -1280,334 +1291,371 @@ T
 {�:����8�����$`/2Bh#c}�|����9y�t����񂁧��[E�䤆��Mz���G���yB���⍲�["�*� 9���t�R������v�T}��\�Qϋ�����?���V5����^�d;��ѝ���$e��i�e�C�|���otU'�3�a��̳�z���Ұ����M�G��}�z����6�f�.WЇ�,G��?8��Ϊ��}4���xH�j�X��}��u���kO_��[������䙼�GY}�s��4d��Z�޹�������󤃜)߆���p�ZLT�UjH�C�^���v������4�
 endstream
 endobj
-411 0 obj
-<< /Type /Page /Contents 412 0 R /Resources 410 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+418 0 obj
+<< /Type /Page /Contents 419 0 R /Resources 417 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-413 0 obj
-<< /D [ 411 0 R /XYZ 69.866 758.996 null ] >>
+420 0 obj
+<< /D [ 418 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 118 0 obj
-<< /D [ 411 0 R /XYZ 70.866 610.542 null ] >>
+<< /D [ 418 0 R /XYZ 70.866 610.542 null ] >>
 endobj
-410 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F75 318 0 R /F85 396 0 R /F58 311 0 R /F61 319 0 R >> /ProcSet [ /PDF /Text ] >>
+417 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R /F75 323 0 R /F85 403 0 R /F58 316 0 R /F61 324 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-416 0 obj
-<< /Filter /FlateDecode /Length 2673 >>       
+423 0 obj
+<< /Filter /FlateDecode /Length 2729 >>       
 stream
-x��Z9�#��߯hg^���}�O6��e��l�&����ﻊd�j���ћq�j�M���X-�����/<ߏo/�/F.B�`�p��o/��E,���^L���}]~�q�$��¹��<�����'��8qn9�H�0bq>\����>��g!��U�
Q���3���J�E��%}�es$g-4"��Y�9������(�-�`��y��XF�q=QGf�9yH߉��MQo�W}!17�����h04sJ���R�q�#�"�8��.�)ƭ&B'7�L���
��{�;�EJ�j&��P�<�(��2"���ɫp�ht��_�^4:Ҷ1�f������|Y_��f+봲�ԡ�C��������Ϣ��״k
`D�³K@�3��r��03�Ϭ�d�8EW� ��"i������\7EOM+H$���ǀ8���Rz>\��K^���I!(�qg�^�Ȣ2?Qġ%�Z��#I�n���9q����F���}`j%�F;i
�*[;=�M�����xLִ��t2�!t��di���[P�3���J!��VK�T8KO�u4".��p]\��w��&%3�V�'#Gѻ��C�"�?�j�$_�SL��b��<Nބir��m�&�ɛ0Mv�@Y����B-k���bL�i�p��ȭQ��d%�8(�k���{��ii��)�1u��Ћ2������h}B���3#�� ���dtVCv�R���FEܩQk3��b#��Y�;�~G���	Jd��p�!czPG�i�T��qf���R���uf:���Gc퀱�8��+�g�PJo�Tz�2�Q!�I�۹:L��
-Œ�����Ѧl5���R�����<'+���]R��P(�a�j���]˶�P��(񳊦H�;�ԑ�{�0��dǛ�MC�nJL��T9�%�����Fi�d���+��ˆ�lt�e��R���-��~#u�P�9�
-�Z>=Ւ��̗�������~�����޵�2��3��J�<AoD��telW������?�c+�h����__~�����T���ԯpI�4˿����mE��)����*A���60�p�@�Y���/�ȗɆ�8N��k���ǿ�߮����T�Q�ن���5�+���,��(���X�a�Y7�ȗ�z�f�zmI�� �35��ڒ8��q�Ŀԏ�a7�2����
-�j�V��J�w�������.]�pu��#'^.��6��Y���/�#�Ӎ|�Ż���V����)��d0bTǩ��N��e��F~k5�pN��@n7J�-Z5��.�w��Rў~݄�ifS�|GKm�洩X�\iyW�˅��G�K��R"�[��ʪE
-�w�"�@�4I��|JJ��b�r�XuKB���&gߘƴ�[iL���,U�C��K�T���X��5�S�iMÌ��i���R��i�	
-~rZS���F��L}nJ�9�8�P�f7A��Z�5;�w/�+��+�_ð��m������Ґ#�ߠ2=��ʝ�R5�8O͂io͡J������v���ڡDؠSQ/��1i4�f��Li+gFP��7���V�S����X���<Yn�W27�}8��$v|}��rY�f��u̢�siM��'��3��WC?�F����k3��;����Sk�'E��.s�ԝǪ��k��F�*
�ۭ7�C��썚mr)�)V���JS#9f}>����QA貲�G���,KN��Õ�������q��ZV�y�j�r3�}���w���Xs�v˂,jHm��J��cf��*�qݽ�E��$ro-�c��ALt��S�kk��}=��&-�}��ǵZ
-��ܽ�鹊�sc-%�d^��WKU!�R
-�6ʽ_KQ��]T1b}cx�t���me��6v-��A{�|���1[<\��h����-����| G����!����K=l�N&���bn��[t#_�4t��m$S��..x@�\/TV�)�+�3�fQ��'���H���
���:�+��7.���{\P��dm�d5#m�,�m�ᜩ�����o0���YH�={��+����d���s�*�h^-C�A�>���Ŗ�0`���f��R1-����
R�2Q���n�	�1P�;}[_���}�?����׳2b
x�!"�pa^sxmP����u�3AJ�Ѝ4j'�➕�w�������ԅ�x^V��{^��z��R�'��%�������

&�&����$d(������i�ɳND���U�)����mc�(|�I���g����F.�����Y����n����ٞY%��xk^�dl�ٺD����#���F�Hf����ֺ ^뮦�Ȭ�'������C�j�w����Z��,�Q�]A���8�s���&t�NtJ��_~J����
-{�݊?��P�Z��V
�'�c���]����J����州������%�P�޺w�(�r�&�4��`»��j��׌�5?�1*�
o��vŃQ��p��*�ɚ�����L�0���U��{9�����G�p�@�׳���0�ّ %��n	�Qw+/l�э�l|cD�+�FVy�
-j��d�FŠ������v~��)���A�L�����7��5����dS�hp��}Y\��-�7�k[��NG�-��n��v�V�m���w%B�۰��3_�.����?���
+x��Z;�#���W�3��%�o` `$�\��d��l�%����ﻊd��͖Z��]=����WJL�Ob��ȟǷ����s0F����'�o�$'opLON�Y�0�}�~y�"��O8�=���'%�?y�
+�8bi>>������|K�BڇVi�{Јr���M��S:,n��_��P���F�y=)4'��=��}�~8��<zL$봞w'b�_R�.|�B�&JrӾʋ6���ïo?/f���R��1�i���[o��쭝�S���7:��d>��lĈ��D�a��(�ģD�H$�O�VE3d#�<�r𲑑��*�8KǷ��F���B�5�X����C�&I���}��*��
+�VӮ-{ !���.�u��L�+ئ�Ì�o�l�g5��ɻ��Z�,=f,"�u�s=�,5�`��k�?:�1=Օ���hz���d�|Q���]�Nq���5
+��b
+-S��OI�ws ?�Dq�[/P�2���Ԋ�-��5�*�zz�4/��:�1k�~������ENdPoA)�L���y�H�Bkɒ
+e�m���F|й�ˀkT���{��F셰~2Q�K*0D+"�s�zK�b��4�,valb��4(��0
:L����m�$B
5E�l���Q��QɭR��d%�8(K�ώgBuʴ�%����Ҙ�,���y*Kփ�x��	�����`�¨R�*n���ɔ�DfO7*�N�Z��!%�j��(��vl&đ���eG��-@]��C-��-@�3��ou���PG�q��#ر�� �5��:�}X���_X���R
5�^C�<#;��:#*\AT���f*n!*Ok	����,FԴ.�N��PX��� ��W`�:��,Hc�n���ʪ�f��`��1Q����Dᔟ&�D';W��}8��82É��z��)qQ��>�P�-�R�wG�s���ѐ�F�Ҟ��f�;����]p���y��H��c��!��� 1Ͷ{�����;!L�ͳ/+�7�$�Q���(��&���T�K�s�_��1X�� A��-���TBua!�b��#���	p��sKv��543=�,"��yAɴW�i6�8R�ÄR��uMY:W�
+3?6�ê�#����99��R		���Dn�Ԏ}͚FI���zR�ILN����#��<�^�ٛ�"� #<��Ԙ�9�����[g�Ǚ�J�
+z�U
���v���=���,gaͤ�����?�>��+��'>?ObV�O��S��v�BN������նݞ@{b���]�S�0kG��p���ߧ�ȗс�	网j���ǿ�߮������[d�\�~M�jS4�9�����\)p�Y6�ȗ�s�f�sFi��4���`I
�s.���NM�O<��R��|q�S���Z+)X�û���K�N���#�P'9��s��2�5o���BO�Od��R3i�T� Z���O�X�ޮ�q*��t�l�!d�U!̡֏k�t㘋ТU-7�C���s��m���M�N-	�V�X�w�
+�����v��Zr�sK�]aUo��/L�t�(;v�+��*�IY8Cm���k���㠽3�a}>��[qL�wıQ�RMa�Edܩ60ں/�)г���kʈ� ����0�
q�v���q�~p\���Sog�CIT�B.,O�(_Û�PwN�Q5<�w7��W|��k �e�
҈>V�NW�"�7v�c)wg�TMF.R�txw��P%̡���u�^��z7C��u�^BT���ѐ��]`%ں�0P8�Qu+����P:)�?Vxs�B�57,���6��X$7�t	����c]U�
+�������T��DYVh|CX��]%�R���2��P
+�]�����˴�& k��F�*]���[*Ka"���%�M,�<�*�� Qa��˰/F�{4*��.+�{t+K댍FԟJ�(��Y�N��=�e���V?��
S��B�Q��m�(bHN�!�ٱ
C��ʵ��El�$0���T�Rn5#J'��APr�s�
�į�T�,���ݜ�ɥ��{}#�Kmg��3������=��D�JImfg��\�ӞȢ��#R���F��= dl[p�C�ֹ�M�-ߝ�!�dF����%U��"'��J�����ك^�N&7�V�>��Gt#_hkT�qK�f%�1
�Bt��Rea��1�;�"xG/z?}���!�e��v�|�X$�+ޔڳ��d���3����܍(��hf�}�u��j��V�Aŀ5��aA�BǕآ�Jr�_X��IϹE�\4��pТ�_x�~�g˯�(|�q��_o��Z�p��`�a�M"U'uwO"{��~c��Ësƍ~�I�Ta���(Y���b�p\^3��6���1Ux]��d	3�w#��٢��C��*�a:@Fk�u�8un�je�Z���u�?j��h���6�-�*݀)�YR�ě(Sܔοb�P<�1��^��Ӧ��T �F���#��Eim�E�Pb�>�q�;3����mT+V�j�_L@����;��U� �q�%IF������K�?6����=$)�,�ߪnK�s��j���\.ŵLx��Ȝm�� w�?*�5�_s�r�"�k�+��|1~Bu��D�q3P�����)uB�F+|ldv+��ӻ�U Ё]u��9Q�Oy�?b�<����+�#�c�m���N�[�.��AN�@�<�c��ew���5����s��}>�.�r���"�v�&��kÛ.�E�����~O��
 endstream
 endobj
-415 0 obj
-<< /Type /Page /Contents 416 0 R /Resources 414 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+422 0 obj
+<< /Type /Page /Contents 423 0 R /Resources 421 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-417 0 obj
-<< /D [ 415 0 R /XYZ 69.866 758.996 null ] >>
+424 0 obj
+<< /D [ 422 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 122 0 obj
-<< /D [ 415 0 R /XYZ 70.866 531.209 null ] >>
+<< /D [ 422 0 R /XYZ 70.866 466.203 null ] >>
 endobj
 126 0 obj
-<< /D [ 415 0 R /XYZ 70.866 440.829 null ] >>
+<< /D [ 422 0 R /XYZ 70.866 375.823 null ] >>
 endobj
 130 0 obj
-<< /D [ 415 0 R /XYZ 70.866 359.264 null ] >>
+<< /D [ 422 0 R /XYZ 70.866 294.258 null ] >>
 endobj
-414 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R >> /ProcSet [ /PDF /Text ] >>
+421 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-420 0 obj
-<< /Filter /FlateDecode /Length 2278 >>       
-stream
-x��\I��6�ϯPn-�j�/@��
-��tnANm�Ksh/��}\�HS6m��	���1-�-|����������<Q�����D�A�QJ9�����'h��_2rk����_�R��v�{������������_C���
-z�Y(;
-M3��>c
-G\Ko深�ۗ��IˁR�UR=�|�d0�4�Jblx�2|���	a���pc�����^~}�?�>��G"@9��ۆ7e
k>���EˇY��fZY��h�3�VD�����	'�	�٢�}��hj�Q=J�*֘����GЬS�ct�̲)2�OO��M~�����Н뗟=����kه'|�H�fn8���_�WWV4��e��
-�L%=smD�_�d;Q�ϓ4���p��G�����]��I�f�f��/�+���0x�Y)�~\��1=�)ʰ��R��.J��)d����5�|Ʌ����Y��k��0Gd9M�,���7��*^�e8g#3�����"ѤD�{.�����E��<Ƶ7�[�Qw)N)3����d�`�c��ڏޑB��e�B7�����?I�5�>j�Ix�������Ga;*�ș!�z�CS&#/ro��N��=���i :i�U�gLp$����m���ͨm��]2�-�
���d����v�Pz�r����.w���v�Zŝ�^��(�Vs�0P�*XaɘO.����X����ȅΖiИF|�7���**���@&S����p�Q��7B��,������c�#_�WD=��)�6����C�s��*1Q��؄�t�
ܣ
=�y��r��+�hTM-S�K��)'����W�<J�Z�����W(�˩���bA���~�!;�Y=��s}��%薐��R�^3v>�����R�A|FW>�LG��E��Z��X��!�b���+��C��+��D�GW��g%�E{}�C,�)��9����(�@�����-���MG��O���5ؤc�m���B�Y�]N�:��,i��S?�ud1#�c�<��EΉ��霸���[r�4�+�֫����m(�)]	rKu�39ޢ��.@���j�VWZ�B�Z�7�Ӑ��
-���f�݊n�?SL�ΠM{�1�+�7��=P�밷�P"q�r��qwp����P��Vpwt����wmGc���B}�Y���
-�9��Y����w�,������-�[H2Z��a�c"�t:&�>��9I�*���4an��R�q����>4:�����ذEKTS���Ȑ�"���Jꫀ+�Ur����c���p�f�Ht*s ���(�����ȫ
-/A��S��_�[n�8�[]s����T^)�a�n�zq2ܣ��F*NWaрP�u�����$�F_b)z\W�q
�2M79�$<��{�{�����*��%G����c%����e�o�%e��k�S�6+��v��-��No�=ևa\@���l���U`X�i��ܣh�9�#�E��f��QΡ�y}9n�9������?�o�4�a9�d%7kD&�Ĩ���|+���Q�Z�['M{M�[TIF��ӭ�|Q���Χb:c�qe'PTͻ�
-?:���Ka'n��*�7�T���,�Ax+*�N̲��me� �>�ZL�K���b��X��:cʛy�Y��L��;7�͙=���z�+9]������*����̯�V�F���k���_
-j�y{ܥ�iY����,h�q�Z�8���l��sKLE�,�Q�k�q��m�F;ݽ�����>�[#ܠQ�7�G��i��N���Vz���..�@����Srqa�IS1����E�}=
�y+O,/C^&y�Q#F�T�G1�#��?V\䕈�N��=֯��"*oR�k����I�k��H�j��l!��B&�8��VU�`^�#8�����St�m_��9��hJC��f��0�*'�Z`X�߾�W��
-��G/g��8��i3z�{�1Ls%��m6�-�@�k��kj��B�%��mE5*��{s�aMst�D"Y<�f�g��j�>,��q�]����G�~��HM-�	�(���2��H�-5T�0!kgb�T��p�D���L�9(�t�h_PJ+<����Kg貛\�6B>�}+�$�a��bE�.l�Ǿ���G����q?�J�ټ��a��dc)ú������øPP�؊�:�z���+b)�[th���]XxBZ�Oa���Г�?W8q�?MS���MC��>��&�$[��'#lp��b�[�`�T�/O�Z�
+427 0 obj
+<< /Filter /FlateDecode /Length 2247 >>       
+stream
+x��\K��6��Pn
�2|?�"�����toANm�Ksh.��R$5�$/%K��A�]Y��!�����v�;���@��|Ҏu��UgQJu~y����ǻ��l�_��Z�U�u�t<�>ܓ8�
?�~�3�+}���OK�4,+��w�	kY�$�Y�fv�<>=���1�*f��OY�c��a�8t~��J��ON���;M�o?>��� ���(��x$L0"��4�ӱ�Sa#��G�cq���N�38m��m��f*��v�k��K���k�3p훋���ʙ!���4�.�x���d���p88.���1]���&:�&����c��d�����A.�b���%��+4u��N;
A��8֛Am4����fA-���8�o.�v�8c��!�c)����^��E�I	س�ī�E�e���-'LԀ�2Ι�~�8���Y#!C�R�&8�A���G�VO�=|D9QZ�+z�Gc	�
���s,��إ��ۄ��0l�r�gXb%Δ �C
4�ӥ,�����<���6߯��[y�no]y�%ƉQ�C�L�4�)kD��F%���Q�<2��L?�ב�%��Q��
+Y�0*�~D.�c|�9�K3�	�*%��4`=DS�L�8i��]�w�7�nu�Z
�1�d�Bf �3�/�;��)Gs>������<ͬB�۞��}�����}t��%���[������R�J��\�+�.�W���PN	MO��*���	��KV�!�u#±ī(qUv�MG!Qe����X��3ׂ;f���pJAbr$��
�������z
+��Ͽ=���l?�U�y��tA��ז�(�53�G�)��M���Jo#�P��\��.S}�
+ba�x�e�8�dk�����eu���,d:f\F��9�3_�Wd���S֝�J0�Y��=(�zޡ/^����Ƽ�%�qp%��^{�‱T�1ײ%�X�
+h���m:]F�b�?�O���Q�iX�+ͻ3�
��O��Qǋ�z/1������P��ӓ�ꦱ`�r/�T�nx��]	�\/�����P=;b�҆B�ngx �%���6�޻=(��ڃ%)]�F��-�Ѩz[��/����D�5S��ö��S(�:���W9�s<�V�9E ��?E�$�*�����ϢX���+�^/A&�6d�sni�v_�m6�ܕ��s��qgl#�-5492�䄈�1;ãֳJ�`��MXF\��|y1�;Miaޭ�{7���(q��%G����J��7�,��{K-�bS�؜wGD7ED��K��	�y�	��;�~�n<�����nI�����ݥ��.,ޖw���-�[8hߘ����P��
�����[��ݕs�M8wei�ܞ\��L~ƃ%�����X�����b��o��ሞO]��>)}�L�1��Wj߄q8�W��s�|�Tg\\�Y�:'���Dx~�TX?8�Z�x		
ްy�\�r���%�R�K��?/�����4���W3����;
+v��1Դ�Gp�� 3��Xאs&ΪI��Q!��һ!�^�m�$�M�1i�����}Q2��mH��SV[����v���jD2�-Xb{*�'4o�ڇ��M�Xei^�|D�,X�g����F���@C�6x��_�Ѣ��	�]&�i!B�ʚ-2�h�j?_J�N��K�n]8m
����5tJhO�>;҂Q���.�b�rR!���C�F�ғ8�Rw�R���h,�6��/��p����{�I@�FjkD�V�9F�a�W�C+�Š�6�M�k�bB�������]l�JK7���a'h5Wߦ��!M8M����k�8.
+�װ9n)s������m�_��wP�l�ud�cXy�p�9�Xʝr�6ǸS�m�F݃���K[�m�l�,7f,Ѵsq��Bb��<�@�����)}��AOe��̓Vb�
+-�����Rh0�|�`�9�2��_7bL���b��$�l{�XLlr#h����?��D�j����q���`����N���'��q�:������	�e	��~���Am��A��Qٳ�&����4��v�<,m�/M����tJhs�d�b��1�c��W��|?�0� ���X�g0*	��m���M(�lXD���-�wd�mK\���y�+#>#-M�
�3�'c�`�Uã��\�0@�e����I}x��`��e,0�d�g�T�I��f3��s�.m>?=��Hnm
 endstream
 endobj
-419 0 obj
-<< /Type /Page /Contents 420 0 R /Resources 418 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+426 0 obj
+<< /Type /Page /Contents 427 0 R /Resources 425 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-421 0 obj
-<< /D [ 419 0 R /XYZ 69.866 758.996 null ] >>
+428 0 obj
+<< /D [ 426 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-418 0 obj
-<< /Font << /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F85 396 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+425 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-424 0 obj
-<< /Filter /FlateDecode /Length 2024 >>       
-stream
-x��[Ko�6��W���2|S�l�*����-��v������%J��(�v$�Ar��yp�ΐ�շ�W�>���?~�JTN0nM�3�T�>|�����Sř�u�o;��a��������s2o�����*y��_x��gډ��������Dj�af�6������7 ���ᱩM%�c��^��Ԡ�t�R������s.5<��+��\�����|B8U�����(�c��R�с����� J
Ϟ�r������A$���5W^w�] �I�K]w�X��9�^���i�HD��	���sP���.*�;�ڿy?J<��뿕���G�n}��ю
-v�i����2�{�ee�߲���X�A���q*?0�E��4��&
-':�Q�8Bj�wej�\]|v������!�hBL��*^K&T�� eo��Ј����,&��|���+���}��)���I5�Qc5u�~�~b%9���F�V��B9KX�,�vo
-�n�����w���2c`����w����C��]L`G��w��<���F���Af�d���+üs�C�c2�;���)P>6Vd)j�5��!R��1�e!DP�-w!LQ��(&�����x\���e9�j��:�%b"�G��L�ƝNĿ}�ˁ��Li7Xt>	bܾy�K���$���;��8M�J�I�(��Lz���h.BAd����e��/�)���
Ӱa�?1�=B�0���Ԓw�!�s�>1D�XG��m�����vY��"����+∁]��7�*F��a��W2nc�ЖIXt*�=V�����H�>�
����b��XPl�z������zd� �I�.�ji�
-V��-w���T��.�
-��W���x0����P���;z��o��]R*n
-2�qJԡ�a��Sy񑨋�����n��?��.8\�f��/�D��^�����`��Wz#�yAͤ��S�D;qߋ�̘/gڛeh�g�[���9�8����>p�#&�3�S�BեIۻ�iܛ�w�s1�y�&Y�i���l7C(���ą<0E�9�<0E��<0�z�VP�U�����b�4�7O��2����4<��M�8�3�Wn���w�7�/Z�a�S������7�}��%j]��:|���xE��6|���p&P��"Sh����a�v�0�(cB�^+��ݬ
-���D��.콴a��׆lV�u`eac=�-�(d�k��+[L1隊><x�攟�᪡�n���z�)(��7?t`a��c�]��mw��
���}�cwI�������g0Q*a�����A6�{�����HoE��G�E>I��>�]��LE**}>JoLJ��&j'�$��3Fz�C�W���ɭ�wp19�ɞB+	%mm�9��Ɂ�"e�d���j�O��Ƥ�}{�ȅ؃)�[���ġ�S~�P"\�%*a�{��O�M�~s��IS�ls3mk�-����	���qt�n>���
:��1��QTz�7H�<`���vD�oz�50�0�&����)�7��Jk�"w��ޥ�c��Z��,�?e��$��FA����eo����9�X�\����Uf(e�M(�����
-y`�2hy`��2��P�r�\��<��4}7�>�9��|���U]��((���������o��ǈ-���-�8���S�A\������
-],\�.F�/DP�AH�B��J�P�7��� �eQ��_ؽ����ط¡|{;O�CY��]���`�dr���$
-e�TvP���ﵷ�ק*Ea��������;���pD�,	G��3-��:[T�[���Z�[rq�a��ȿ��ntF��wH9�bb�Yv�'xE
-�S��2J!L��)�PX�M���;��o;�0��Z�������]ۄ��S�%�c�L�7�B!���k�&�
��|�ʧ������L��S��
-���v�t��-�oܞ=j��`
��x�l�CħY0�ܮ�ș�+��p�,lΆ"�}h������>�
+431 0 obj
+<< /Filter /FlateDecode /Length 1976 >>       
+stream
+x��\K��6��po-�j��fv�rk�� ��ɥ94���R�,��<����]��l4�IQ�Ǉ���sÛ_�x��ϝ�oވ�	ƭi�aƘ�/w>���p�k8S�m��g~��j��|�w���o�=�7����s�����I�/k�v"1��W�	s�Im4�Y}����4�b�b���twߵ��a�k�>%ZX�t�P���ӗ��.8�2\�s����K����1�:�i���1��J&���w�z����
�2�	��.|���G�w���7��S�-}�FT�E�+���)�˧��;O<O�@�k��a,�<ț��0�F�Z��G��i���K��hsdt;g�휵�Y��A�w=HZRN�%�:>���t��>z�d��$�}��R�J��8M�&���

+ۤ�D�B�C\4	&ɝ���m�(,����nf/�65�v6�FR(����Ť�>�au����D#�Q��Mt��_>�ϣc���=ƹ���]�t���h�����LMr�c/[׻IX����
+�2�UR�F�!k���a�we.�yJLB��a�臈.�%LI����>�����F'�K
+��)풱z�oFt�n��J;.Sj�0��"qߙ�1�;�y�!�!����}��D�B�l)���<���<0E��J�b�	����bwh�b,EX���vזI�3Qs?Er������>���8��P�"����O%�6�0
��m���y�7B~�(��q)x8�� ��ٱXuNB(�sߜ���$#ſ��ξN�t�,T���4%7������O�8:
����MbhQ��?1U(Hڝ���Z�F�R	ޘ�*��vY�S��@^�S�񐐣J{
L�a��ض��9"E��8�Ћu��'�?�+`��PJ�)���l?{�˂�$���;�8̓�b��gm;%S0+�k@k�α	Ex`�+y`���C[HƵ�=21R�н�09����Ù��ub��6��(�b�XG��+}S������G$�l��#T�`�)o�2ZRT�Wْ��h��3pqD[,(4p=�U�o���=
+�+�¤�J�E�Ǯ��CU"n;�ώǏ�;y�CUˬ�{�p�ҝ�\���laK���SM���oy�O���_F+���3l��O��,���n�Q��ɴrPl�v7$�W�@���^��3X*��I���⏲P�II<���Uaf
+���:D�w�9�v��R4���>pl#&��3\Z���K��^���x��>$���)d��1v��|�PT�9؆+y`�:��T�@u�VT���k�CM��n��_5Ӆ]Ӆ�={�t��[c�l���(#�d|z��R*�L�U�6��:�%�y�tP�����{)�5�7[صM�=��l?]ƭ.\\|����B�9=�u{E��M�u])�ҕ1ņ����2v7W&�Wue���>�K��Pqou��"������Z�-�_m����i��oz�e�Ko�HFz�lxfxdx�$+<H.a`������v8~�6����6�G��y�F*��F���_���D��C�߷�!�V8E詳�c�͈��/#���U�*���~�le�����S�ȏ�h��a��ro�`̎w���C]��-(�9Pn r��Y�aj��5oq�:���\?���3\�&���vco�\���L���na������l{����f�،9��a��CVa��lk�U(�,;t���[�����LEΗg��wO�w/�m�r^C㑼i�c�V��e�h���9,�5�ڗ<��8.Ga������k���M��J�b=#��<̌��1EUK�[%LQ�m`%Lqc�P�s��"�j��_���!�8�d.�3�v7��K(�5��j���gp9�+�$�h4	���1��Gk6��i�
+C�nPc�W"��B!��:�`	��\�r&tx��j1v�F�������
 endstream
 endobj
-423 0 obj
-<< /Type /Page /Contents 424 0 R /Resources 422 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+430 0 obj
+<< /Type /Page /Contents 431 0 R /Resources 429 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-425 0 obj
-<< /D [ 423 0 R /XYZ 69.866 758.996 null ] >>
+432 0 obj
+<< /D [ 430 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-422 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+429 0 obj
+<< /Font << /F84 331 0 R /F85 403 0 R /F75 323 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-428 0 obj
-<< /Filter /FlateDecode /Length 1778 >>       
+435 0 obj
+<< /Filter /FlateDecode /Length 1715 >>       
 stream
-x��ZKo�6��W��h��[�b�*�����S��K�н��w(Q�-�t*�2 ��p���3!4_h~�����;�����K�6�l��v��3����}B�����
��RH�����w�Os�`����צ��P�SL_�6.�eV����հg�58��fg�]h�4�������<��w�m�D���ӗlJ�)�~a �j��5�~�@=`3h_����{��ęǗ^!�A�����&!��
-mz�����u3��ZXχ)yr�ѡ�Eu�[�m�t�J�??�f���`�n��~,��cњ�X(�&���������h<ą��_��(��=����q|�~���4$06�.������YT��.8a�cQ�SuI�v�R통�M����Դ��y���B�դ-�e78���Lٽ���> s�M�2i��q�����p�
-Q;F�7$������Rl�-�N�9��ɲhUo[<����Qv�'���D�,�>5�O�R���`3E�}g�Ɗ�=�S��BE�$��ǟ������-ro�%~H��֢R4��v�q�s^8$��p.׶4�����I;�[�'�&�+�b�t%8`G���� �3K5sP��nC�柣�><r���H�A#2�6-G��?�1�9�0��~z�d������GI-W�+�L%Έd�
-���̴-�d�K>4t+E3��nˠ��l�94+#��y宅�|@;�h~n�E��kx]4�+�L%Έ�0��]�L��h�(GN�!}�W@��,��¼���ȯ҃v�6Xa�}>�<�7d^�1�Nv�'�7efql�@�a�I�(�0	e�繝�0m�`<3�-佟b�NG��;lf�Mb~�H�Ct��ݐ�c����o�x�P �8F�Ya�	���̻ �!����:L�ܥ2�mi&,|9��_�	�y�c�L3��g1_�m��n�/~�د��s
-�E��B%�wf��I��)׶8R�/k��rso�~j���~:�0�k��G�l�JF�U�S��*���8�L���rmKsn��-�O��7T?5>��}��S�x�����0�3���rm�#��r+�Sn�
�O�!Bx��S�x%���Ѭ1�����rm�������)7���Fa�o�S�JlS���0�vg-����O��c�TY��y���d�0���U��6�cV��ā��]��2����}nkq�՞ݫ��zae���UD���]�"J�1��?��S����;�;��v����2ln�**�)�>���
&�o7Cs84fZo7C�3�6��F%�*�d-*U�r��$����N6�����^u�2���7T�Ѐi�k��I�$�x"UP��#�Ͼ$"�W_it ����7�J�1�|�75��u�d/D���o���4*�싢�۽���Us�n�b�&�ֽUi *1J%��*�ɫܫ��k���_Jb&@..���kl2�I�b��ׄ�£���ʡ:9��r>x�'�^���%G�	r�뿘|�C'ք#,��p��Ļ@��\XfY0+���W
-T"��;��Oel ϕ�F�j�y�a%�#���56?��fv��2`�3]߫)U�D4$֕DC%��r�B�&q�mJ���tI�Z�R�QY�\�'�*��<��a�an�J�R�0�AP��f�֨ ���<�OwӪx�:�)�Cק��D�w+uP�3���BN�֮F>܌
-��! ���A����$�����g����B�ܧ{^We!�F�A%�X��[��J��Xu ���'p�|�vq�Z�������I
+x��[Ko�6��W��h�@�@����mn�=�ݽ4���C��fh%f��VFD�#���}�� ��Kǻ�nx��wމ�	ƭ�g���ϧ�����_p��8S�w��W>�a�`B��ߧ9Y0����KW��Pؓ�^<�m`ډl����gl��k�af�mn��������N���=~�f<8+�I��{|�>�
+���r�U�ǥHG������{ȉ��K�rr2�3+daE�r��ꦗ~Fr��R��H�ɗ�˻�����y>��N1�{���1P?��ג3p����٬K�Y�p[�C<����� �@��#�`�HW�g����}0�?+������qr������l��8�w��l��ܧ�e��1�����,2�@�o��7�0��ɶ��?�T����Qb6d/Iy�����On�?��
+y�Q����p}̇t�z���lmÂs��A똌\���ǟ�#oV̎��I���!�x�Ɣ�Jh�`���S\d�c��,Q+-���?����g�8�ޖ���9�I8A	��2Z��T���ӻ�j��ͳ�H�GO}+��ua��2)(q=%@��9D_��E�	�H%o�uԄ�Ri�(�	Ha$�(��y	Z#qI͔v#��y{Z�aGB����#��y3�a�쿏�o2�+s�R�~�R�O�B�H���t������E�d�ѧ���;e.)��8&g�̽L�q�C=���B,T���3��X�oC���6p�3Գ>�h�*��V�O��k�uO0�x��M0L�s�)��e�C\�
+�긅��/|Z ��;�a%�Pk��RĒ7[Z��4k*0`���9|t�O�(�Q��m[����Q����(1��n��Z��&Y�#�N�܀�Tm܍`%W*��Zj�\e�@�+<�e?���L�S��477�H�ǻ�R����t@�.�ϔ��TD!1�0��2�8����Z.����C��Q|�g�G'vmɴ�"U��(�JJ�#�J:��6��iWA���Z�K�5�]K��[�ɴe��.o�h1!L�ϱ���Ffm��<�G$��X�Ϭ�wզp��H�#D2�Яdl�9�I$���S�I��^��,�u��Y岩��\�x�*3��d�rH�.0��j����X.bٌf�D��VQ;/��a�B�����npw޼L�Q�m<���
+}aY	�5�PO���jL��S���2kɧ�Zs&��lM>��]�|�%4�|���<b���C#i�
+6�'$��k��ۋ�NU�U\��1Z9{�NO�!�7utHFTI�x1*m��Q���
ʵ�Sj�5��\�tJ]� �Ti�9��Nq��H�#D���O�dl�9�I$�"��/H:UZeoޞt��D2� �9t�j-�Zk�e�֤S���K�2H��U:%���6�^:�Z셟>�M����Q:�{H�<Tʨ�~T -4T����
+�������L 5,�-fo�n��J�7#�:fD��ɡ���3vy9��
+<⼟�H��bO�t��7+k��x�	Y3uկ�5�����DhN�6��Dh��}SIkxD�����qu�]��dc�$���M˓��._��!�uo��Nx"Ɉ3i�w�KU3t����~o^i��c
+�/	�ZK��Fx%*�3Q��S��QIfo�J��v$á���]�d(��ק-ID%F����������ǒ�-9�L˄��OAbrf#�z2����9��
 endstream
 endobj
-427 0 obj
-<< /Type /Page /Contents 428 0 R /Resources 426 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+434 0 obj
+<< /Type /Page /Contents 435 0 R /Resources 433 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-429 0 obj
-<< /D [ 427 0 R /XYZ 69.866 758.996 null ] >>
+436 0 obj
+<< /D [ 434 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-426 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+433 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-432 0 obj
-<< /Filter /FlateDecode /Length 2141 >>       
+439 0 obj
+<< /Filter /FlateDecode /Length 1927 >>       
 stream
-x��[K��6��po-�z���aȭ�ނ��&���\��K�z���#g;����G�H����ºO�~y`���>Y�;�{ftǕ�����??����<o;�K7t������V�Rwu�=������/���ͻb=Aכ�+�zey\�˟�G�k�`f�2�������7�����q`9�hn������E
-�=}���̘d�	��gb�Jx8�����͇���+���3@޼��>N��W�K6�)�2�s���/�;��X�����N-,�`t���?~,y�n��&��0�1�8�M���'���"��$���q���Mo�|evy�N�@�2����
-;y������3��kx�12��ߦ7<�u
��ؙ҆�!|:2�֋�N�s~��.��Y�Q�F����2JίU&q�_~|�y��X��e+d�F�^(M��q�C�{g-|b��$�����?̔���U� i�������h\,+|
݂"clo��������"�xV�+�#�]+���ׇ�\�y����.���9u[�5eȡ�.a��*H׍.�)�P�и��#��Ke�ʆ���Knb��>�����s�R	)�v�x/ğ:�`ԑ"��^a�.50��>�䑢�g*x)�V�x$H���y�< ��$|�v��@�������q&����V!W;
{�+9�0��,M��p���>h�S4�VA���@��8N@'m }3[q��!�+l�x�h�C���usa�8�VZ9�26���$��,����^��5�|8�6�����Ĉ��g���e-��8u@�88\j����d�D�5/P��q���j����.���jbQ�U蒐��� P$����/گ&�B
�X�6;%�	�8���:��*c;@��RTJ��wɚ��1�k�s#8��!0JGS�"ꙻ��`�¤1a!P𙨁SKH����5���$�Z,���#���XI����"�[o�x����l��������I�hV����,h�����h������mk�}l\ܯbPrYyx'�`��
-���Dþ��%P*�A얱$��@"���Ha���-�׾K�І=��{�W����v��f�*�ǅ�2m�O(�h��]%7~ʆ���8�jR�x��+��ŷj�p�A�̪���B�-��ί�@$��T�g[g�Uc׬w.7%Rwog���]�n4LqG�P�i~��S6"��.B�w�;u'}�8���A��w���s��[�K�A���IP��닢+����(��.u���Ԗh�v����z�S���2������d�85%0H
��6l�V���&Pr>m$�,�4R�^f�'h�S4%�HC�K ���t�u�ÓՂ
���R��rc3ԑ�����C�z���7႓O��Y>��â�9z@�G������]�%��q:����;s��a�(m�D(��N���/��؈g�,���(��
-V��)�^j��) �"�!�r|BqD�-��
-	n<����@\s %�#��1C�x�6n���1z�|��c�;��j��r�k�h�H���)/2���e�<W�
p��b�����Y�:���
|gzU0RF.��8�]�����4m�ԜJ@a6��9�����VZ[ak3I�����J������qn�ԒAQ#i'� mA��o������c�}K=��8_�Cq^V8ѳ|�%u�c	t
-]w�>�D�bZ��\#�b��t�^���i
���5����P�g�`�#���]<Z,�2��c<�)pvE��VfM:5����zJV�C����atՉ
TB��L��+��R↱�k<l��DJ�6��*
-����}�Ѭv@Wth"�Tk-���Іx��!\nꎹ�����͙U��N1���s�x����*��c��P�ė��'C��c�j�u��K��
[u�zH�nnQ�K���1��G�S�n���){b
��F��mq[���m����K �;F��wy
-6����I�ͫ���"���jG�++���:� &[R�j��w����<7��ܵ���6"�s"n!��h'*ؠ��Q�	=`��!�s�7���X���0��F��u��5E=�f��j>ʂ��В���ǧ��]��
+x��[Ko�6��W(�hi�)��}ȭ�oANm�K}h.��R9�Rk�F���A�^��y��GΈ��׎v������;ߴc�a�j�E�RݟOw�>������Q"l���[>��%�2��������'��K߾vŝ��>��s���D|����5�X�$�,���h�� B�5������}��?��c�Q�t�_�)=8�
���{|�>�J�`��
\�Rv|���)�����+������kc��mb�uW�ň.}x-ٝ�)�s������
+��p���TX9h�@T:'��÷��x�{Y�t!Ea�Gw=��)uƢ�s�q4xl��lb��������V]��'��.��т�O1r�j��:���pmC�`�Cex�\9d#|T}��5��4�;,!E?���I�5�J��>�x�>�M��9��&^(���q�{gZJ7������Wf�����4!�h7	Y��m7��,">�p_��L��l�������o�D�5�hC4x���ɠ����`jl�RdP�ׂ���k��Т�z9�u���<�ྩ
+wY�LC��R���p(A`�#����ч�����%r�J��ډ`�3˛G�=4�}';�S��Ĭ�.擣�SJ��Ѽi�C,�.H�ׁ�Q�X�rEO�}1�ʹ�N��/q���s5�+5�4yEem��.\��X��X?b�Fl�]�ڏ����D��MeMZa"�Sx�թu5ie�x#��ĂHvC�K!9�6;�s_��R4W����ҏI4+���lya43:1�7E4v�
љ�r�V�M�}!D���Ft��h/i���v��ܼ�"�H�<kx֐�ky>�3�z��e9�)*Oęk�e�_,�m�Flc��$�h@Yv��L1� �f�8�v4��[p�y
��n`Fw1H��s����E�v��rm75V���:����=z�)�T>�2i"rbd٣��]�mQ�Ba+\�P]�/�GS\T�3�A��*{_Oy�w��ܼWVUb��Q���3�D}?�,�($�����6�.�`f�e�̍����[{���YC���5i!e)�^����Ul��X��q��\�Ef��bk-$��k��a�k��L���ǵQ�hӁըK\�~��#֚��ǖ��a�%��g�5��}յc�������ըC@N��ě��t���HX�Jb��pR5'1d=6Ca��Đ�371䶯�
�A�I�o�5����R��D��4�����39#=�=��Ciü�P�33=����*>�y�荔0�%���������F���um�!K'/c]ۆi3�w�7eBB�f�%Mja���b��)��؟~�&1`aa�D��/>��!V
+n@��f�$!�!���Bֱ��\8\�7��(k2�Z����J���teVC⁠<f�L�޵�B:Β��TS:�i�Z
�x����Q����S�*�!
��%����&���g�@S�;ШK,��]PH��ʛ�JnFcy{=$3T+r/^�c*����'��*�3	���L>5�+|sE:���JV��z���C&�D>��FXb9��6��4ߍȧ0ゲ�jh��䮬��$�ɕ~Ct2?�d!�[	�!j$,�FB��FH`A
+b��Cd7��܌��!�i]����eg�������,��غ4M[���85�h�"�6��rl�$��%��z��J<�ٯ���4l�{��j<sa���s��j<8֍��m��\hԁ%�!�o^�)̈'�N 9�-ávC{45l�C�`�#,���}'��Y���A�P�B��.�f۟�'D,�:y�x&���y�<�	�t��z��,������y'=�=��)p�~�:�RI8:����&2�F��������kd,��R(�:���+x�'*�0w��m���[��yd�w�W�k�
 endstream
 endobj
-431 0 obj
-<< /Type /Page /Contents 432 0 R /Resources 430 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+438 0 obj
+<< /Type /Page /Contents 439 0 R /Resources 437 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-433 0 obj
-<< /D [ 431 0 R /XYZ 69.866 758.996 null ] >>
+440 0 obj
+<< /D [ 438 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-430 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+437 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-436 0 obj
-<< /Filter /FlateDecode /Length 2207 >>       
-stream
-x��ˎ�6�_��Z�U8|�@��Zrk�� ��ɥ94��~GC���u�
-���)��yk���ر�V��~<�:=Ӫ3�WJu�zx�����M�za����Z�.c����~M���ҟ��b�m��g��e�m/
D���>P���;��Ȏ�k� w�ܓ�� �ǧ�W�:�G��>DRd���y���{�3c�1�%��	#�&����������G@��6�j4u*y4HaAG:��g��g<�D�gOb8fr�y�����^��1rfc�(O��r������ۉ�AVYR�Z��En��x��f�ba%A\=^�c.�����N[����tT"��	�9"
-Jp�J�ȝa�wr-��ѯ�?�#�kv�C���SZ�o��LD�����x"p�jfn&1��ыu\�H��\�q��5�<�L.�>�OJ�enjѦר�T$�Q �;��������8�p��6{��1[�?����w��Nek,)�sa��{è�c&y���(��X��[��-�Ο���"��ͥ
-�̘	
-�RW�"��ؠ��M�$�̧��Щ��g,Lϧ8G���?�0A�5T!��C񝇸�#k#
-І�(#
-
-ц�hc#
-ц�<�Fb���ZN���4{�@����[dKnaJ���o�|~���^�/&��u.�x���� �^YF��rN��g�$��K"}$�D[��8�v�z,����ū����+�gJ��4�є琢EY�KL�X��<��t[|/�q��S�&�B	nÑA�ϣ(<3f��(!9qE�G���%v�C�xAn�`���C8=�Xp��t����"x�CJr���52c^Mnj�1w�2�ZjyO=Cڜ�Pt��O!��
-vW���K�ϔ�jG���ݬ���,_L5�SpX��k
-��.u�ਖ਼���Ӥ��8�4��33pF��jOP4�=����sL���]�s2`$5�)�$cF��_�Nk'ς�FA�{�7V4���l�2)i��E�XO��ekU�V%C�㜖�1=e����`�v���K�,+ޖ�>�T����D^Ȣ[�լ�P��4%���6���4��ݓłn+��̤	�t��c��=�1'�4���e�X��z^t2/���?)kD�����D��Q�)�^�����-�"%�!���Ƅ���A�.�q�),���`�9B�H$Q��E��5���Cm.$�-:Nv�>	'��,���6�(�LA���=G��H�[�g��c��ԋ�"�F_@!�,a�����aX잘W��{�����\�CiF]Y}J��z��
-�
-}�C֔iIPsrN��QS&��+�/�=c�	��ȲW_�Q2V��-���ʳ��4��O�g��0�-�X�@�W""��2kZ�v�G���q����ksD���Bh�A!�砄�Bc������<k�ȫ���k> fm��!0��Ge4�*<$si�:�k���_$�lPb�B�Z`M����{	���X6Y'!��r(�-G`�"v7�������̯�n(C{�
�:fP٠�Ţ�+2Bz��P�;�
��^��r*�X�KK���|�-N���8I�
-��],�1�)0��*e���}���`qs��JӼ<�w����I��5�~>m'#��o]�7��=��6���X"7����9��R����-&J	m3��~&�
d�%d�6����X�K^pQ)oW�6i��Ɨk�᜚�h,e�<���#YvQiUث�-4�����0�OMM{D�:�xm�q�96�Gb��Z����I�¾��p6���!��x�an��J5�,n �L�T��&<�f�Hv��qЫ�ܑ�6N�v���,�1������E�s�MM�\�8(D���Q�;��M��CLN���;�ML�
-s���[��3~���L�v�XԷX��02��-��b1Xv����d�!���)DS ��Ն#�h�AU#
-q�p��7����_�a�vϩYmD���r���U 
-R�F��%�j�"���tL+;k����ŏ��I�m?�+|m��+G�0�6tU���!z��=�r��
�h�i��F�>
���DAF=s|�<UFѺ_�����]���=tAJ�³R����ɞ����oC�Ұ�n��<�F�C!�\�
�;6��W�Tփ���_=��F�2�����_
-��
+443 0 obj
+<< /Filter /FlateDecode /Length 2330 >>       
+stream
+x��ˎ�6�_��Z���-���rk�� ��ɥ94��~iYR�g�lg��wv��HQ|�ú��~}`��?>Y�;�{ftǙꕲ�_>|��	ϻ���
ݿ~�x7�l�w���oy���.����+޼/
+�p~����	�׿�OŊn�`k�R��jX~x�� ����8
��N���?%8��p"�����&�4?o1��a�3���	�	���2a�a�p��y�����UW)w�����{'�b\�zLa_��{
+oe�xl"�:�w�~|~����alfW�o��<��xf��5�g�9U8�Ĥ��p&�"BEoe�x����L��d�Tp�{d�qaL���1cxϖ�a�w1&ΌSL:�������>18X\r��>�-��Bj�8�0�$<(A�:ɗ��A~�S��)Hr�&�0�Y���\�Dg�sY��Y�=y_���3�=%�ౘjvJR��b�~�� �G8��Ҝ�fJky��Ƃ�Q���En��#R�l �I�Ƣ�EU�,�$R��&�"y	���
+�FL	55c8�>�	e�"pU�h���G�J|��菀��YƷ��Yzsζ-�ڢ�IzG�,�Œ�/dW�b=��v�q�C�{g-|bh{1;��
?����8^�a��p��F�
��Fb�q�"��:y��dx�SAu�(�OA/�|	\�DAn�K�n�L+��v���r���k�%Zh�����$�+��P�����͠�,nE]v�0B�(���?���Å���% N\!�yA�����{�2��eV���m)��,��@ċ�X�w[G�5a�N�J����HB��*�#�=���?����k��:Z�2"	>��AI�+b1r/!8���� �#�EUd�R��|uu�f���quIA(9`R������(��)��$Ȩ�י_�ؖp�Ĺ��U�1B��4Qr�Swai�ВCY�G+ݜ�o�=�!B�Ͱ6��V��Z'Pt>mD�,�4��^v�cO�6���b5��w�YBo+�Y��/Y���B(&S.����*�)饻��
+�sN>mX��r����Q�j* ��t��%��U*#�%jNw�f�ʢ�4����:1Hd�õiME�7S��ՕG�^��&��lS��T1�S0������	���h	+�K���/ڹ�+ڣ�P2{z��OTԍ��A�W[�g;$�o�xq?�W���px�[��Mא��7@
+�7nU��O2耚Ox�� �c�]ZFZV;��OG��UII�t��h��V����J��Vgj�e ;S�V5ݺ�[�m���@�V���+�7��qr���[q5��w���NȮ͖P`�Q�_�	�}sى1u��R�au�o�!շ�4T+!�ּ숥<�J�l��%�=��\�5�M!?��F�
bZ#qG�  {a�p�@ɸ֗��Yţ��)۰y��9�.��=8lgLq�2tҩrWO/P�S����F���tU��B�zW��R��M�����F��I��8_��h� 6�Ī�2�Fm���B�4��T�-� ���(���ntv�\�IRH[�3���\���:;�Z^����v��Q;���ޤ�+�E�Q�\!72��
wuʌH��pQ�ˣ�2>��ԧʀ���]�&�D��FM^�Z
+ц]h#q?O#����d�`��q�"�9�q�@=B�Q���\��f'e�$b���:۪FB<&��Z�Zo���FBw� a�px�V�Ag��S�	%`J�!�s{L2�����1*�K]����!�0�?�S��
+�5S��4���I�2���8��p���r]�+�",Day�'韥Y�Z#I�s&9y4�ˍ�aП��rvm�Ȍ��\ϓB;�v|b��I\EsP�rw�*��a���u��X��F��er�}�K����$B�]����$ޜ���\~?gk_�E���;� *�7rU������z�����z<;UK��k(����A��U�x�!N#/�b؎�y1є�aemā!�p �oā!�pdqlD��0��mD�!�S0���れ���V���|�%Ύ��ތ/|#��>���� g@/�&�������l�@M8[w�7&@���	��ŪJ:�}���ܥ�!|RI�߿ٕpc�i;�h�(��F�~����Ç�2�~5H2����MJ��T>uV�Hv����(C
�㦞�*�y3������Aj���1E���C�Q�����S)W���1q��rw>B��r�{�/&��LF�SSa'*
Oủ��V�~�G(�C��p�z��X��a��5����y�"T���8���
 endstream
 endobj
-435 0 obj
-<< /Type /Page /Contents 436 0 R /Resources 434 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+442 0 obj
+<< /Type /Page /Contents 443 0 R /Resources 441 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
 endobj
-437 0 obj
-<< /D [ 435 0 R /XYZ 69.866 758.996 null ] >>
+444 0 obj
+<< /D [ 442 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-434 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+441 0 obj
+<< /Font << /F84 331 0 R /F85 403 0 R /F75 323 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+447 0 obj
+<< /Filter /FlateDecode /Length 2076 >>       
+stream
+x��[;��6��W(]�:�C�v�V�ž�p��n�"n��3��R�.��:��C�h�|�źO�~�c���w�Y�;�{���s�+��?߽�����M�z����
���b��wu��~K7��Х�/����ۂ��N祲��<r��g���X�=H�5<S𨀲H�pt/��T��xw?��j4p�=~��TIh�m)D���{�c����@���a�����"��.}8�3o�����K���k����^�3�>�1��x�x�<�霘s�ϝ�
+lV����}P��e����*7��6�xR��Í�μ��4��&�V�)�8�_)�Pi6�)FS�fJ�<�S.���ך���
+��e0��I�-O�x�|Ͳq�X��8����U~|����ڟs���Z�0�z̕v�PA!Y3�=�h�����l�ߏ���j���{t/&�'F���&��Q�*�A19���2�$g#B0sx
+v\�t�p������/?��$nI����J����������[8�̻�=�{X����~5X[󨚟 v	���\v6ꖷ�a׸�)ņL�-��s1.EJ�x�
+�������|�A���0�l�^s���Y���s����=H��[-���v1����y�P6��"��C��|"p�
����%�RǨ�E���Z3�����a�~#P���JTj�A)�Û����xS��~l�
�_>�Tq����x^L�"���P8��v�C�
)j�"�t�q:T��S
8�م���$�<8�1��0�Q1V��9���)�1��s��H��K]3��|Y����,S��r�8�^��h�p7vF��kÏ��	��yP�
1j�R���1*#����Nx�w��C���7$zZh;�A\�V�i�Iux���2�jl����u�H�:�� `�cs���9��z>"�|U`��;�荞C)6��E�P�{N.��sk���9�F{{�j&�����r�$y�����FZ��a��S!F�#E�/�|'�!'�Ç);�"�|�c�g
+Π�\�l9�O;��0�3W�%\�cc}����i��k|�쐶�q�Ѯ�R��K�䩩
�d��@B����i8�ƶ�����yhF���*����}�B��6
+��L4?�p���ncU/E�E��u5a����/ʥ�{���>�l�����6��\K���M~QhUګ�-������ �Cv�a�W��u��miȱ�<�U��'�[�-R��t��WK�,)f	��c���qN�e�#��E@MM��b��]�I5�,nؐ�&�3
+���H��c�}�Փ;�
+II�x��'��Y�dv�[�'���L)��tu5�m<�T5�F�e���S.E|4�HS��3h�~��k��8s����X;.�ǚ�h����k
+�(,;�Ƣ�b���ц�ES�����h�A�����.ր��I�l
+1���Z�uZ����4��#z�e9z�F���
+Q�F��%�j��E�0Vvi�"���B$xr	��a;_�u$�b�
M���;A`�k��5Ͼ^�L@#��6@#�6���:��QO_�Le�����,F�j zaT!J���g��7��hK��D6���J���B6:m��Q4aU����ۄ��6mQ��
+1�v�6�d�1Eڪ�@ȋu�ۧ]�i��xc��5}�����c�W�G�4��骐Xb��i"�sz^��ܜ������$b�+���5��>1b5��{�TMX	�m7���!����I�#��vIc��sL�Ӷ��6�K"��V����{?m�+�M)Eb�%�Ȃ�q ��ȂRlf��oS+ĈK��朜�&4�WJ�2��	����7a~�b�E/ﾩ4َdO�]{��-VTi�Z�_�cC�����.rF��u�uw�����&�r�jk4/ �f�5C�)�rhG�
'2�&,��5��aW�Mf���c�L�E[C�{J��|��{t��҆KK��H�%��U���.����KE�j�g�d�*��Z���Z�S��2���fB�;���Ƣw:?�����
+endstream
+endobj
+446 0 obj
+<< /Type /Page /Contents 447 0 R /Resources 445 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
+endobj
+448 0 obj
+<< /D [ 446 0 R /XYZ 69.866 758.996 null ] >>
+endobj
+445 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+451 0 obj
+<< /Filter /FlateDecode /Length 1973 >>       
+stream
+x��ZK��6��po-�z��ff��u�� ��ɥ94���R��,�h���4��ȏ�a�4���T��<0���
�����9��������Ɵ8~`v���~ơ�����������UC���i��<Wy��]�ڎҰ���_�G�b#��8�x��Im����+�����q���j��^>&Y&T��m����yx�3�.qX#p0�'��݇�_�F��P\y��>.�-�j4(a%��	�a���cf��gP�U��*6S�o&ý'�a���=%^����-�<;'�TQH=j�+�]�x��@���tʙx�4Cz�T?�L�՟���Lh9��Z28���A[=*������>�w���8Κ���1��`���~�:qm_����
�q��/��2��G܇�
+
3�r���O"��g��u�Z��=i��z��̉nÝ:	�����A�*ڌ�9�����ja�uB�e���k[�����"��̠�ȅM1�M���;&�@�j���F��Y>���S��vJϊ�
+��2��_J�0g�XW�J�g���|�˞_�i4V��k+�{q����#;>�z�I�\��,���5Zc�,3rL�4v}����qѬI1aޞJ���y90t� }�&�dA)�x��A)�x���A)V_n.!��(��7���H�����k2��>B�T��A8a��*U�Na��i�!��J�¨���o�?���B�2��ȿ�2��I�U�T*9��
+��DnZQ�"����y�dF@�׋9��v�7��`߾nh��� 'y;��ɼ$a���A�d��-�5�4}S�v{0�@f?�i��0��)��-QP�:7�:�A�`�e��[o�A�Ʌ�s#�7čd]5�ˈ7�0� :�;1���)4D��((�x�h�ɃR�� �����Av\'J�ǃ8r'J�v�Qq��<�F��8��P�D馍�[q��>���&NǯF�ibDa\�Z��C�;M��mT��f��SP��Μ��~Y�d���H����K���"��&�᷄L�a/���^�YL��
+@�0�!c�֐�!�Jl�������I��P���ijf:JI!:*���/){*��ፖi�(Ǝ�dw��gR%�ؕ�L��;�+!�
+�t]�X}<��:yP��KRa�k��^��8�e��T�v�3�<�s�G�+-�����Nk����5�*����ܭ1�bE�}(Z�@��t6E )�An���ٌթ��Iqu�
+%�EӤgg"_� �M�j*�"ڶU��E�1�$)e���W��k�$�ʉ���Y�DWpm� U9���ZّG�]MCXS�9�ђ�T�*pd!O��RJS�f
���R��t�;�<(E�8�<(�^�xE�yC̲z*Cݞ��
+�ĩ̇��Xo��Mg7Ɋ
+��U�N�x�x�aޥ��˦'��TB#B /�n��d����#ʠ�1
+��B"n�G���<�}�d��G]�et��/z�JF�F�D�V�0�Nt���o�T4B��&�х���ko�1�
��ec�; PSO���c;cByH1��*dv���hiY[g�s���3ƅ���L�K�}��9�����T0{���%M��v�>
���ׅw�-�3�ۺ��f��	��dO�5��5��v�v��
cp�G�/���!��8�<�8�8�^|�=��3�T��I^���ûsQ.�n��a�r�E>��u�>�Z8� �BN��h��]�ݾ�1�ӵ�D��L���'�,(E�q;yP�.t?��((�x���A)�q-F���*1��ɹ����N�/_n��;�Ȱ�+խp�V�3��|oH���*Q��u�*?7oU���Kx�7�J<+^��S�4���e����2��$-���fq#r��J���[%�V;��Qq�Ф�J�5)�.}g�}�.[��!h�9��ۻ	`d����<Z��\�e~y��J�h
+endstream
+endobj
+450 0 obj
+<< /Type /Page /Contents 451 0 R /Resources 449 0 R /MediaBox [ 0 0 612 792 ] /Parent 416 0 R >>
+endobj
+452 0 obj
+<< /D [ 450 0 R /XYZ 69.866 758.996 null ] >>
+endobj
+449 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-440 0 obj
-<< /Filter /FlateDecode /Length 2119 >>       
-stream
-x��[;��6��W(]$:>E�K��|��*���E���gHQ�E����zSh�V�p��7$Ot_:��� ��$|�NvN�b��tC?�������O��O��w��~����~���;vw~/���v��ۗ���T1T�a�o�'3�ou�1���������g$��UR�������$hd��?gYFPI9�l�T������Z�\�Ih��B����O��Nw�#
-@�<a'זB�;��C���.�q�E�|�"���|���N��]i���=L�ڛY�+D[�i��#�K�<�F�TOk��+������A���l�wOI� �!�$��j?���|W&�������^u�z^�lm;�X�)ٹi�,�!Y:{�L��(�F�����@(���.�u�sJ�U���)F�K��mZ8��1MFu��,'�B7���w���{��wv��J�'�\��MrD���ɮ� �s���I�@�Ӣ�M�����a�Q��u�]OKD�K
���o�(~Gr��y���9��I��
-.���)P>N�lR���GJ�C�8ϣ��"�q@��d�)f/)�j�`u���wM��Ȯ(���M���Q��#���9_�g*�Y9��H��JQR'��$�z�c�8�l&
-=�Η�xX�(J�
��,Tg�Ȥ
-L%of�ԙ��.x[Ҙ%��7iZ[e�*��L�L��[��;f;4'�<�)x�)��)n����{.�b��	�J8$V��_�V�c�n��
-�[p
-�
-//�sǥ@JQ��=&�e�JBh���`�%��ByUD_�8w+X^h�ۅ�
�ޡU���_��$�z�Rb�pB=�)����V-�\\�r�|�*y!G�@���Mܥ��G#ͱ�
-.8U���#�m[�-H�G��f��p�_1Oj�����@/-�l��4`i��vN��j��gSrJ�o٣W�l5�2ϙn�{	�
-,LB�,
-P�ʋo1����f6݇K[<M�,�!�
-s�.e�� �#�ZVGLq�I&���S4��KJ̒�)X������@���)x<�'0Y ��o�~��J���
-�&��մc�f��">��R�Wĉ9E��y���8�+˳Ķk��[C�$M�y!{�-���M^�(�H�X�ð�Y %���7!�*�\/V�X��½������и�Vk־�Um��FQ$)q�{J��u�l)�0[��(���f�Q���-(����+�q�aY���DvN�d��$��������'F^%&�b�3
����@A��)x<P�0y`
-�L���H���^6�Ln�3F]>"3r��Ja������P+��k��x�R~ %a�Z�
-G�QI�>����=?��ʌp�f�οw��Y�=���⣗n{�[+Ap!n
�"I���"Th2^J�\��2�p-5W̫ȡɫ�G���*��p�8zB�̪�7�v��c��?�u��2��q��(.7��Ao�Ȧ���r��}MI�Cy��Y!���22�`e}�	L�����SlU�W��TY��b�i�J�o���W�����jU��b�q&��2��%}Mٹl˯�P�Tҩ+f���"Vܪ��C��e+̀���v�CO�?0Q��Rn�_��a*��ɖ���xA�^J]�U/�Tmvδn�}�:O�!���k��⠼�E9$�����'��2
�s~F�Jf<�Z�HsH�`NT��Ȟ,u���pm�ͨWМq<c\X��e~Ļ��u%}�ݛ�S���ơ����A^;/�n.n��9Z7�%�gM�2��2���#z��U��(��\����K+7UE'��	�5��iS��Ha��@i���\g�%�*�{H�Lh������1��A\���.C�t�ę�W�	����)x<P�2y`
-L��ǣ�%�"�R����O�Tb�/���{��L}�{��ؠ{ie�n���ʟ?~����ƨ�)�����y��2�ϯ�J�X�3H	Z���v*��K;G�\$}7#}�<�����.���\�M�V	�6��v��T���߄&�j�Ҽ�&��g&!L��s،<���逧�8))zi��n��MN=�c%�#��� ��
+455 0 obj
+<< /Filter /FlateDecode /Length 2295 >>       
+stream
+x��[;��6��W(]$2_"E 0�{���s��*���E���gD�1�H����uX�^��3���!ź��~�c���w>Y�;�{����0���޽�����c��c��{�\Z�S�vu�������Х?�?vŝ�?��[o+m{exd����氓zP0�|��Q|m���ӽ��gw@���ݫe:�a>7�Ç(����{�Խ��1�
+.�¤�pq&�������7H#���	��y��}����h@3\�����u�kd�0�7�;��p�õlW��H�9�@7�ߥU�TOLt���?�����U
���A�Mn-����4c����AF�tY����*u�-�����H���{�:�rޔ%��܌�%~�7~w
+�&F`n�@f3X��{2LydBGn|�c'i.�M-��Z���b�\����t�<_���z��&��J�.��]��ꌩ�����G^g��7�u��u�΅EjѦ���Q�H�~8�ZnT�!3}r����x�)D:�cc��P��=rU�셴Q��m�u`4�9%('�e�����	2����؃��[��XMSJ�u�bE(ט��窪�col�dq"�8e�c��]� �(`N�''wm�B�R�)�DWwOؘI��
+mp����cɪ�{_v��T��~0��E���dv���n�s]S��{e��z`�TZ�,�`�艬��-�&�m��2���Ά
+[��׻�j�
+��b2�PvK
+(!\�vwl��N֌))'w֬([��_Mrb��PS�ĤFLM9o�(b;�O"��	v�i�
�zH�5$Ga�Tc>+��C]v�=!�q�����3�xaZU?N�z,2}Z�c7�����!Ӌu)����?��R�Z4�R��$s��<�1�=�,��rD����#"LA぀����@5��S�x�bK�)H<p����(h<P� ��4(�y`
+�ވ<0���)���1��04OkǨ�M��gw��a j4{y}r�'�z��-�Q�Wo��\g.���D4���H�3�jX+�r���F��L�ĂCS�`�0����"s����M�Ċ��cz#
+�_ t���+�/t����(^.t� ��7
+1�c]G�J�>�7��z�Ҥ��׏)���V����;`y���*��R]����6u��l�� a�����3��#l��=ݓ?h�{�D�lՔ �������(ac`ײ�GY��ef��pܱ��f��S�b�e���T{]{[F�:[$���|���{�NE�qh�(��}�)k�.��2��ѽ������b�d�e�2/��mG�O��׋3��c�ZFYO\��{��[{��$�å25�����LI��Yן�/K�=�&n{��n6��ܢ�v��%�L�������i��S�x��B�)h<P�y`
+��4�r""L�rOj���?�Q�Qb<�����Ӫ`��).�������N~|����'�Y\�M	o\��ܝVɨ订��njK��b��&�Wz�ę:W�R�S%�TQ�9Iq����1p�ގ�т��T\�/���6�=f�I�J4+�����a�9�S�<�Z'�
+{m�>�і���`��;��v۞�z�۰�۽=5p������OD����*����@.A�)^�@x&�=�U�.�xv&.nP���*fD��0�,���Ⴚ�y��"�K�71�Lie\Ԡl�wP��j-�Q1t~��ѩ-5[�0Tv���"�}�jL���R9|u���:$�I�բ�$�
s-��5���s.���0K�y�%��碮G���S�����K��*��մ��@�S8<�NĢ_���N߄Nզ�x���ʩ�p~k�46��u�:�y��e5�ͩ��rVlX�R�O�����:pv��Ռ�T�q� ��4)��,��k"LA�\��S����y���]�ۃ�|F�YN<�F%7qXY��Z�����ƣ7�=K[��\���e�0�A/�L�
��0J�5a3�R�;�zGcFR�7�Gt���R��ʛ+�rx����7�mU��Kl#��A��q3Taf�5���yR�|�ٹڧj�X�qH�jAFA�7�Ӊ,��'"LAがC�)ZU��=TM�za+X�x�R�x0h���z��y��YM9F�ʍhٗ�TB_���c/F�~���zUm����)��h�����(���%�������8�z�g��j���c��0'�����_��e8
 endstream
 endobj
-439 0 obj
-<< /Type /Page /Contents 440 0 R /Resources 438 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+454 0 obj
+<< /Type /Page /Contents 455 0 R /Resources 453 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-441 0 obj
-<< /D [ 439 0 R /XYZ 69.866 758.996 null ] >>
+456 0 obj
+<< /D [ 454 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-438 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+453 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-444 0 obj
-<< /Filter /FlateDecode /Length 2204 >>       
+460 0 obj
+<< /Filter /FlateDecode /Length 2281 >>       
 stream
-x��[Ɏ�6��W(�Hd��5�o���)�}��%��"ťHQjj�m��i��bm�Ǫ"�2��@��(|����DɁ1
-��?>=�{����@Fn���\��Q=�=�}�=:Z9�?�?՝Ǌ!+��B�Qh�8|�k��Y����/<)�,�x{dA��(p|zxu5���F����C�ŀJL�f`KƆ�Oû_	�&�p]	�.J��쾿~��	����x�;�Y�WWݖ�
-9j��%�.�$}��u�����?G*P
-�	��t%�X~�i"��?�VL�nPl.�0��nq�љ���ܤ���Ww��E6a4�2Ro���I��'u�-��e��j�ZE�F��2��
y���e4n�쐾���{��o.c�an�AzrXT޿U6��č���Y����2�a#��*��(΁k�&�JP���&6�5��T*ly���`W�LѴ-/��>�ӎ�o$�*�=�G�TiYDi��&��;G�Ҵ�&C�p}�(!I�������M���IB���#7�!�ȸM�(mm��r#�.��x���^�s�O!ȥ�y�8�3ȏ̱wo�jV�	�	�
-.S�1)��M��\�H�76yx�#�)�Ѵf(`6���(#���@�J ���c6!	gaj:�5ƨ&t/�1-Ymϗ�vꦪ�Ф����K�'<����-c�7�V�B�m��Xp���I���K��iK�A\�B��#F_�b�������nZ��Z�0��	;��#\rv�.O��k�9'-�dqE���\�b���2���I	�1-z�φ�!\�Ã1tt��M"oΐ"���յ��2LB�K!��h�x<k�@BvDM�4��0K�~R(�C.�>�"�r�Z�'����\-�����Q��*ڤ0P���@��#���,A��u��}<P~��S��@�G'L�����<0E��v��]<�:�ǣ��ざ�N���B�N�����N�b��̭v����y����?�{q�q�
Q�(�������42��:_��ufa_��%�D)nՉI�DL�"1������2�m4��,�X����w�ov����N�ֆ�t��ѣf��waa����,x_�K�����
-�\F\G��sj/��D"�������'��1Rc��T��:QG\��,���-�ڼ��,����EatE������DM%�%U�[�b�9�0���Wp��$�{�#֬~��.�k��g���
-x���^)B�F��$���-DmmF~_�9g#3��ߵ8��+�b�((ۮ���ܦl��t|:�+ޜ�Ih���C
-O�_Ֆ]�X���{����6��3'����,��ـ����Uя���^f4D]����/�To��`�\�	f����CV�d����BNL��pY���Ep�W&�$�F
���`2$��$�0���To��T�VT�ԩ�h�M����Vh�9G�����������p
Y�2m<)��i/�Zؽ�oS]Ҏl����hv`�����cd+H��?PS��9�co��?3�c��,'AS�T��Kt39R�+Q��7��cn�u���
-kaG���6��z�a�)��Jd�{TR�=���9��rV��֪ce��Ox�l���o�Ϲъ2E��gn���v�R�y8�N���)p�}Am���Z;���щ�����~
,fa�/j`�F�8���u��^[WLCe#vk]a�}�����3)<ٽuU��ֺ"�w��Q�pP@���!I�;9�Ḑ����@>����N�I�y��m�ى �:fW��w�p�)D�$�����4�<�ֻL˸�)�;St�y���,���Br�vo�Tb���SG�E1����{q:������P��L����>�Q^��V�HV�ɸ4>pi��<�\�m}؏���s%�:���7cj�r�C�
�P�:�Q��_�Vi� t;����L���~�B�eundD��hϻo<Q!|�P3���h2��M����*J�
-`5�qG������V`)(nL^�ųD2D2A$��+Ӂ⅀ތ9K�ɠ���4��@�Ne>Y��o,s[�	{�50E2!st��wD&�Gj���T����Jc�9�'���x�!��?@�*lo:��Z�5�&�E�� J!G�]�^�xa}w�5���)p�2�A�)�A<���CF*܎?�2IN�ԗ������
+x��[K��4�ϯ7� �c	!�#A��2�'{ap��Sq��r�ngf{�=��RW}�*�Y��c��O�����7�xgyό�8S�R����������p�б^�������7����~z�1��w��}�;�
+��2��+�zey���o���Jn����'��{ w@�����4?in��ߓ,�$,h��{�ؽ��1�
+.�Ĥ�pq&�����~~�	`e�?��λ,��d�Rp�{b�3\�(3[�)�=0���2�_���+�/���E�^�F�^TrD��A�̪B"iQ>�v�P��E1�T��n&�754,~,��n6堪��R�b0�(�M���7�10,��h�	+���'ޜ,�2���a~�JS�\�Dg��5q��,\����3���p�������)�똄���
Q���J��w.�	[F�x?��R\7
�e
+ }�;1o�F��`�Bb	��D���IT:�g�[O�͜	s�ϕ�	oq'��ϓ:�;k�c��Ōwhʿ�����y2�J1p��@q�G�j#D�px
bރKc,`�:.1�d{f�7�p͆��50	X1�l¤U�c�}Lr�;UHR�$W�| ��VR�4d�`s����s)D�p�e/���	ab��x
Ż�ߌ8���8����ވG{���ؘ⁑-��:yxdS1vG�5�-�Ŀ2�c�i���%e*F�B��*���N���x孆�P��D���H�ư������G�5�O�u;�����s8߮s3�V-�h�=!o�	i���M���@�N�J�d����=	?�j����4��)�a^��,R����Y�X����+�oPl��%[�
+�
�f+c���Z,U��DF����̔5
+?"�:�3��5|љ���gT���zuA�����>$����'�B�%JDT��v|�}��`�ǡ�����ûW���WL�����ΘB�2$ys3�97������ �$��t	6?�hS����#:'0ґ�Z3�ٸ��V[4G��,�3�a���Uc0Hu�{�qkW�#C7>�h�� �Y ��A�r��J}4�b��®�Т��ٜ����;-�x�Pu��Z�Y��(/�N%R�u�08ܪ��r(�*>���Mۦ4��sJ
+`��<�P��|5��)���e�0R=%�93��Vi��hvBф'X�F�⁈�Y�=Q�u���'*L���B�MhТ��VEQJ*��������g�����F�_�J�a�HCi��+3>l�n��VS_@����2K���)�w���en2��jD
LцL��<0���A�o&"E�hE{�	>� gr�o���(� �o:St05ݪ��I�t����oi�7w�Q�9�{�+�Q��q%-�(f8:�
+1���0�WTt���J!n=Q�˵}ڬ�oLA%Y>�Zdj�xǧke��AG�FR�o6�o��-�
+IGƙ�YK�4b}$F�-h\'�6�X�s��U_*H�� f¦�9+���q5s-�vpj�
�{��.v���^ѹ�17G�䛡H�4��DB�ЈH��)��n�)�z
+�÷Щ[Q�_�o�&o�'hWmkk��"]@�5hRތw��į�Jsx��S ����D�
�M��\ce�f5A>���ő�3��K*FY�Zr��=~|G)z
+���In���ʄTL#�虻�7���b+#m�JLц�h�y`�&�|m<���o���z�@܂�ۏy'���/@\%%7�s<u�3�g���)�:���gٽ�⪷��{��7��x\� $d>�͡R��Q�c�3u��g��HD�צH�(*����k��!MG>��&�Y+�Z�a)��|C1䇼V��=��ᾱ|?][R�?S�� M+x5��=tȣܞ	ڑ%��f�ʽҴo���
+�N��R��O1�4�Bm��I�$��Z�V���ܺ�B���G!;�d����u�a|w72'Wܯ�B��O:Da�ʴʙ�2a���0L�Cu�,�F��O;UL���n �`s��du�ãt|,��+�k�hG��Gr���v�tj��8��6��=w�uEA{��K{�fe{��ץ��Ig8��~A�<7��/m��o�&ޟ�����	k.e���~�\�C���sf>�-O�:1�!L8�p�
�0����X��s�v�:¾a����wb��&L���ͮ��"��1�h������S4��+eB��-F�<0E��<0E��<0E�F�<0E�<0E���b�)�z�D7��=�74m��D_���h�
 endstream
 endobj
-443 0 obj
-<< /Type /Page /Contents 444 0 R /Resources 442 0 R /MediaBox [ 0 0 612 792 ] /Parent 409 0 R >>
+459 0 obj
+<< /Type /Page /Contents 460 0 R /Resources 458 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-445 0 obj
-<< /D [ 443 0 R /XYZ 69.866 758.996 null ] >>
+461 0 obj
+<< /D [ 459 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-442 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+458 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-448 0 obj
-<< /Filter /FlateDecode /Length 2408 >>       
-stream
-x��[͏���_�����7E� ����eoANI�Kߡ���ϐ��Eٴw���k��p8�����������_8�g,�уգ�z����O?��_����F����/�2
-�n����˿�3G����߇��犟 ��eeܨ,�����民Z+x�|��Y|〄�ݣ��+ �����2�sX��vx�-�2����u�"�޾?��1�
-^^�V3y:�������ʡ�	�+����ۖ��-HX���Q&Ƹg����Z��{��N���!�I�E5�½��g/�����F�x%�p�}�w�,��w��kb��R��a��uI���"}�B-y|o#���Ud%37:1g��L��%y�SCL��)
-t��Xd!WvR�+~�^���>��ky����������t����VW������0m�Lb�2��#�\ל��,e޸����	�O9c�ȴ�Q��9=M8�� p_��eZ#|?cI���=?-z�謅����(<d!c��߾�Û@�D)�)��(��d�V���-�3Ǝ��`ȫ��s�z}��C hj�NVK� ���8�ϑ�j���'�5j}Z�Ч�(�˺&��A^��A�.�%�p��F��8<z&�<�-��\��YS!r��8�,��4������<�b�t���C�����=Q�>��R�P�T��_^�j����L�-1E �]��)�x �u��{�������$�m��aV��(9:��ˋ=�EII�P���)�Ȗ����@�R�`�'�$,%��w�Ԣa�\]�hi��U1�����LTV�.�����x^Ơ
�>����#�N�fY�3lҜ�;�H���7C8w9+G�I�D���/M�А��)�'_�EY.(h9�2]=o�(y�T�eYx)Dpġϧa�'���ĩj�¯�Pi	�-��RA؅��l� �"���.��S���g�#v�A�%G��Sx<�cQ����PSXZ�����v���	�6�F�'�xw%yr����
-�S��)6m�f�L���3��f1:�1c
-{�3O�44���� ����E�MV�GC�Jܚ-�3�^PW�)���f]"ؾ��^P
-��¼fZ��i�;�����c�8M)ِ��.0p�NvA�����-�����*�R��M�σ�s�\��TTIT:�j.�!)B�ܳ���n��p�F��{�����r�^�4�i�;�b��m�@��'�EIEg��)��0�;y`�>(�u��}<JT�d��8`��cA(�x ���)�x T��)�x W��)�W�)	��G�h��h!I��鄅Is@�&�ؑղ�[����c���j��x��Q���Fm@���r�Bf|��Lnq|L�{
-0)]rM�R
�6ycj��G�<�Z���p���
ޠNX�}�SV��<x�����
Ub<��<�r>>p8<6�.�~Q�פ���L��MS#t���Ҩ񩾯ȥ��:w��n���@�f6�gH�U�:ϧ
-�N{�7⮒����~�.��*��)*���C� ���!9F��vdh�!�>6ǵ��4|������H���t��uP��
��5y�6�9�����
�+��Y�| }j!���cҤ;Mڤ��j�0���{�fi���9���Mө�*��]v&����w[6�a+vnN5�
(� �G�il� ��uI�Un��ݝm���nu��@÷`s]���#CSu�NW�R}��Z�[���P;{B��Қ��U��>��S�
�ߖy1��B�:�h)�B�\f �3���k��zWPE���0E_PE���St����ǃP��@���S<��uvx�#e��0''�u
Ze����O`VU��s��u�|/�^�ї���s�2�*6w7[��F�i�%��5m0�45��
-�����kX�p:��-�:�r�=:Y(v
-,��ܓ�;��N�Ot`	E���K��n�7�q��H�,��$�;K=��j:4F�й�|���/�C���/{��D��M��N���[�������N3J�jB��?7!�.��E�'�������p��O�d	nN�4�[H����U�3b�$k~��ޯp�9�^�aHX�>C"�yn ϘG�J�R\�	�	��%n�M�e��/R����D/��|�1$(���',�\�%ԇ^TRiQJ�[�QL���UТ�z�p7%AFˁ�R����a	Ca؃��P�1��J��Q�V�]+��q1�Wʑ�A������R�q�8-��S���>�<��}�Ϛi�W�\�Y%j%��,RF��b�'�,S�(#m��,K8s���Kq�m��]����u�{ٴ����(^���c�
7� �/s>�{���0A����9
-b#W~�����
��ҟ�]�^�rF@
+464 0 obj
+<< /Filter /FlateDecode /Length 2251 >>       
+stream
+x��\;��F��W0]2���^�L�3'���tq��Jb7q7������Kiu'U���.�ŇA��KǺ_����7�xgyό����_>~����k�z��_�W8��=�������|���.|�ґ3�z�Xo:����i�ou��
+���;�ױ��7��n"�_�����Aw��<����sbe���@�Bt�_��?3&cB��������i�����_Vv��3P����}7�:\����
1-7�";'��|~������1�
�s>�[
+&
�����,��T���-�|��TU,)tϹ$b	7N���c $�;�B�S�*^1�I�U�0���މ�8�kk�V'��E��c�����f���hbaZ2��S�y�_��S/]C��gi�`�2*C([�4]yk�`�����A���8�����%���<���ʕp�X��ݨ�N��Z��賽�ڠ��a�|7^��
%�w���Y��%���K|�%�c��
�gg�C�x��؜��;��p8��"ζ'^őaU�i����
)|?O1�W8���uH=N	B����x�QS�z�dR�)�:`�y�3G�Pk
+����}��0�^L��1+X���]����7: L���J���˽�ѻ	�KeK�x7^0��T����x�];���?O�
+9����dcRՔx�WƵW��,�`�ۜ>܇of��`j�(W���&��2T��dP礝��I�>��^Q����<��i�a��I���5�@��s�54QyOkG��ɕ��+���������
��� �c9�
�� |�0~��-�W�0���՝gk��E���9D���9�UM
�P��&�lحSu_K^1OԨ'43��S;!�wM��$�|�����U��pxBN��5�hȞ�S
+��8��i�񚸊���
+������q
LѶ���50E�hS���UAZB�nwo�6b�)9
+0ln���:���>�
+�T�U�}YY�)���q8����}q�5�x��B�5! ������l�6�y]�Ji��2��_��t��1Z��$K�zN(>���)�����2��)���bC3�#���[p���;��U>.8�
+��$v�`K$W���u2g=y��e�=�˻��i����B��y��`+�
��6__�+���+�%P��%�c�`J�m�$�
/KS��^h}�w���a�G�kʕN�`Qɸ�Y�Ҭw�ҶgJ�3*�&jV�d��s�p�!!�
	Sl�䕄�S��!l�ZjBp�f�f
+�H���is�.�\���t�����,qD<
+W�"_����#8����J� ��3��yYg�tp�V�}
{�p�A�mvGC�ŭ��$�X�7��t�.su�X��a����B�jv/��8[�2��,�w0��3�pc�5�K��:ʀ
+5i*~x�3 �(9��4�ы�Ы�jDZ�K��d%B�%�-�e�t/�6����<���\��p����|`�z�xUq���\L�!p���{#l��Xh���r�;E�\�lDxy�.�5C�!�,Y9�Y	�����Y��EV@�7��[��M�?���R>���4O'z���zm%�4�<-U��r�$�>L�9�B�k�a�A��Ǆ���F$`��� ��Y� �
+��8��7&5�BY�(z�i3I|*�7rn����>���P�W
+�\<2YZ���P�1��Y4��Nɕ�Q<Y_�J��~fz"�e���
+`X�6��@���a�{���L���8j�C
W
+j9m�+����nD
+��)����Rp��@t'o�R�yA~�{g&�b >˩G%U��*�z�Y�Tþ|����g���_j+�{��!��� �@��6i�oZ���R|[>�K�;�*ȥ��[[
wP��?VF�Hp�a\s�^~{2�ך�\HA��ҙ)7��jK�{Ò�q6ca�Yn�M|/���{a���<7�vQ��A��\Mj���:����1�r��Y~�2|��
+�/��b55um9����Y������PP���|�g��_������9�h�Ŭ������3��ի�VpE��w�-�((����Z����\ʞnwW������_5����&i�`�ah�bW��os�j~��%�ThN�
�W�)_�8����������T/҈�A�d��p��ڈsL��K��k`��/�5�ʛt��Jt����ǝ
(����o�?
 endstream
 endobj
-447 0 obj
-<< /Type /Page /Contents 448 0 R /Resources 446 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+463 0 obj
+<< /Type /Page /Contents 464 0 R /Resources 462 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-449 0 obj
-<< /D [ 447 0 R /XYZ 69.866 758.996 null ] >>
+465 0 obj
+<< /D [ 463 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-446 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+462 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-453 0 obj
-<< /Filter /FlateDecode /Length 2300 >>       
-stream
-x��[Ɏ�6��W(�HhW0���ř��S��K~?E�K���՞���A��E�U�^-$�w_;�������y�ƍ�cVu~{��o��ׇ�3�����7������?~/]2�����k����'�p��2�)���w_�S̜���N�y�
-����
�I�Z!V�$=d}�M8>�D�Gu��|x;��@i��㗌�G
	���{��}zǹ����k��J��������`eW^������`�Q���"��8�X���~h����Ϣ�k7�&.����Y����78q��(��Q�
-o�~|	�Vw>z�z5+��hr�5�	7������w< ��z�v�����II�|�4'h2��;�Dg�aښ�����k�Su�q�0��ȿ�s�R�-s#�.����2 �*s{;���Y��Ԏ-��D��y�[�̶�Y�?��L�
-�]"���5�jO�׳�H�T)QqC�����eA��닢	��&��i�%�mΓƓ�P�5)�4��GhH�Z��:�(T�.^4�߄��~xR氊�+[=�o6�h�Ӗ@�g��T7��0�����7����z��E5"�J��
�nA1��Tvs�50����5����
�6�Q}�0˴�G�<��Ih�o{h��G��|�OC�}��P���V��as��ad��Q�����δn!�Tfm�M[�'������v����60Φa<�qr�H�[&"$R7���ӈ���,�Uh�DB�4�g����W��͹��
��4�I�\!������c�m��:4'��oz��ΣȒ�յ���DM��(q�}v��*{&'�ſ����O���@�4����YPQ��%gu���TH��VpE�Ag]�Q��)�8?Q��1h���Zc������̆�e	kd�Q�w�p�ɶt0Q�������g���b��eD�u:9�d�R�ĕ�b�U�g�9���7;�)b�J�!�Yx{q� tR�.I`��6s���Hk8�$S��s�ו<�-��r���}	ǲ�p�9��D� �m�'=Z\�$<Y���)�Al�}��u��>pJ!��;dZ���NА����Z%LS6ϰD9��d��&��t��D� ?'�Dp�i,��v��^�XO��J�㙻F�^����1�jq?�*�X߻-)�B ���1/��5��NZ�E��ג��D�g* �C9���+G҃�~�7�q�Ӭ��?'��̹����'��t�.����*�F�R��
�]R~[6�US�-�l��$V��
��8'A�v��le������v��hW:N�bU:E�c��ź1��W�A[�1,Z��\
�v�'o�a�E3,�p�}�U2b(���a��W7����
ǜa�R�*ڠ2\p]ek7�e�p,:�����������(��$ip?^I���y��@�P�|�¨���X��40�?:�lrƔ· �gV	���,��.�����1&�}>�J�՛Ʌ�t�bvZy��bܙE�t�5�E�j��\Ǔ��d����#A�ss�b��)�<g�!�StW�����p�o���f�Ro����O�kHu�J�}��h�#��vM������nvOj��I+
-+
-r�,��2V	�(_ڷ���٘q�{TK��]�[�N���L�V����-Ǣҩ��t��������[�\zzdj-�K�+�L�D�h��^O�Q�R$�EＰ}7Nt��%L^u��)V�a�ñ2ӛKS�N׊��Y�Do跱_\$ٶ�I���^��EL9$��g���uZx��Tni?�h�G��Kbmwκ�S��ӭI$�N0����yC�Ʒ����-��i���c:�K��rD,Q�Aw/M�}�AK�ke8	͈� XŔ�[Y�%D�O���T���Zר7$ғKVa����Zc�r"�d}��M-؟_���j[PX�i��-�0ma��X)�0_S0�X��]c��M,���P�ƹxj�ˡ�����m���h7	��2��D��B�ɩ����K��O�50^U���f[�e����
��Ck��(���JZػ��&A�A��9�c��H�O��`����Z^�q9dy�m$á(��`^i��5����gR�Y��A��l�tX�~M,SXf�#��a���l��Kɠ�aK��[e�I��-emAq2~4����Pބ
� \#����c-�V�@5�m`ix�] ��m����7t�+&�4:r���!X�Zds5?����@�/Ue3������ʿ� 5�LY1����b�9g�������M����������
+468 0 obj
+<< /Filter /FlateDecode /Length 2447 >>       
+stream
+x��[Ks�6��W0�f���~�d<#YRgr��[&��ɥ94�����
��H���-	$�o�.���}��o7������'�`�Z
�Rb��珛/_��/�>�g����?�R�n�������eL��P���>t-��|����vD�'����a�{k?���$l	�rm�{{4(����t�u�t��4�Q'M�d�tsw�0%)f��o�-q��p><��|��K�\*���Q������O���W��'oZл��F��%��K){�TJ)����|�O��"�0)�Nx`��$4J��g��G�n�$t�
1�vй;�u�)A���lu�@0���^<�s�]|���:�k���/���/����Ȋ��8>h��2:��w�Gx�b㇨��xe	?e�y�l*�ܹ�Zͨ"����
�:ߥ*�ʢ/�w��0�(5\j�:2ʊ3�n|wPvP��57�5
+��׏����`�������cC��=GZm9a⚜�T�e�t6x�[�l���30k�g!c+����树Ų�ja�>��xsw��=������g���J.\}F@�,)̠�ؙ@�}w4{-��!�A��52�c����&��3�.کr�h/D?�ؚ��ʲ�E�l�d��¨Y9�̨Y��{����s�=��Q�|�u�F.����
+���"v02/��q�֡��;�^}��E�!S�J�#��}��^?�H�V�N}R9�	]T$��Z&C~P�C�(�������(DT�̤V��r�RN�Ե�5#�}Q���ݺ��4��R�jˬV�������VLM��`SZ�Qr.�B7In3Z�lR��"�G�4/;��Sq��}ܼl��7=.���-���V*VY
+Wi���QLw0;W>
:/N�/�R�ꗳTWK�����&�>��f4�..u�s{z��<����+��u�m&�b׽�Yz��P��"�Tg���
+T4.
��[N�<D����}��kA_�xa�	Z&rZ��&E�U���t��T�4;�p����C�u(��v�#l�
+�54�H,Αj3OK�#�lr����ٙ���ҡ�^���Tu��֒��z%w���;bJ��N@H��/�[�ɘ@�V�!aT5��xK�\Ɲ��CK�s�(�������n�(��sDC����x�5-�cWl����se�7��E�/�
+�+8�Vw����*��cڞ���1O=�?�k�����.o#�]��O�`M�iEK�}����>�hp�U}hA�����]JC�#E�nH�x���~��A	��G��83�9��3놳S�8g�(&�4����g,=�c	�4%L�n�5Ō$�S'K�Y�\�F�t,׫�.N�;�W$ҋKV)�S��%k��5���71��
+���u�B׾����Wߥ�`��km_�J�����)�W_4��uri4�Rl��ӨpP��M���m�M���ͶJ
+Ն*Gr��j<ܫ�=�Ma�����q���rh�]���2����/�0�jma��*$T��IԮ�D;�yC��#Y���U@�+X��f�v��@�����]��L�/�Z���0J�S��>d���z[�-���2e�Ue��l��S(��Q:^����$��Z[�փ�1�����oP�x����f����Z/�Z�{	�'��*\�-P=��@M��_�r%Z9�u�n��P���(G��
���x�@91�J�l�3^�=�˿EK٪ú�$ں^�w�
¡���~� ��Q�e��7l��@5��(P��>�t�K��R�p�Z5���J����@��>�h1����3HW��N�4;*��SM2K�����M�A5S����.�4Ӫ�f2��q�|Ka)����|��lQ�����'�}��/w������?]L
+2��G��xq1�+��R�)z
�([>����6���-���ky�m���(<L6�R�����Nnc���δP~���Y]PI�Å�8c�22&gR,�2����y��v��UB������܅qE�2����a� ��؊3{�v�{��.�y���(�Ki��К�VFEm�w�,Ha�5�Ԙt�2~�>>�SK�����(���S��ɕ����_��V�d*l�:!�
+�m�D�2�Eۭ��q���M�)y�uI߃��ʀ�wĎaÃ���Z�E*�o��h��/�v(o�J"_��l�#@��P��I?�@�¤_i4^i)��q�xG�B%�6���>�hy���2�?Ó���qd/|PX-���5<��fF;oQ[y-�Q:?���]��<���K�	|챏��F&h���1}KZ&�U�`�x�e9����S��z��E��#mK�Y�)R�c/خcJhmlZ���	�A$,B���\}:�Q}��4Cr���4�h=7����
+ax&Ӯ��n��������X�
 endstream
 endobj
-452 0 obj
-<< /Type /Page /Contents 453 0 R /Resources 451 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+467 0 obj
+<< /Type /Page /Contents 468 0 R /Resources 466 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-454 0 obj
-<< /D [ 452 0 R /XYZ 69.866 758.996 null ] >>
+469 0 obj
+<< /D [ 467 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-451 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+134 0 obj
+<< /D [ 467 0 R /XYZ 70.866 170.939 null ] >>
 endobj
-457 0 obj
-<< /Filter /FlateDecode /Length 2033 >>       
-stream
-x��ZɎ�6��W���]2�h�ݮ
-�s߂\�S��Q$�y�df:H'�*�eJ���<�%n����'A���駇7)��Z�o�?��$�ܼ��s�2O�Ϸ�?o�>�^���\�z�"������(!�x�2�im/_���N.����[���o��f�>3������/��:S�?�XQ�2k���G�X�Q锞�LV�W/�U�&~)�A�{`����Ϭ8.|`��3��p4\���
-3��žj��ے���Q?�R3���ʨ`Mh��I����Z=@�V�$��e�������Wn�.p?��
���j�60TJc��$ƕ�����1�q������Kׅ�:�Qg��q�p�&��[3"B�[��c)�UJ%�I)����	6
-�^�2L
-*���Ԟ�x����\�&)U�Q�EJv��e�?Q�&���FN���0�v�V��9��h��6jc]�[QG*C�k��.��2���h�7�v��ȿ�*��X�	��3zr�J���̞���p���IKIa0-����(�!x]�`k:�,G
-\S��mG�B��#� �%	��=����!Gص���e�Y�=^�z.f+�i�lź�]ؤnmnj��H�s�zT�4*���\-�)��2�[Z0��~�BLZ���w��������`� }]�,��� �2!ʳd�X����0���Qx��ϵ��rC� :�؇�=�I�pՒ%ف7+�cY�y����*�
-�s3K����t	
-[Y-|���h�׷��j�M���ɬ3��f��:av�3 �7x�,��/�����~�� '��d�Mi�����|�����u�1��7�M+��k�������Gb���{�_;<�`�:��x�~�V8��k�py`2Z,^3Հ�)�*r{��q��jw5y��T���,ݝ�be�z ��TXx�Hũ@Mz�֝����K��MFK����t�;&��;5�;�p!}YyW�ɷP�ڧ���?"�Ye�'1��N8Y����3j�މpK���ceab���9dUj}���^���r��t�b�U�](ޥ]F r��'N�$�pުR����J�~�R����
-�)��*�f��]�<_ڞΕYvBޘ09��0�%�0��9g��Uy�h�{���N�����7c�m�H�ۦ`Ϛ�,{��+/�j�G����z���yMF��
-��yJ	�)٦Pur�By��~-�}�ګ�ܓ�3�U���J�X�mwld�v:�"R���;��|��o̞�:��;�l��?�+9��z7r�-�Tк®Ǭ�f���Q�y���.�Κpi����Y�R6Vu��E���,7�a���}8>��+*&9DWn��PVf�j���qc�n�t:·P�@�	��Lk���֜o*y
-ߝl�m�6��q�v�*�"'��\�"W�Be5�d/*�+��O�����h���ަ �n
-��urC���M�� \�U�-�8#��J�@�j�]Ԍ�ty{�p �̯͹�j�x�@J�b7i��۔�n�.�$��Q
-C񫝗SgiFF^��`�i�GH�	��u�n��I*�^�E���] ��V>��;:L��m����������c2K��YK�Tf
��w��R�������e��?'��	�GU�������IJ�yBy�q���=w_��ɓ�{|}�-V,��4)�v�h�֏�7��u�������+|yR:�r��I+\k3D��pF��S�冈����8��=qJ�-qBe��3��҃�V��HwD��Q��853�'�"q�����bNJ�I��?��d�dH
�Nx�9�Y?W�u�̉z�c/C�IV�	��(s��P��(ŘH�0&��tu�cLاgL�f;!C;�+}dT����$i#O�`T��+?�?q�:^��-:a�g���d��������?�*E���X�ku�ӂL~�����
`�^�3)[�tO%?�+��+x��jx=�b�X��>��\��ߖ��9���%�i�N�}�F݌�f��p���p�`�
-endstream
+466 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-456 0 obj
-<< /Type /Page /Contents 457 0 R /Resources 455 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+472 0 obj
+<< /Filter /FlateDecode /Length 1890 >>       
+stream
+x��Zˎ�6��W����\H�I��gWtۮ���J"�Wd[�;7E�]Il��x�8��N�����������Z�8_��ܞ���;�e�3&|��[z����7����D�^S.|c�-|��W)L�ï�i�p�bE�'P��U4Hq��Ƈ+�x��H�q����F=׬���`SW�1���� #�ο�����M�8�hN�~�CY���T'k����yGk���'�F����{�#����#�-��ZH�J�T�%AO�@�X��.��hL�<M��|�Y+IR�7fti��:!��4V�h��z"��4�kB�G�ʅE����W_!<`�%L���/�j>f7�4_�ד1�
+S������9a�\{%IuN�㒰9J\���j�A)��P �U�T\6�oxg�+�$6�p.�J���㢯��gb�����C^P���`��	\�[�v����9g.T6JsH
�^{��Z\�8��9{�

ϥ��P��̔b�$d��N)J�%ҔT�[?ɨň�K�������g��b� p�����cG��s����N�bG�,�M������}f^?>�uv�+�h�;�;���{&<Q��[���i��+R����RC������U��D����UfI,tqP�Ě�i韼�r��P�'F�$�t~49JEf9sD�q�2p�M�>��ڟ�xPVqk�����r�(�^�ȋ���^0>Y|��ϕ��
+��8
+�H#E�r�bL�8fD�����6�J�Ǹ,�-R`d��T��
��QE�)�X�pM!+�����N�z6���7��#%�t�.��C�����Ŵ�;�,q�-�̹�a�b�U!,���\�	,P�n��*"U��Ps���-|�
�IMV�N8{��O�J�q:^�-@>����6뭙�qwT�,�N�B:�l	������J���)}�H��,7�a��56Zn����u��+]��P6���"-�dj�kM$�=�����_����XU�zt+֛J�wPM�
+\R�
"eι��<]K)�B�?�r�\-��q�N'�a�_	��x��@��"�F�7\$�uu����)Jv�<v+_�Y�G�)����6�>rC؄]�+ŭ&A��]릤D��Sb�fê݇o��;��P��J\Y��6��:�1�f�δ�3,\��ú]�tw��(F�[��΀P�Uڪ���뷺R,��QWP�p��Y
+�E˪\���>���и�Y��MW(���cbRCJ-} jU�G��UoZQ�6�sT�j��2L��:��)F�I�@�X^��,�B��b�I������B'g@a���\7��G�~,q�����SлȲ��I+\[3E���#N��rS����mO?�5h�S��'0��<����Ҋ8)n��S�P�%N���I�7���O��]�I�@���'ab�5��!N��o�f�$q�Qi�K!q�q�����l'����&�q�1�	m۝����	���퐡Q�@��;���$!�b\�e�Vr�Nh~�~5�$x�Bb�E',h�]�h?6�y���oC���x�Z�f-p��s�u�
`�^�siL����J>�#�?��S
�}+���U���Ƭ�c�+��ڪ�Vx��㓝��_b���E@N��՜�l� �]��|�؀�Mw�ҦBd{�a�^���cGAz� �9嗌����2tm5���	7h$�,�J��\�A������nL��Kc�W��.�������6\������xI_�2>��%kk#�s��c��1�!')E+;��!���rp�j:�=:Hty<�R1�n����\@�V1Hw����$1��j�[���'N�c��A~ܾ��1�����uo���"G����:���������y���D~�p�J�[�-�b��f����/�\
+endstream
 endobj
-458 0 obj
-<< /D [ 456 0 R /XYZ 69.866 758.996 null ] >>
+471 0 obj
+<< /Type /Page /Contents 472 0 R /Resources 470 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-134 0 obj
-<< /D [ 456 0 R /XYZ 70.866 721.134 null ] >>
+473 0 obj
+<< /D [ 471 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-455 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+470 0 obj
+<< /Font << /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-461 0 obj
-<< /Filter /FlateDecode /Length 2504 >>       
+476 0 obj
+<< /Filter /FlateDecode /Length 2457 >>       
 stream
-xڭZˎ$+��W�t\�R*��Ϊ+ͺv��ά����C�l��Y�1�6?B��wS��������ח77���{o�����uӷ��B~��������ߕ����We���gn$�I)k�
-�J�/��R.?ͳ�������_H�ļ��1��iVu���o����ۇ	婑ν&ޑp.�i��y�����.1m�<[��ۙy�HR�k�>
�������9�}�X��s��� ��>|�S�5C���׌��v�|���V���Wmꜻ��S9��m�sض8o϶H;��ˆ6v�]�1�mLk��?x5�o��h�he�Mg�z����
=����'�^��<H�y���x?^���y��N�2�t�p����n��DV���c��6,B���372���C��/���s�C=� ��©<�K��R[�<Av9[���@�����\�3�n�w�e{^�}���"aO\V�(I��y_Z����]��g����W7^8����7��cU��.=�.�輫�S�+�w��̻�'@e{�z�.�r����%8[
X�,�gv�,/�����P��p�m��}6j�a�&<�wZ���gChƎ���H̻����&۝π<��H|!�Ż��Z�|4�t�1.��D�I^�DSTa�U�D��|IQv�r�I���Vͯ���_�z1J���W3��}u�پ=� �ܢ�E��}�X���_����--�أ4r�6�o���P���C�$��|�9RL����`lq�1�~����ׂ_=QF��WT�o�5�K&;s�L1kvD��~fm4|ݷ��qw�R��I���q-	�h��f��n��l�	J:L��6�b���2FP��ڋ�良C:��Q����i��:�3�|c�e��|�.�'v����bS����8^I�CT�G�/��D�VMqoq�Y
;l3"���w��p�v5Y�7�M�ia�ל�1,�܎��p:h^�J���vVƊ�a4�f�k���p�C��t�O_�����%��q	.T_�O�´8�HS@'#���hx{u�i2��g��gFS9��2�-{0	��� 5@�{R�)A��o��KEӘ�Ʈ�f	n
-=5ެ'�}��A�VU}}�Л���f�Mx5`"l�7�*�EM�J&�'��)-��j�A����w�KJ�BEN��bC�%!:�8�E�1,ѿZ�W!"aH/�/�O�
����Jg\I�Tyr�8�#;�=Љ,�z�6����w�i�63L̡��y�����
���L��������"�R���8
-�b������?^h�Y!������g�J;GV	����\���ze������U��I��m��G�̬�s*k�]*��e&�N'������>R�>"���Q��)�Ev�)��E��'P�1î���Q(YJd���3����Bk�����^7.im.̌��bL��Z0�O3��F�����
�����5S�J��B�֔�sE�!���:�^��&����Mի
-'�&H/CJ񯚙��gq��t\�	�!ͬ��eϧ-Q��
�f��9�j5�.4�U���hKK�H��u��Hy��5)fH�k�z~������kuhS�;õ�&6㕖.�"��Q��	@��i������j��Zl}�b���V�C���(�%�撩a�c���$��^�`:�]fQS�*؁,�J�9ʀR���_�z�(\mKg%�,k�{ ����*��T˗Xi�nĬ��W5{����Qj(�����e��穲���;v1(���t1���@����i���Z���}uр<@��{���
M
�`NF����^L�`zhv9��dg�&�~=�ISam��R�ɵGRɀyS�*��	����'KwݦjL�Y�5|�1�&����ȵ��H�U�U�yr�O�Ͳ�5�	]p��Z:�A�xf˕��i͊���s���Hw����wTΓ�N=�?6L�n���̆���Ȼzt�?�?c]�o0�?����k���������,������v��lgCFj��h�^(ߨ�a�~��눲㽥�t��(���x�)\����u�ͷ�/�F2N���Oc��Z��q+Дf��X����}:���w���}ϥ��]�\恏��תj7<��)ZϹEQ�;�v��gI�y��)s�_2�^���$�a���L�V�^�]��7�9�!�5t\u��<_q�)��m?����a�wie��7�C�/���8��P{]A�7t�f���M)$�|����g�\�so�8�n	���?א�.G��7t���V��j���s�}ǡb�b���Z3�d�Y��.&p�]L���bZݕ7k�Uwq�ص�����x�JG�s�S�r</�JG{�U�"~����i���w6�5��;���W��?e��}iB�����{��;�8v�E�r���h���[V�8S�s�}\���i��bGwt���%��ͩe�I~�>�	��?����y
+xڭ�n��>_�?0
+wR�р5��ξ=䚜��א��E�R������Rm��T�n���5�n�?�xzs�zY�������ߨ��E���Ϻ��>����M�_�����*N>p#px(�L~w��k~������i��oL^�:Cn��_+�?��;R����k>
+���Nտ��B�F��|�>����t�A���JZ�!@T�F}���nB���қ_(�_cI�	�-&TV�DkY��`�L�D�
�.*���N�+q�2?��.�4����
+�Hc�9a��<��H�J�x�R�:[��+�T�	ԊvM��z�����p�N(���z>P֨��b7�]1��9���U8oQ ǰg�&Jj�W;�l��������Y�46����w��������s��G�"�<��Y�h�E���g\^�5Ŗƥ��m�����^�Ѹ(�H&v$�ۄ�lC�"�ц�z�!9�(�htʮ�6�@�/&����>?��<���OnQ����N�Y�̝�t[�Y��*�vk��e1[0��B0�f)�h��W�ԇ�N�x[<�>��{m%>��З�*X��y�G���~�I��*��eFDSTÄc�!��.]H��R��\F��Q�Cs��b���<�L5�����x��m��#�uZ|y��}�` �����o_YZ����Hmg؄Cu��YT8�����z.�Y���Q��W_��k��3`��2�,��2������U�kf�Y�c��]W�����n2�sk�Iqg�ɴ��d��g~�h����b��W�;C���1bN�bC1_3�tį��b熈��F�����0�[F�6Ǣ�6�`]�w\s� ��s��h%��^ŧ���D}�D��F��W��x��O�lFT��^GU�N�����fs����|0�iN�>���{g
+��W}���V3�݀���su��6L�Ըo}�k��.�����yO������*�ɖ�iU@ϐ(�N�3aWϰx�4����v3��3��8dcB�x���G�k�m�[����S�J痰R:��B݋�6F4���̨���**u�¯��*�)�gn�w��j� �{�ܳ�����N�.mo�@.��4>D��5�r� r�4tHm�W��rE���M�������J#i�l0X�/�h��{i�ʣu}5�%lED�8��M�Dn�@�����R�"U��N<۞�_^�oĩ�+�/)8�H��߃��J���n�w�(+���\�w�-�;*c ԇ71��lML_SE�h����g+�dT�����b:�U2�C�r���
mb$�,3f �Ȯqq�{� Q�&KK��CɅk�{���i��C�$�e�Lʠ�I���ʯK��Ŵ�C������+�������
W���+è�u�61%�\�S|#ؙ\WQWپӴ�+����G��ە�$�n�!�Є�
+�1wf,�[�6�4�-�պR�1��!1B
+R5ƔrnU��H���I0��V��n�9?�I�Ϫ4��4�����d^�R��<���+pa
+t�xSF_A���}�j�@DN�����[h��քM�h���q4�����B��գH�Zs��}k��@����v�}�׆��mrAo��5T�1��X�[������ڽ��m���813���^n��+�r���>ocUۮ^��.#Uׇ�dXA_��T��`�D��_��AL�A�qH��P�^���RzI�$`LPQ&(�_D)��Q�p���V��-�<��F����a�'2�&�Q�pv�f*�.�h.��/��gE$�~!8T�����{�"ϭeRn��O��帼�)]�ס�z�o&�q����,8�Y�bu�H��^��<߉�yV|k����VŎ��(�}�<!�����o���̣�
r�LͲ�Qn������1�@ݴin���o��UhE��Y�I8C���Oz�{�O��p��̎ʬ�E�գ;�P~����P�(�`�t�(��1�5׏d�'g{���>cC����č��p����-�T��C1C���4����`����|8:^��h$K����>u�'��q�����r��΋�������}�|��Q﫸/�۫D_�����]���Zٍ��E�:j�OE��	�7_Kp�2���E&��H��+6��AH�k���8i�9��0��|G�uQ��C����؊{�C���
����d[I�J���!0>�7!�X#4���G	o8����O�|w�&F���x��C��7OJ�_���;��.��t��|�i,\�a��j~����ǎ�;m�&���+�c���@`w�k��q�5r�b���s��Y�n���'�V_w����	�V��x���^r���J��Rڣ�ώy�4��{���W�:Jf�_�rC�����u>t�.�T�I}m���<���,����_a��p�p{t���%G��S����gS�����?���
 endstream
 endobj
-460 0 obj
-<< /Type /Page /Contents 461 0 R /Resources 459 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+475 0 obj
+<< /Type /Page /Contents 476 0 R /Resources 474 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-462 0 obj
-<< /D [ 460 0 R /XYZ 69.866 758.996 null ] >>
+477 0 obj
+<< /D [ 475 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 138 0 obj
-<< /D [ 460 0 R /XYZ 70.866 591.192 null ] >>
+<< /D [ 475 0 R /XYZ 70.866 602.757 null ] >>
 endobj
-459 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R >> /ProcSet [ /PDF /Text ] >>
+474 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-465 0 obj
-<< /Filter /FlateDecode /Length 2713 >>       
+480 0 obj
+<< /Filter /FlateDecode /Length 2712 >>       
 stream
 x��[ˎ#�
��W����R
�v;@ֽ��6Y����H���T����]4lWI��!�V�����?T�||�����M�i�^������F��-�)��?�I�����۟�J�Tx*?��建����S}c��y�]��z1#?3~w�<ύ��+�����ٹ^�9?3߅�	�mL�����} ��Om��>)�8u���?�����������y!q����M���}+�AQ!�e�!�ǖϯ�(x�ق/z=�4�CAn�-c%#
 �2@!�{2ŖʨX��LϾ�-�^��qn,o�$L�b;i�yT�*����o�2l�[~I�������`�Ĥ&�")�0`c���r)�����.�sh��B�2D)��bmE~(�9��j�5�&�G�����W�˸�o
@@ -1619,58 +1667,57 @@ x
 �֩�d��@c�K��c8��-�����PC�m�O���ª�������uw��uۨ��Hb�F�6z��U����A��/�g{�&v�Ce���m���xw�o�=xC@�+���R�8��QPʳ.df����E%Ih����8��]��-�2���VX�FNl�0-�ᜁx~���f3�YK/��/����k/A��2e鹹�pn	p.��xR")��('���C���f'v�'p��G1 �m��FP�"�Z���P��LS'y?�j��W�&�!�p1M�1���!�`6�����՟�f�@�7�CK,����k.E�M;�p,=.u�Vի�q�%n�Ⱥ��T�F��i�s2�ʁ��SF�8�*y�C�����=R�Q�-Ⱦ�ښ��p �{�����e�<�0�Lv@�$��j�=2�إ%l%��\+>�#��%g������#uC�f��ڰ�1�D�����QaӪK��Yw�b��{l�e�x߯E��XG��g����yMD?+Ց6����-��/�<>1_��pg4����ʝ�~l��|�7�x����C�%�쭝�kZz#��u�L��&��s[	��� o�8f�B��1��v�&?[y�%�c,���{^4��J�kQZyѭ�f�F�8�
�����@pu���8���u�ԏN���3B*4s�cب�3M=�e��������K�J�6�d��3�(`
 yw]w��'�<[�W���g6d��.O�61�m����8�|�g��Ac�u��xU��Zx~�\�
Q���:��)MI3�*�yQw��ѓ:���fo��}ݩ��e�ޮV���#I���ٳTP�DUL#	xk{*�6�\y|
 ��<�v���-�����{~֐�\�)�,����r�^ul��|�K�n�Q��l�`�2�`�m�ʆ�p�2���A��KY�2MO���W{w��2�ȼ�(>���[f*���Z�ي�~r.�����L�Bc��$p�a�2We�,O&��l�Al&(������Ee_m�=�"�XQ��f^��h4��߅ȣ&+N�Jn|"�fT;rWaMM�S�N&yi��li\F*\˖֓�z�JG��-��Pk�����T���:�������Upx����3c2t
���X���e�"�/��Ϋ�?��k���"Uj�!]i��p\�a6������..�R��n�i��.w#�y��cH��P���]�!�"IT����8����m��v~�tpS��o�SH��px`]�S��b8��k0	l��'�&B\Sj{,��p����*m�|Օ�:<��";��)��]�#�Mݔ�����]+�JJ��(�C�=���*�}7$�I���j���U]�)
-��C��5�$΄�5��U��3U�6h̼��i�21�����37f��F��nR�v��e��K���ͯ6��l�S��{J�����>��l��9J���_�����[�y��E9�5���87�[q�Z�y$�u����Q��N;aT�x6�ީ�W��:�G.�G��C�����'�#m�Z���7H�k����b�fч�W�W���,��Am��iv��H%�Wx�?I=�:ignNe�'�O�s7������>
+��C��5�$΄�5��U��3U�6h̼��i�21�����37f��F��nR�v��e��K���ͯ6��l�S��{J�����>��l��9J���_�����[�y��E9�5���87�[q�Z�y$�u����Q��N;aT�x6�ީ�W��:�G.�G��C�����'�#m�Z���7H�k����b�fч�W�W���,��Am��iv��H%�Wx�?I=�:ignNe�'�O�������=
 endstream
 endobj
-464 0 obj
-<< /Type /Page /Contents 465 0 R /Resources 463 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+479 0 obj
+<< /Type /Page /Contents 480 0 R /Resources 478 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-466 0 obj
-<< /D [ 464 0 R /XYZ 69.866 758.996 null ] >>
+481 0 obj
+<< /D [ 479 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 142 0 obj
-<< /D [ 464 0 R /XYZ 70.866 321.063 null ] >>
+<< /D [ 479 0 R /XYZ 70.866 321.063 null ] >>
 endobj
-463 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R >> /ProcSet [ /PDF /Text ] >>
+478 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-469 0 obj
-<< /Filter /FlateDecode /Length 3111 >>       
-stream
-xڽ[͎++ޟ��85`~K�"�t:#ͺw��ά���06��*��{΢����������o��?T��}���é7���9�>��C�'i�-�%z�>�E�����o��(�)�b�4���VJ�GΤ��ՀRpSʦO���{��4��V)�����[~��*�ɳ]�uOt4�jy�G3���盛�DO��+��"}ɿ¯v�6��]��Ex����_��p�@Ò^d�iwWuO�eF��Z��}=���
H��wt�ϙ�U����ϲ�ׄJ\e�ĊX~���0���\�8���v�ȳ�g���t�gZ83���1)�]A^;�R�"��",ͫ�0�8�+��\$y��Iy�N
-i��@>>�\�L�q��Ҷj}�^��'Q�4� I�l�	�3�ums"��ɰ�G=�%�{EO�4��躔�I����
�8����C
-Ӧ��UgDMT�x��t�z�Hz<����@�"��+��+�%f���v:�ذ��6�|<a��٦�_�5��vX÷n{z�q~��W�گ�u�[bG]-[���2���h`��2�m��A�MW�R����@��C¡�@��+�s)Z7TOB�ċË�?�+x�]��14?57I��'R��8Z�ʦ{դjO���̒�x� XG���I(
-R<'>��	�&?��&*�/Z���TH���7j��f��U`��8a�;L����M�N��T���B�9B�L#/�5K����h��蜾Djs��J�ʺ�,3m�����j���f�oY5r���	��}�Gw��h�F���~��;�+��\�C��C
;J����`>~�'�NO�ۿ	O�D��YR#Ұ�H��p�4�^wn��iؿi�6�!�D�D]d�f;�����M-�#�
-�A�Z ���r�.���V��8����//�\��(b
-'��AK�j#�Ӡ�tQI��xuL��Q�+�ϰ��U���F��	*Ι�jD����D�:�9 ç���l�b�-���t}N�z}k��
~9�+*I:�^_�[Y�>�6��w��=%��`Zu&�Y�jPH*$o�Jqt;�x{GX�,��:`���3���a|*��Ρ��V�#|z*2��BA|�2뉪˴6.�ƺ؎�'.w4:�9��ۉ�UI'��뺀j�����'RG�����f/��������4��E߁��}m&_��r�3n��$I�B{�Β)�vC���,�"���;�@]�F@4џ�,�%x���H�/���̓s#��3��֗͢R�V�/y۽��-�xf��k�]�0�dR�R�;3�V�C}\�����;�i������ә\lƖ
�e����M�1��=��uP3�p�}�^e���2�5�<��� �>�+H�(�!w2�k��г��5KU�\��jɂ��%��zsd�|�J��D���@��_&�:���#~vK�`/_�E��㼞��\Н�n�e^;W{��}Fk*:�.�q����ԊrOtg�q6�4�������#�:�
�`��.�5�D�t;'�e��@{:Y=�����y��o)3KxDK�֔��5��I��hY	�98/�}HnvQ���}�8u���X�<f8
-᚞m�2��,�۽-�G�wښ�j���=#�ۺޖ#��Gr8:��͡y��&�Kx!}Z�00�Gљy))o����I�����j·b��=��	�2�˼��*�Ft�v�/ҜI.�%�G���9NKǍ^��ZG(-]~9�/�'�ΧXn���R���?�N�b�Jw�A8�u&\q7e�g�!`�&����m&N��5pq�Y��ƪż؟���VJ���FVXGR�F�,S��
-���ka%;�(�����*�ڊ<
-�����Q�a�N�����	E�5���/'��ؽG�~,��n��Oaώ��O޵[ÉVT��~m����
-�(*�')t\S��ж��M?���J��Rx+�I��J5g!��Ci��f��:Y��U%��οS9�������Zz����B'�7�?r���������YcWV_X��:^��J
��^G�R��2�ò�B��VY��<��l 8х�SoA3�bj~U0�.Ԁ;rre�D�u��DB%��vJnS[{�jsd�ɍ����xߙ7=��G�DXt������t���nXu��L��+e�F�4��R8�h������ߜ�|�X
-N/�K#7�����z������q���K�F�t2o�QX�
5��O��Q Ui�
�J����_��<�T�)��h�b�)u���.H��y,Uf�T����(�y��w�>��غAZ���s�ؘz���4�m�"��+q6��G��D
-h��V^��ې��3�]V|j�E��2.��21N�e�B��K[��d[[5¥��z�;���T������s7�y��~�[�fRj�VL[���V�]��5���jۃ�/>�
-��1�1��MXunf���s9����K|��&����p�n���������)ҌȞ+sR@�.��k�
-	RD���Y������@ۓA!Y��@�^�h�r�t3��ш�(��8/
-p�WV����B��yЎ�Y����	.$�j׮���U5�Q^�	�RP:Uj�^���Գ���	j5�	�.�(d��'��ĦܷCڮ�O�K�蠇Đ�{+�VTY.���/��y�T'\�	�$��H�e�J���n�C��K�w�d8����ca`��cQ���[��W퍢:U�ѯ�NU� f�1n�V6#]��ɘN��Б��	�wKd	��zw�e�ڦ6XC�I����v��җ��	y�m�=�<Aen]��	Ǘ���sÝ!Y2��O}���1	�_܅���E-�-p�1�n¡���u�d?,�x��n��N�)����P{�n��,�f +�;�"�H��R��[�2�`
-�m��m
.�����lhj�T�f�}���Df��bn�E�t�7�z�
R����5��/o�����]�ڙ$���Q.s>��������M���x��+P)��I3�V�pխ"�����!;<�O�A� ����ڍ(<�������4�g�}T�w����'Mu��$ִt�~�fղ�(S5&v/||��cEt
+484 0 obj
+<< /Filter /FlateDecode /Length 3187 >>       
+stream
+xڽ[ώ#-��S�L-��%E�&��'}羭��{���.`lEU*�;sh%�(clc�l���?o����~�>���ԛ���o����ӓ4���O�����߷^��r�}���S+��#g�p�j@)�)eӧ�ti��_���L��~������-��O���ٮκ'��D��ˣ������MR��e�F3�H�F����͵�K�X���>���#���"�N�]!T��˂L����|=p��
H��9:'�L�*����gY��kB%��yIE���w�\��j.������y�|���O|���33����ORH\A^;�Q�!��,ͫ�0�8�+��\$}�jIy�N�Ik o�d��L�q���X��Y�Oߓ�Q^�dl6����ǃ��9�ȇ�tX�����}V��UH
et\�{I����
�8����CӦ�*�<}�l��\$v�כD�䙪\\�RY.R<^I21+	Օ���j��HL)�.�i�'�Уb�����Td��Dd����Ú�U}cW��3�6ќ	���Z���~�w�g����x��2�$ҤںZ�s��������s��m�1�|��o�Xl��3�d[��R�Y�G�:�͓��FD�^f���qG½7!��t3#s��H8��G��b�ʲ���W�~�~��,��[��F��PC�g�ڠ�kD�4;Y4zJJ"�@�k�)6S�
+)���B;c�Լ�]�
+�7�vO+
+�
+M��������Bǜ�
iy����:$f���(ͻV:'��)�S�����m/�m�j���j�<�B�V���!�=�T:�x��l��U�H�5z��q�{�\��s�T�c�dG�W�����qQ�q���p�LU!.�5�AL.�!&w��!�0"�������;���r�����b�*�t`��C(	�NIc�6oМ(��pR6r�F����TV_�CWemD�D��y��cΦ�����
+��	��	��%8�8�e�����7��G9�X��pq�u8O��n��4�!"��0����p��A�ֽ���Ɉ�����1E�sU�]�}�W�n�/͆0�R���£,������s���5�Ug2ϊV�Jp'E�U����wd;��f�}M�R��s��k&P��P}��h���>�(���n�*@|��jط6a�rT�{����I�GN��@?��x:q��DQv]P��dS��5ƭ���
+�GJ��RXX�ð@��c�{N~r��L��g꬙�k/�A^�����s��B��t�?�Y�K���;�f��9�n��<���m�7�Z_*�Z�m������Y�e-�+��H'�/��3l�1���y�D:�����uTL)�����ә^l��+}�cZ(�F��-Q��/�j�ø��1A�"ݥK*��@�qw0�_*���T��3����M	F�J0�֬ܲstL�<^�����͑;�]Y�o����3����@�7���-�r����o	���zڳ}A�/�ݗym_��۪�Tuz]��#�/i������<�,��{�G��~�p����,�5�D�t:'�e��@�x:Y=�Ac���W�|�~+��Y�&Zڴ�����LM�-F�+�`[���R���(���F�'��h���6���g!|�jE��
+D��"f�vo�F�P�A�[K7��mC/�-�)\��V6��>�h.��i��}J<�Qvf�+5Q�eX&���/ŀ̝��U����֤��sB�,�2�k���Q]=]��4g�~I�57)B��l�����/G��~���|��&��)���a	�p���U��Q���w˾y��Ja���
+�H�f��zY_m>/�����X��{Et�J)���̪乖���f���N�(�����=S��,��ȣ�B/6+ܚcھ* ��|BQb�)fh��yɜ��{��߯RpKl=#{~V2����yt�ų�O'PEY�$�Ha8�{�m;J��#m�O�*_�ܙJ�n�Ҩ�Q�EH;�PF����N���'A@�B�c���t%U�I����-=y�U}w����|z6�C���}ج���/��E/�C���è�	
O)Ot��aY�e��Ú�0�y����f���s�-hF�R\ͯ
+&ޅpOX.� ����P�n&���C�H�}����1����.��g��0��4�e|gހ��+:aq�]��}�YLh�����b��Z}���c�nn�
���%�7v����7�6�5��Ӌ��&��M��v5��d���ݸ�|⋍�p��FY7�,��"C
�򓻸�(����wOBN�;�U��`]�{���:i(x���<���3`*�|V�H��;��ol� �٥z�����ZӐ�9؊�����8��N)����&؇�١���S�.��TqY�T�q�خb��\����:��pi��:���O�0ti��8v+�.�/d��M�B��G���@�V�`�!�h�Icۙ{�˻KiMu2������rI����T�F�:7��Bi���j�0��!>�P�"��MC�}B��;��#�cʅ��m<�ƜйKs� ��B�T����B[?<MsS��'=�B�R8Խ&� 
+��d���Qr"%�q^\��_�Dn��o�g�ȃ8�g��N�g���@@�[[�v=[%�����"w'�L���ҩQ+�r����5= W����먺����e6~\(�g�r�*i��8/�F=����[y������%\W:�꤃�>���au�\�D(�q�
+�_�f�#$��|��C�/��{�u�z��(�S��*�T��`��h�fd�ti2�S�ŷtdi%j�ݒY¦�ޝuټ��?��$}d�D�Cn��W�m��<�
+O+&OP�[��x����#��p�F��4�S�i'jB�����]jQwK��E�[p�N��u�
+ُ:�^�����5�2V���`�ѭ];˪țǽ�Q��۽5��L���@Ӳ��Eu� �U�
M���a��_Bp_�h�l}Z,��H�n�FQ�AJ޽�Ec�6��7��L[�.~�\��O�F�,u��#Z��*����t>��
+TJe0�A��=\Eu�ȸ�x��1z���=�h�[;E���<�3=��	ˬ����."�]�Ġ��$�IS��6�5-���Y��!�R����?�r��
 endstream
 endobj
-468 0 obj
-<< /Type /Page /Contents 469 0 R /Resources 467 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+483 0 obj
+<< /Type /Page /Contents 484 0 R /Resources 482 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-470 0 obj
-<< /D [ 468 0 R /XYZ 69.866 758.996 null ] >>
+485 0 obj
+<< /D [ 483 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 146 0 obj
-<< /D [ 468 0 R /XYZ 70.866 721.134 null ] >>
+<< /D [ 483 0 R /XYZ 70.866 721.134 null ] >>
 endobj
 150 0 obj
-<< /D [ 468 0 R /XYZ 70.866 269.997 null ] >>
+<< /D [ 483 0 R /XYZ 70.866 269.997 null ] >>
 endobj
-467 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+482 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-473 0 obj
+488 0 obj
 << /Filter /FlateDecode /Length 2859 >>       
 stream
 x��[͎$)��S�t��)URee�H{��j���}���?DD�Hӳ�C)3	�?6�y�ϛy���i����~{x�v�B���翿��ļٷd�5���-6mo?���ϛ1������y��V�li���������jK�Gyc���,=K��-��o�FC��$��<��̟_+S���N�WFMe����if:�<���l-���Z˰q�1b�O7�Z�5X�2��}�.�|��ɬ�퍌d��u�+�㋤��ݲ�ؖ_������Ǩ���%9�
@@ -1685,133 +1732,148 @@ _
 �O�l�Nھ|�#쪫�Y���\A���*�)Di��p�ݩk�E���sX�$�B��� %�v��.f�,��T:��0��R��0�a8��#`��8�d��q���wĻ[-[�r�ږ�f\e�8&�����m����vw��3E�L��ڪ3e.z}���֫��М���}1���H����o�Ą�[��q^0$,�|I��b���\ӛ��sKX%*���M9N�R"�.��:l	�
 WĈ)��N�Q��v�9�-<�s�xW�x�G��2�<��\�*p"8	&�h�E�L�3AZ�H�RWE���	[����يV���k�c�<N)��*�kN�g�tOcw^��(��V͘���3�s�@��e�RX��N����G'���\��O�����5��&�����h.<�;�����.�+�v��
C�L{1�u9�_w�?���ܶ�u{�"��-E�K�U��lr����>�J�8z�8����u
��)c�'�����mA�voq��c�˭4���&s���<�W�@	Nf�)�r��&���P>ŉc�_��f�ՋO�/��4^JMTcۦ�ᲅd~-�
 �^R@���8�s�Z9Ù�Y��0�8	q�I�;N`���S�-^
f�J}�R�Y)]V�:�GA�~c�i�t����q���ͬ�x r�v��d��]_쒕��'?��'�1����.Y�T�|��k��c6doR���n:K��eDal=����#4gos���������awaw@�6i?�4���,Оx�<+��,�j�����%�ײ@�NN����wB N@܊ކ*�_�N����n�c�t�86N�H�WO6P�ݵ��n����r�)~�>�Lq�3��/݆���@,>�v5��&���U�����F�"�K%��d�댡�HCow�{��;����5�E^_��I��:r|�"UҬk|e_�*+J�;G�u]��n�^��`�s�ݨ���ڱ���u�'���y�#Hɹ�߻V��Fa؋ӌ���K���=��
-魡4K�4�3�7������Q-�Û7˖$V��9t/����_\!7
+魡4K�4�3�7������Q-�Û7˖$V�s�?~~�_!2
 endstream
 endobj
-472 0 obj
-<< /Type /Page /Contents 473 0 R /Resources 471 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+487 0 obj
+<< /Type /Page /Contents 488 0 R /Resources 486 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-474 0 obj
-<< /D [ 472 0 R /XYZ 69.866 758.996 null ] >>
+489 0 obj
+<< /D [ 487 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-471 0 obj
-<< /Font << /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+486 0 obj
+<< /Font << /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-477 0 obj
-<< /Filter /FlateDecode /Length 2792 >>       
-stream
-x��[;��6��W���7%�X�|^H�]�6���ې��P�dy�I��MQ��<�r�埋���������o��.�NK�6_>��b�s��l�9��L6/��/^�I���ʧ�����9c\��˜G[>��s���űdp�����߹����W�,���hV�:�h����\�"����Yɍ(��|J�q��r�L�L���ZN��w�F,|�w���ݑ��~�{(F����J7Z�խ����c�´�){�*�j3��ǀI*Q��Z+Xfy�d���'7f�>
�1��dR���]������w��4*��`����6�+�e�ݱ��j���m���碐���֫���E4��k���'����ˑ��<%;���^e��Q΂��R�)�#vEx�
|�R}�U�h��h�@���A��;�BAg/
pr�'�]����~E-0M^�}�{��|����B(qڱª�V�"��l2�F��4o��EK���s\���R�?k!�E�%p!�*�����1�l�ĸӲ�����_��~"o�����(�L�e�I/&��ٛ'����*}T���5}�~�(G '�r�?�#
-ͪ������v����[(n��]%r���a_^�n�3C��8���P���?�#F��K�m.L�G�=5:Ss��E��(�5�U)1�	�-�+�E����:�sKU�7N�#x��4����l����`w�q�u��{��{2�!;�F�>�Jree�h�ԓ.��w#�x4Q����f<�n�xU�;�P�;R�+I�]�\y��_�<�<&���Kr�G��>�i���)Yw��5~'�2�CpC���f]�ͺh>���٪9e���RA��@5gN���"v���A���bLTP8�.����d��)�ξQ������,
-�q-F�F8�J�8�x7�c3�:���M��<�Q��@82���-��n�Snz���0ƺ��,2G[
=F����ۮ��R%��7m嘙�D��S��$��S�nR�#�ȓ�p|ӡIT����^�։�V�	K�#6&�`-���B�(��x��v9nnٿ�ao_b��YR~f4��<�
7n��+Ġ��k+;R��x"-�P��i�I��"���f7��bߣ�͎S�i�:.1_��V�CW ,�9��ê�ѓ��f7�4�Y㿖	�����F�t��G^-Fs��w!�u
z��֍�)�1RT�'cF�&G�����n��.jE�L�{�5C���E
�p~r}����U@l���@�t�/%��,9��~�í�E(�g�5Pf�U!��F�?^��i1o�0�T��g�`��XҪh�7v�Q��C>�\g����� �N�ߥېTБ��ZSu#$1�~�g�Ȫ�A��ˤ�
����0�sF�:��+W[�(��q}|��N�D���4�}H��T�t%]6�T-H�������h2p�z��l��o�v��B.�r�ݖ������|64������,�K]�G��d��#y &�)ўA0k� �vS�D���C�ƞu��It?��h�!����r�t�s%?�ST
��&��ȶ,�']`���~��R�NЭ"ɲNI�:�k�W��Kf�̑���Eq��n��xU������
-�v��rt��ux#�S��Z�a�����ޱ<�y�3�����gU��ƪ��M���&WUq�}bm.V?����"x����B��ȝ�s�²�0��3q58r�Y���Y8��Q2k���|���h�\��N��>�t}8��q�����MFdT�w�ӷ�Ҷ��F�&0P�+��A`Z˥�Q���˜i�Mt~uSf�������th�6M�Ǘ��ͫ:�ubm�;c��������k�f:,X��A��w|�8֠����{_ h#Xۼ�z�{.�~��#�;ͳ�������5W�}!z�V�n�x>�eC�8,�j�HŁ�[U��{���뺣�|5�] {u�L�˻��-�*�,����Cw�[��w�{7��!�51�����b���xE�'��
 ��:�.�r]�����>���"a6�\�Iw����g��œaH�M�6�IOt���e�]_�����
-U��ǚW�
-t�=������Y����q�Е������3��%��8�����w�]�mK�^17(L���,T�B�($���3�2$JO���q�g��9����nۋ�Vf<��'����ѫm����С9l��\v�v��zg�$���*��B�5Ps*õ~���k�����vl$ڈ���{)A8��*�i��e�Ɛ8'ⵌ��m\�)}�r�p��w㨜�o��w,�KǧW��9ҧ[&u@~pW��6p�/[�iD_�n�7��=��Ҿ��.�k
-i�Z-ʬs��Q`�?
�w�nrI[�<��
-�%Ę��|��it.�'ӟYh|^�#Yڹ�N�}^�g���U���.�'�w�.���_�Y_@A��~E���_;ٟ9�]�8���n��Bnh�v�'�����AӮM��VŤ�ٮ�Ku��p�LO7(���s^J��>��;mt�M=ͮ���ΐj�þ�^�ϟ_$��=�*&�:����`��&�[߼�L�p�<�[����2���ş��|������&6�yz��A��~oM�|�IR���2A_�߻U��c���>,T��an��b����a�Ϊ1�����r��
Jۻk����
-	�FEr``��)0<����6��!���嗾o����y�7�צ����	�AR����P�Fp�����X�����K�����U��,�N6�K0Ӓ��zt^�/�|��5�
+492 0 obj
+<< /Filter /FlateDecode /Length 2824 >>       
+stream
+x���n�6�����)�I
n�;@ξ
r͜���!kcQ�z����P�����ڜ�{2�_z^�~�r��d���h����Θ�=e����c]l^O_�;��lL*o�է���R��1��ُ�f��h��Z�ǟ]�,�%��m�_p�����wo?�j+&�Ѫ��ц_����\:#����o��Bl߶�|�5�w,ۊUN��Ã]=����+^���:rE(�ϻ�v
+�n����`����_����_�mְٕd�]�l�	%&F�6����V&|z:xC�"��V��D�+��V���N�m��2�62�[t�q�Ȯ������A�ڊ�x���Y3�Nx�9#g��IA��*$�V|{~�sC��ڐ�o��Oo�����H'd7ÿvڐ�^�D��f��y��kl���x��NG#��3Ų$[�E��r�b�Z�+�
+BW����/�I������}P/�|�F�M�ORQ^���J�z_2��+�ɻ�N���>�
�a!T��aMbY�
+6�3[<B�tc�i�W��C2�UI�!5��n��$;R�`�H���X0�Ī�j1�x�f�S�z�kXۿ�a�b��h�[��hԓ�9&rt�%GW�ptu��l�@SoV�ތ0���`'�fk�z3�O��o6�����l�*W,��IGGn<0�V|j��'�����&�Fk�j��X��_��	���w�Ɔ6�J6Ip�$�C.� �=ب���pg��M��d/��̲bלH/p�Z<�AfG�/��������~�s��831e3��Q�'��~́�l8rBk�9�5+�\1%����w?���B����[�胜�-F�}0�<~��4%�w#
��O�R)ǐ�F���j*!��=o��r�t5���X:$��94���y�J�B���.PY���ȸZ�X���5��'�Ѵ/!��Ctp����v��M�g�&W�i#��^>��/� MQ6��7��Ȭg�V �0w���֑�,�:�ge�q>�j
+��0���ڡc/ݐ���u{��w�k9�����>�s�r��ZȐ��E{�|��!ۋ)=)�/�h�ՌBq˚zJ�N*?cu�F�[GL�-$}*���	�F+�Ꙁ����[���տd���+H-�b���UǼ\��GR�PX�Ѿ���"k�~�<��U]�c��&'��D"���^E2Z�Z���*q�s�Q s�	zi�!��vK@pd�̪E�0��D���bx9PN�Nԝ�\�b���n*)m9��8�+� |6S�!{��]B�cO�
+����l�@J�� �d���b�U�L�A1I�.l��`�g������;��.�p�P�rBM�R>�1�ق�(u\k��s�gw�=��N����_�py��D*l�L�1�2��%D�@H�%K�z�P���I���|�^if$U
x&��ҭ�Kʨ�I��Pb�U�-���L%V����it[3�%����p�m�G۪����!6���fDLDD섈a�?p	��3.Wj�&\�.�f��~��N0cM�ՍX]��yJgŨ�K��cxX�j@��X��7.Q�4���O�C��e���.�c;w���hEU5�*����O*N+�\�$�,��te
+d��~3�\U(KYc/#8�+�Ds-}l+�뙶w��]�h� ��X�a-�e��~玅�ޟ�ů��a��������D��T��H�2xY9@T����+���
�D�iʛ�^2n>�o�ƃ�h��
+� .ƙ�W�C��5��K��B�m�2�������ʆ�z�t`�����X�k��+�[{>f�=�Y_��dd��V=��n!��5��D�;Y
+@�:3��.���Ą����5۪�?rA4�2�w\�r�֬�Gݰ��StgU.T�P��+I�ߣU�*�F�����G�;�"���������^�S%]����HZ]tBPd������إ����J�yf�!"��;�Vn�:�gG�nC���G�	e���[M㊩��h4�oF���"4�Dsx�f�n\�\7�ލm}b�=�6��C�	�NcOX{{�w��9����q'�d!˵�bOH��1-u<��)�\�
+<��Ҕ�&LQ��+�2�ʔ<�����Q+��+�����.I��n���a�U�s7�3$��0b�W�)(��&�.�Dۛ�v�,�;4�w��s"H�� ++!z�.c�H,{�H��OW��.����o���c%��U�횗5���X��/����8�ůⶽ��
��^�hwJC��U%Z�"
��uy
+/k~�oE\E_������rED'i[��E��ų��pM��/��@kB����{$����u��]�ǙW���{��H*h�9�Q2ܱ�w,ʶ��#���n�y�Z�x�귾+6�82�WZ#&�Zz�ltL��8�>�cH��zT^DS�{��G�4����Ϯ
+��� (�U	��Vtqd���}(0���+ɫ����uD�¿T:�SK�f�&�*>�k�۝d0Z=lmS�g��.��CP�[��z}ю�����7�$�
l�
+�Mu!�G��;&�ȍ��0��mb����KK'|f�	իf�B��v'�	�.��;k9��`dv%�4s�!�
+��=U+��
8�=�K��"�	�>n����
�^3�4ܱ	�� ���F�8Λd�E}/?_�o��)^���y"�\�x"s�JVg�	gJf���;���YA��=c	�����
�?aVY��t��q(8�گ����	�ĩD�L���"��p����
5�
+��M�4�o�5��N/7B��8�U�wb.U7���Nդ�y��>�>�~�	#��
 endstream
 endobj
-476 0 obj
-<< /Type /Page /Contents 477 0 R /Resources 475 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+491 0 obj
+<< /Type /Page /Contents 492 0 R /Resources 490 0 R /MediaBox [ 0 0 612 792 ] /Parent 457 0 R >>
 endobj
-478 0 obj
-<< /D [ 476 0 R /XYZ 69.866 758.996 null ] >>
+493 0 obj
+<< /D [ 491 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 154 0 obj
-<< /D [ 476 0 R /XYZ 70.866 552.746 null ] >>
+<< /D [ 491 0 R /XYZ 70.866 502.186 null ] >>
 endobj
 158 0 obj
-<< /D [ 476 0 R /XYZ 70.866 174.348 null ] >>
+<< /D [ 491 0 R /XYZ 70.866 239.355 null ] >>
 endobj
-475 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R >> /ProcSet [ /PDF /Text ] >>
+490 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-481 0 obj
-<< /Filter /FlateDecode /Length 2760 >>       
-stream
-xڽɎ��>_��;%�0�O�ynA��)�}�*Y�(Kr����6�b��/�n����?��������OonZO��:�~���N3�oQMs�k�t\n����]��Tʙ�y>L��K�x�,|��ߏ�v�i&~��4�"�
-J)���)�(e>i��yi�I�C�o�IМ⿂����x<l2+��c��ǆ�ؼ0��<	^Z��
-ˍ�[Tp�?� X�>L�o�Ƙ%��'����Pq�C�ʳp/DU1<����'���~���\X��e����|����Y����o���h��_�ߡ���"�W� �Wo3�ɳ^$�-�Q�,�.��X�ē���T�H�#%$���WĊ�0�Wx�&�df���ʘ
-v���vR�]���\�G�_�
-B�(C�:%C���=�/n<';���ݨ�����L{�հV��Pvp���r����)����} ;fvBv��:١y�3��4�';����dd���I�5)���w$��H�+���\�U��vRH����&��e��ޯL��"+qe���G�\���qy�]2
-��Bq��4	�WlG%-�g��=��o�+>Rn���U �:���㳐+��F����)ѯI	A�d�n51۟*$��V�0�梭2�s�! m�VeF7�u��/'ݐL�����X���/�$�.�����&�i$��,��t�e���[�κ#�#����/�'���;��)֞v��d�Ky�|n��1��cDP�x|O��m=�_x*;�ɳO�L���L��zx��r����6٦�'�������q,��n�~�����%��2���O����M$�K��WA���5�����8�i�g=������q6�����*����n�m����w�*��kJ�c�c��A]��]֘���C1�<*�@W#��҃g_�YO�z��2��2W8��3(xP����B��Lܸs���ͭ_wF��l㶝��;v���
h��c����]�3GvK��p��-�ҁvY��g|��5M�Y�|V�yA�4�y��_��������u�B`^_]	a�ݮBg�zH4�1�#�F�8�p��8���X�����8#��E�~����L���<r�;h�e�Uh��7t5A'/B�+O�}-&�l���]�����J�g�{�'ޣ���i��8�V���=
-��䪗>
-�4�K#Eńpg��_fe��B�DC�笌C�ptX�s(b��+ϕ�;�O��_x�z2j�1��=�˸����0j{,�#�Y��p���W�ɺ��;3ų�8�b�v�n��@���Va��Ŗ5}~���d6hcy()3�����&�Ñ&��S�0��,�Q!-�1=��B�^ڀVv�ӼĒ19�#2{A���*{3<��4��n2�Z�$ĢdFU��4\����D��J/T����Z9w�Ƒ�<��!�+Խ,CX+A�e,��+K�Y��\�e/�| �'[��_�8����Fo����6�B	����x�0���:��C+7����2������կ�\�^&��5Eyc�#���SR��흆�s~Jj�I'�Oǯl*����h����#3���`�ZL��=��&��G�1�3����1Z��^k�A�Ð�"i+4p�3�ZuIi����K&�S21l���mou�����R��,rT��ǔ�+�gK�V-ɖ�KI�%7sS���Qfa�W�Sݍ�ǉ��i%T'S�=s����ȇ~� ��}�;"���{_QQ�q��>�BK�����Y*}tS����v�I�5p\�!p}�j��ؙ��ӝe�x���99�gvi9�h�֬�Џ&K5�W��%R(�k�D��4mMB�^���m;I��ͮb�U�hY�
-�'��+��d(c���=�hI�p��H!�1�!�}�_��f���ANU4�A󶷯Oy�XV{�xQ�߭<�s�BG�b���z��x=9�|=ː��c�U�e���i�n0�w�q]Z���V4�`��n�~ݴ��!�:�Hae�'
Y>o/��ٷ,B���>��t��M)�DO�-�g��|�����,�+�H���Lp��1L�-�x4���L�q$�R�(�n���&�)�T�p�ŮU���}`D�?��~M�W��M��!��.�W�L���
-����v-�k�9�`�ѝ��,��0�,�y���*��Ġon�FF��æm/�^#������T)�*�/���W�����`�Z%G�!�_��*9:$G�Z���l>V�\�,�u���t}�E���YK�x:����P�C�-W�
-9S�+������M�XzR����_�I
-X�J'-fC�K��עm�#!C%ٞ{E��V:⋄����n1�sѢ�ʰ���3���Y4�k��������p�h�$��|�!�lr�k5W����Vo�����KN
-N牚po���Tt
?K��W;��lGk7��=����'39�t(_�Iϩ��=+)_3�Rk��Z���7�yQ�Q�5Ǐ�oڿ�?)Bc���xCt�v������cz��P��R�E����6ÿܫk�V�!U�aKCe���=�D%F�� �	-tD�5kZ�C	���j~˱�|��4� ��ԕ�K
-�fKv�)ި�m��)��[����Ú��^��E ��_�M�56	��==��P�:�ߴ+墳�-��Ul]�}�M8�X�}�`�������t�7Su�����:櫂CV���t�qz�I;ssjZ�,�c�m6����/#
-O<
+496 0 obj
+<< /Filter /FlateDecode /Length 2658 >>       
+stream
+x��[ˎ���W����4
+��
+�u�wu��)ɖ]��Iɢ�,Q��:���򿋺��M����ۯo.Z/�{/�����uї��By�E�|����7�����
���Q)�K���^޾�����������{�����&
���R��W(��*����/�H�G�]��VH�iH}�������XZ,���W���ya�F�q7bޔ^�����<�[�,jC
+w���I�>=�C[���������BvK���0���4g�3��O$�� ^�Z`z�>�>g]���y׸�u�"�ߠ'Њn��ޑ�l/�]�6�F�eJ\0<�:|T֝v�čb5�_��yWӻ��~�}�|��1�;������gz-�h���vFQy|c��N������>�*.�@���p�TX>-A'�"r�p��Q{�}�qƊ��J�: f���r0%�
+�:P�D�<�F�#�,�W�@ϐzv��W����}E0=k�ҡ��P<�+�nt|k��v�-��o �$��^o���@����N;�P9�L��<1�8��p��No�9ԑ�w6z{�fR3f�J<��RfF��ѪxP\�[$����ο�뱭
+���P�R��j�c@u8XB��l`g1�y�
+����N��k��^��g#�Y��E�J,��ށ����n����g�Si);���yP��g<��4��0@yi��LTlRԅۺ��&��	��K>� g˄Y�?D'_�<�e�\���E�l�%��I��O�e"A�m	;F�;�~��_����W��� {�kW��2�Q��2�Z��FW�
���μ�x/��Lfz�7&n�n�X|�ul�1+�RE�X�$v�n},YHy�E�=��=�2�gD��.e	���َ�^gf8���LV��y�e�I������Pyk�r�/;v;�O(���ޘ#QPXH#L3]��r�WF!��A�8����"E�֯�⃩A��{=h�ҥG��^����m,��u����H0�u��@��e[tƕ�m{ �`g���(��|hA��Fu���:]��o�h�rP4i�4E�o����uLE�u��?
+�㗁��-�p�Czs�oLc�1�	u+��`{{�3����t/UI�l��^��������Qe�E�D����Ӗ���
ƮV4D�=�0��D���ӫس2��v��D�E�n�����g\�Ѫ(v#f�5�|��W�����hC�.�U2i��i��Za���e耬�er��IFQ��Acf��@gZoůMz%�5���"<�ԓJ��Jh�K"R�r)���Ut�(������K���TC��Z�i5��v��bЉR�:w_���	�����J5�DM�^����]���v��:����q�	E������Y��d��^^�6������u��o`2���Fj��j]���3v���<j��}��u[��KB��^ޚ�B̄��k�k������@�8̰�P��9.d�ސ��lY��?o�S6�#E]���nN4���ٔ��CFw�^�~l���z����ԩ6��`��B���f/�_D�Q��M��k#"�����#��n�ž�\��b�y㷅���<_��|3�E��v1:ſR��r�J���_���h�C-��=2��K�+j�Hт"�3���v�Y��/�N�/�� ��?Z�x���E�֟�h�����4�iEk�I���!�ż�<��v��`;���cӢ�s�����vLr���V�Cߥ���Բg;4���Tf~�)�-1kC!(�W�7���3���ZSR�vzq�H�^Rz�Hu_݃q�����T�)�����^�G�=6���f�8�����kjuܓk\ ��������8j�3Z�߉�L�œ��0���lڣ�q���r<�����J��d�V��x�b�$s\X�r��s#W�)��2�*V�ћ���f�9�B:�"�}��5k���HG���	V*T�UP��]�=�Zۓ
oOjx������
+ʟ���p
+�أ�ю�|A���n=�:�1q�a��>������!E�G�
�i�|�g�N�])��L%�[��n��g��s^���{���-�{���B�z?�Gsjgy���U��+�a^X���5gųyx�Y-�Jxv|�l�Z��Σ	�#f����ס�L'N�:�n���7�n�S��p�܋�
+o��/�ķ��0���'&�;�_ܜߝ�8��@��N
���
ש�cXL��ޟ�I�.�������)�A�k�լ��\�v�K-6\�򊊂9x��L�BP�ߠ����UR�,�=N'�^G��*N�qf�g&�P ����W��کE��"�k��j���ƪ�W|��Sy�a��8.�X��z��Zp��}.�㯌����8�07��6^�Qh���ڻ����bB�n�Zz���E��p|<`�pvBw��.b}��o�k����Օ��f��g���v��}_����s�	�@��f��l�)��OYozk'�|v�a�r��}eC��Y�O>����pM�fj<���T�lp���&�Ö!����D^ږ�����>�T@���;U,��]^:���A�,��W���^�Y�̥D�ܢ�w�À��of��V
 endstream
 endobj
-480 0 obj
-<< /Type /Page /Contents 481 0 R /Resources 479 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+495 0 obj
+<< /Type /Page /Contents 496 0 R /Resources 494 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-482 0 obj
-<< /D [ 480 0 R /XYZ 69.866 758.996 null ] >>
+497 0 obj
+<< /D [ 495 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 162 0 obj
-<< /D [ 480 0 R /XYZ 70.866 569.634 null ] >>
+<< /D [ 495 0 R /XYZ 70.866 494.963 null ] >>
 endobj
-479 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R >> /ProcSet [ /PDF /Text ] >>
+166 0 obj
+<< /D [ 495 0 R /XYZ 70.866 248.909 null ] >>
 endobj
-485 0 obj
-<< /Filter /FlateDecode /Length 2930 >>       
-stream
-xڵ���:2���=ޤ��Ѹ���'[l����O�:X�(�3�!�"�źڼ��ͼ����s����[4o�.k�6����7���G޲YJJ�c]l^�~����c�cbi�����c�W�f�w�gۇ�g�ϊcɼ�9�,�������v�\q6��yڟm<�;���~��v��N%/�dF;X��s�q)pp�.].�`�o+&W��_n?p���#a����2���;X��
-��$��&��r�#U4v�TXu��$�y�Z%��6�
-|
-X���d
o��@G�pE�&�p��{�$����v8��l���I2�E�ʰ�?�-1�O��IPA�g�H���(���'��5�e�i�����;�	��[�I�b�H��P}�vOw��-&�W�����@݊�C�Cj��R�ΕT�^�H�jSQ0�Q�r"����*���W ;�<<?�<��Vv��;�ҏhy/F�Ḏ �a������"��s��|e�۳�i��U�!�~a2.(�.�2n�u�Y�5�hgU��l�D��<3�y�&kl�t]	�Q�W��Ш� џ:7{
Cg�=A��pչ�'�8����
-$�PP�B	�7xK&W��)��ȳ�	D�U5�2�IP�A�3&��_�b�dyNw���|.-�RW�S�K;D�RNJ2Y�Zs<�xD%2+z}���ί���'���%��d���#{�ݜ;J�OR�Y�X�	?H^2�!����E&�nɉ[�aKu�W>01��~=�ad!p
-�"�C\�C�G�#��&�e�^6G��9\�Ń��N.��K�(+�0Ϭ@���@��~*�Z��M�W�X�ĉm�;�Y9
�������]5Y���SxN$���Y�*
-&�*�^T}7Gav"�t�g� q��$����2�����gƤ\�$<�@4�H�0q�4:Pٛ���93&ܡ`��v)�0���+c�`��,G
-fE��Q¼�ӳ%a��(am�C�������N��JX�\T�L�SO���&#�A��݅u����{�HkNȈoGq����
!
$ǹLNZy��i~�jN`Z�ut!��X�%�j��h]ġ�
-���Ate���>�����CzN�q���
�p��3�NE}C��uG��6�l�=�?D����B�KTB	�����%�Vi���`�$᭤B������'��֔���w�0$�S�.�p]�����8c1�8��X����$�{d_�+zB���TP�M�"H��H/h��,:�4dZ��whJᎦ�>��ʃ��c!�>���/�a�o5��b��J��y���X�H/�M��BDդJ�k+v��L?�����W43�Y�yTuޥlYS8�:vy��3�̪��%8:�R����
k|����Y�����r_I���.U��uJ�������%�V��v�4�N�'2^���v�l1��uQ��0+leg�Q1P�;b�k;��Gg#fRO�fV��qI���8s�CA��*N�#�Ld�b����f��Jp���"[+n��P@�TՓ�fG,eIf�q=L��Δ�v	KZ�F�d����XYKB?ӎ���	͏R�&"�+�T.hdK�q
�G����'M}�_�R����v
�,/�I�(��jplÿ��4����嫌�:2+�r�Z\"��=0�b�b�.���o�5ԥ���F
-6���Pt ����=JY}�K�-Q1��\��Gn>�R�/u?�����օ��(�h�{�HP֦5�FA��w�l8��g}��1C]?o�f	q�>k3:���M[���xЂX��jUjK�)�
�u�-���/۷3I:�T�&��.#��52�y��&y����t��	;�p������6�:�p秨֒Ս$�P5�2�](�.*eL��t z]�F�6-��vh��/RR�B�6s/�����.�+�� >��d�'uRs�΀��N���ֽ����q���Ժ��ݺ��9��LK��YÑ�l��S� W������Ɩ��})�K�����ap�8�5�f^Ⱥ�q�Zd�	[�\Ќ̋��J����.7����}ƭݹ �q�]'�����?�c?6��L�,7*:�҉v����-3�F臸תF����M�Yotd�z2q�H�Z��b`��$��3m/[5g��عr�g�C�NY���i��1du�kƥݐ��V M��
p�Ħ/���	��Y�P�h�e�D��뀇���n�q��o��2����A]���U�"
��,nLN�˹%��Z����P�T=X�ғZ��^�\r���^?T����.��y��ep�ӹ�3e����" *�s�H
-�[�&�߽q��޸�\���mO��ǻ��Kp���E�Xq�ܠ���#�ZKΤ9M;��4�A�'�&��ͮ�SJO�hhu>�F�h�|S4
-'t��C/X,���#�3��.������f�"�d������5Z�����X���4�'�.0&0��7Rc���Ƭ*v����luш�n
��0��`n)ؘw�#!r��n��;���<��#r>.%���ժSp����*>��LʭNL]x/���c&��,��I����Rt@���b�[�
-��^G�y��:�ו��,i
���
]٧�.L����2�^P�T�S4���~�}�̫�ҩ7g�Ց
-R���Ҡ�ߵ���Jq�)(N��zڙ�9>�I���ᷮ�+���:�,�8���Z������1�������.���:��N���~�H�jR�rn芌��߳�r�G@�k���?�*�*6��:r�UW+&"�z��Y��臨{ؾ�!���L�}�>��iL�ʎ��Н?8]�.ڼ�����p��e��q��(t�
-�c�"���0ܬ����ЈB��?_��������[^�o]
-��OH�,�+3m���̲�/P��n�ǯo���k
-endstream
+494 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-484 0 obj
-<< /Type /Page /Contents 485 0 R /Resources 483 0 R /MediaBox [ 0 0 612 792 ] /Parent 450 0 R >>
+501 0 obj
+<< /Filter /FlateDecode /Length 2898 >>       
+stream
+x��Ɏ�����?�4��h��� ��&���u��X�)[v�	�АE��b�ۜ�{2��0��|���3���K�����~���9�S6KIi}���z����_gc�՘�ֿ������ߞ����O틻��O���ɬ��������ݧ�������VmMK��#�.87���%q�ݽ��u���(����4H��X��u>����w����ys��ю�f�iby�c�p��s˲TÒ�g�1�`C�h#͂��hs>J(�b��'��Aa�ȓ�Ɓ<�UJ[W����$��[��AK���&B�
D5�0,��}F�F�����陦�+fq%3�4s�!� ΍X�����u�F�M� <�ׯLyP%��B\�삊�Y`�!����V7r���8�o��%�B�gS>W`�(TyV�x����f�K�LR�P��
�lp�SF+�Y��R��+�8!F xBk1�WE=���X^)zo
=�ߞ�|��A�5ӌ+�MH,,4Ɛ�lv�Y��sF�V_��%��]F��I�E�h֢w+���t�/ge�̍�,2�/� ��	��#��o�w������7�S&��d�-����%�~z_^�]�R�����ڒGE��M;��D��o@�[^��-
+�Ѝ���T_#�ou�>���p�%S<㥍��[��}t��Mf���8˭�l�:��y�ʪ>ǧ�����S(�YsF��w2z���cV.uiL�?�q+A8��H�L2��Gfo��{�FaV2��
+B�CȺ�m��t�oE�
�e��xPOtR\�dyC������N8KZ|�\�uG��?�'�u��9�Nz7�[�}w �#� �X����D������m� �i�q��n"ޑGD��vd��^6__����aB�.�|��1�E��Wϝ,bJ�Q���ܘ���/c��01��9�h!�=!_�R<�g<$�n�Ѳ�|GxiF�H�>v]bq��a��7����f$CG�`��S���x�א�u����]�(�r��Bc7Ϊ��>$���� T��
+�I!	�A��L�?g���& ��n;�Dy��s$7c'
+�)X͔�3�pw�)h����ˎ�9��_�{���L�k l�SٌG3𹙦��5Ha|<b�qZ��&�dƆ�'l2�|�5�F�T[��2q/��ֆ^BU�@>������A�)
+x.;!�7�M�P֨8���o��Y�^�6��@�j(��k�Jw�^��UmDd�񞿧��m�=���iC��Lr�����G�(�"9��kO�uF�Th��:�}���M�)<p�[��f��D�0[+��!hv.{P�!�[.�-�8!��Ƨ�G�uǞ�JN>���X��AB���ל��H�AБ�<������s c1c��cb����h��Th���5�0b���pT���ʇn�͆lk�},��s�-P�x%�+�87������Z1��9�aD"Qqg='e[9^8�M^�0Rr�����[����	:L�%�Ny�֌#I��������.�1Λ�����XЕ�zkJ��R,8�������0|۶���\��#� 
�B�D�}�eAa�F�&!6/l=U齆���5��y��@���h�5�}Kr�%4�)����*�?W���K�K�7�ce��޼E��V��q:���fB�n�G��M'
:~��p�+�!�^q���U�P�瑠kc\:�
+�z)Wѻ��W��?n`��$����t7���&�{��Vr�h6F>�$
+��hl�ޗe�.�4�� �:@��U���f�p���c����
+��8�WC�{�Q�D`�*H��P4�s�;J ��|*���)"���sp!͑oUi�0R���>TO"�����n�w~���~,�q�+�x�����
W���s;�y�X�vz9���X�]t�I��W>���W�f��e��[�7�h�]�0�(������~�_u�E�:Ä����K�6G��܆�u�jU
��s�W��ƹ�k�~O�=�5�
+s[ƛUk�A((o���|y�[r���NQ�G��&��J�w�!}Mi�� >cRvK�����9ݑ��H�֗�
��4=�yp��x��5���u��>.W16]�!%˜f���n�}!�(�Mm,�Q�O�LY�jo%��8�[5*�"��+F���h�a`P��� om�h��#�x��u��o��9rE^z
Q`�Y�ÿ���Wxh%u3���1F����\�?��+�8R�
��6C+�iA_�5�@#T�'0�Ң�9
+���S�8�	:���B��4���� ����g�$W����.�I4�2a�JitCeo���&�3&ܡ`��w�R�c��+c����[�0sK��(8b~�ûN7g��k�D�������]k静�H%TE:�)�Ea�O��
Q��ânt"�!���}Oi���V�h�m䆐6$ǹLNZ���i~� q0奫�%�=�ת�6������N���)�>�s�ջp�!�e��z�t��V���zF��gw���m=s���r
+�z����l
+K��UƐV�h�&wd�tj�0�$��t�z��"��z�.V�:5�����KWǆ5�/�=	!~GO��s���!���FmV��Z��whJᎦ�>���ۻ�C}��q
+?�[_u�󾘋�4�^1�������#F{����s�ǯ��~Qsi��=_���QW�QM��Ň|�$ތ�Ĺ�ֻ�:	jd^�2
�ҋ��� �=�ilgJ���t��x�N�By5&��
+�9>�?$k�����������P����B2]�˷$[!��N���\ܢ��{��D�5=��;���~������1k���c¯{-�y?��,!������ǟ��k�
+endstream
 endobj
-486 0 obj
-<< /D [ 484 0 R /XYZ 69.866 758.996 null ] >>
+500 0 obj
+<< /Type /Page /Contents 501 0 R /Resources 499 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-166 0 obj
-<< /D [ 484 0 R /XYZ 70.866 721.134 null ] >>
+502 0 obj
+<< /D [ 500 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 170 0 obj
-<< /D [ 484 0 R /XYZ 70.866 465.632 null ] >>
+<< /D [ 500 0 R /XYZ 70.866 393.842 null ] >>
 endobj
-483 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+174 0 obj
+<< /D [ 500 0 R /XYZ 70.866 116.015 null ] >>
 endobj
-489 0 obj
-<< /Filter /FlateDecode /Length 848 >>        
-stream
-xڥV���0��W���`cG����T�ynU�����c;٬���$�= n�=��������|�G����}��.���O�͙�<�٧ez��~\���":G�e�s�ݾ@�e����s���X�e?l����L�eU����*�e��d_1��j��c*B��aU�lVֱcp����Pe��������
-�-�k���%�!��Uj�U_e�HtY�hQC�Z��LR�t�Cp���~ċ�0'D�˿�Q�%��t
-e�|j&��(���+�LQ,F������J�9J_���Y!�H"�qgo�_��L�8�Ň]�J�j���Ɣ�`@���<�c[��4 �C��ĺ�Í[��po��08T6ԃl77Q�
�3�b��g֊�z)�"	�_���Z �jl��R��nZΖi�:x����TZ̿�	t*E%�(Y�D����Mj�)�T��&53�I�F6Uc�P�6U�������5Et��ƺ��U���	z3޵�!���C���;I�8�`����=U�Z$���)?�Zż�`���eI��k���
�����&ZC+���^�Dȼ2��"�3��To�u�T;Z�籲��\�KNZ�Y�m2�4x���e3͘�߀Xx6r��t�'�ǃ���8}P'Y��N�zDE�ƺ���޷�+���>�]#qT�<'���6��0K[p���`�(���"��ܸ���T~�B��?��S�͐�gІ�5OZ�F������6@���l��l�Y%]�ۮ�����~����ZB�w~Rl�]���7e�N,�|/Z�������G���v�Oiگ�AS"�}�)�yI��Q��x��q���n�
+499 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+505 0 obj
+<< /Filter /FlateDecode /Length 2516 >>       
+stream
+xڵZɎ�8��W��[�V��
�9o�������-�"E�r,�Շ@x�(���"���.���7����o�\�]�l�|��͖7�b/�,9��.6����_��fL�������Z~�W�����by�1�������:ڕ��a��A��?�F5����S,��"ы�W�rU��J�'ͳe�X�/s�{̕�o<��s���m�x�]'C�
+��_���;������F���&O�8�{��پ�˘*���gW��4��:���ʆ�ꞃ}�'�sV�D��)c����v�%
+-贊dM}R�D\_0� b�N��]S�x$����Ȯ��In1ѳf�F���	�g�=�,���Z#���`��,�	�[��)fe=�YO2mC��R�����`�qm�f��3�rMIӼA�Q�g�ҕ&���V�2�F���v��b�Z#FpK�f*��T_!/�f�W�ù���
+^��xH�7f��Of:*�G�A(�*�o��w�occ��Qe���_�W����fNH�惼xQa�#4mxh�w�)���u��0U��%�����t[#6��%�
+�Bp��F�\$�}*m+�����Wh�3C!
�|�0����U��	�WG�f�u��)���0�����-L�-� �І���[O=лh�m��V�r�[Ƿ����i�4�a��D�b��8�௭�-��#�^�C��Op������
+�BG�=�Wrn�j��pk�gH
�Lx#�j���mT�@���J�ݐ}��6>(
+}�v�▹�������
+�I� ��cO
+���埡
��rXc�$�錵r �Y��N�y,"=�U�U�
+�<�5�{�5ӎg�BZ��^�d��D��3�������237h�Ӑw�mC��N��釸vdA��]��XNd
/
]=��{2萶c>g.#L��s�=�-iŗ�6t�.�
�֮R�8^��T�0Zo]hN��y習6Cx&m�Q��r���[f"���^2t�+��{�h!�ip늩��^	��R�T�{v�*i-�m�3*)KcC�B �4^�gk	�價��9����淙���������LK&.�8*�/}6��(m��QB2O%��%=2�d�Hx�c��4.t��Hm�	�a!�!h&����?c0�DU����N,�/μ��@̯uJ��]����{Q|ҋ��}�a��~BS���6y�'}���T<�5��'��ӵ���0 �E5�5v�8��:��w�Lo϶�޶�ѝ�Ɠn!q�Ir��5"���F�1�Ik���z�95�R�gJ���͇����.�u~1��=b�-����D�O V�bѓ#�3�j�f�M�_@,痒ڽԋ���W���W6�1u�;�}!IOT!1��1	i8!��[s�օ���U�k���y����F�u묣����4S(#JđY����<o��콳�S�sk^J���ɭV��X��R�F����3���
'ޫ���}%E��.�~~lL@��@1�+�zX�J�pˇoļ��_�ZggH�iVڗa���ߞ�`ϴ�hfm#��Yl�N	Ve��޻j�l=K��Ko�D1�
+S|�֠�z�e���L	��+MV��<)7��Q�3��g��k���E�d͈2�Y���Ґ���-u�z��x�A������5Ǳ��	�����8���Q/�^��g`"NW�\�>�������Ts�fI�T�UrJc��t�9�c%�HRo�jh!�ƻ{����� �=���sKȒ�*(�� �y�5���=����	�Nj �t��S�"�&��6m�v��g/UL{"�PCok7�VG��r?���%��@/���?Ֆ]K�/�Y�V$�2fM����b�c�v5Ťk��H�w/k���iV
+@Y����ؐ�̴	1N���6��cm�\]����G?4VW�2ӗ�K\�K]�+u�Q!��@�%�w:�~�V�L�*��Ok��8�4��)��߬�|�[�K^y�R����v�U�>t���*4�ڰoM�*����Myڒ�������Q��D�߄H�:D��Af�ί��^�� f�����j�Y{p��F����9W
+��1'�7'"�͉�
(�CA31�6vgNmm
��͐^0���g0��#g��U��o���'{�3E:X|?m��J���'�g�)?2H���h���ʜ��!R��h?�N�I
+�XN%��p���k�~���\v�ӫDc^�h���NR���lɏA≯w��خqq����̪����8��]�G��w��s����:}ݹOuZ���V^�}-�ܡ8bJ��RGH;�w���t~�j�]V#>�@3�,�����}��.�2�6�)U�t2?�}ؠ���(�}�Z>��%��>Pʒ/Mj6��P�R�s*JWb��F�ml�з*��x���/��9���y`{<mn<����ȡ
Sr���?~��i��4-��R��D��LN�6�AT~:�Ml�]��9��pVj�x��IT�h=����P�r
 endstream
 endobj
-488 0 obj
-<< /Type /Page /Contents 489 0 R /Resources 487 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+504 0 obj
+<< /Type /Page /Contents 505 0 R /Resources 503 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-490 0 obj
-<< /D [ 488 0 R /XYZ 69.866 758.996 null ] >>
+506 0 obj
+<< /D [ 504 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-487 0 obj
-<< /Font << /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+503 0 obj
+<< /Font << /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-494 0 obj
+509 0 obj
 << /Filter /FlateDecode /Length 2022 >>       
 stream
 x��Z�n�6��+��p'4p��r�-�59���C��M�L{�N2ȡ�U,��ƒ�鯓8�� ������݊��֘��2>�#'/��\��R�������z�<J!D�tw!��C�s����(G��������N�W!�s�8�ş�x��U|n�S�oe��\]�s7�Xӌ��&މ'�u�D�h�	�ѽe�������/ޛ�n���=�i;��5�G-�8�D����;��[�FT
@@ -1825,22 +1887,22 @@ B
 �ߵ�h�4���R��5 �Ϧ*}AM�_KY�Is�+wQ���6����i�E$O�����0��4�Uk*��FI8�����Y'��%N�N6ź��@���A�X�=W�`�R���G��@^$i��0(~V��xdq��\"i�Չ�p�Y��-�1V�D�{i"��𱎓j���w�� h�?H�VĈ�'U������Q�!mq}�k�{��x���/J*,��I`Z�=`�1�%�+�8���l�R�s�CtTh|0��w&*!0x�m�M`�5��
 w�:��4KD������(���<؂^��ݜ)���.���B�9q�QE���f_��s����z��u�$��e폀��{������(��@��#�5:�d����B�I^���[}�*�䪶�7'"����d���~�^�Yk���[�&���^i��oeR|���b����;�Ek��UA�YW5�L��߸��C�:��b���Y�#ޅ^�%xl�x�j�O�'��һ�����	/y��!���[�Z��~
I�����ε�a�wx�YSn]V?n��,��m��vU)J��ڽ�9l�(-&k��x�^��d/p���Y�WC{���hg��{�獽h1��#{��^�f7���ZC{���E�%X��T�~�`Z����x>4����x���N����1�/j��U,VN�;�},a�׏a�կ��|��fۜ���T�Jm욣Zim��XUe1����؃-�0ph�cy�ˁ��eic���rG�_6���� |�.(��T�wgA����3��oz�3���cU`��b�xx>}4�쥕)�l��/�R�E�ɷJ��\lX����ņ�ņ0���
 ���Z@4ҷ2,��*)ò�.pJ�=Q'��f�X��T/H�gųk���M6A�=P��*�"�0����*����
-T��*�o@%Q�����FPI�	Pi3�$��i�J��FHE>�4�dD<���3$�z����7Nt+>
+T��*�o@%Q�����FPI�	Pi3�$��i�J��FHE>�4�dD<���3$�z����7O�+N
 endstream
 endobj
-493 0 obj
-<< /Type /Page /Contents 494 0 R /Resources 492 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+508 0 obj
+<< /Type /Page /Contents 509 0 R /Resources 507 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-495 0 obj
-<< /D [ 493 0 R /XYZ 69.866 758.996 null ] >>
+510 0 obj
+<< /D [ 508 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-174 0 obj
-<< /D [ 493 0 R /XYZ 70.866 721.134 null ] >>
+178 0 obj
+<< /D [ 508 0 R /XYZ 70.866 721.134 null ] >>
 endobj
-492 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+507 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-498 0 obj
+513 0 obj
 << /Filter /FlateDecode /Length 2419 >>       
 stream
 x��ZKo$���Wtn1s����h���n�OI�K�_���b������,ÇQϐ�b=�z�-������rz�^>\-lJ�h����/*��Mm^��\zD�|�^>o?=Jy�~��2`�`3;%�:KiNRZ��AJ
��`Ӭ��K�/�~���5�ri�|\�3���9)�b��FN��+$I�3��M�yZp
̩�9sK��[�Y�_��]P���O��(��TΆd�8Ѭ+TV����x���hr��Fx��5hd�"��]sP�
@@ -1856,68 +1918,67 @@ u
 �pn������獵�0�Q����??��s�G���ɤz���)?#�V�K��k���o7l��x&�o�]��Ea<.���h���_�i��bæ3k�ZK����헻b'�����-�2-��oY�a�S���`�8!)~e����wmc��JOP��S&�ڟ��0��m��PJ��q���S��z�I�=}���MT"�����r+�DK�L��0�	Ys��R
 �{	hm%!���z�(�ƭ$��
��c*�bΔڞ��>M6I�$���#��,Eb4	����2�'��)�|9��)��|Z&�]s��`�o�F�=��ќO{����N�>vR��w�n��M�h޼����R&1�C�l|U^)g�c�Z��s��R���F���lb^s.��T`h���>m���)EL
 ��U���l��
�p���g11�}�6lVD���?:�y��;������;E�jEȯ����]H�N�g�c�r�}��s�g'i�� @�����d�Y�oZy�$�".������BT��e���u���K���oW5ߥ�{F�3�׺���Ax4
-:�FA��QɃA}�((��i���tV��]�oD�j����rP�oVƩ���j���8�*�Zǀ���ںq9�.����"��� ��{��y� RDl���п�u�A�t0�/�k�-
+:�FA��QɃA}�((��i���tV��]�oD�j����rP�oVƩ���j���8�*�Zǀ���ںq9�.����"��� ��{��y� RDl���п��c������!
 endstream
 endobj
-497 0 obj
-<< /Type /Page /Contents 498 0 R /Resources 496 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+512 0 obj
+<< /Type /Page /Contents 513 0 R /Resources 511 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-499 0 obj
-<< /D [ 497 0 R /XYZ 69.866 758.996 null ] >>
+514 0 obj
+<< /D [ 512 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-178 0 obj
-<< /D [ 497 0 R /XYZ 70.866 437.192 null ] >>
+182 0 obj
+<< /D [ 512 0 R /XYZ 70.866 437.192 null ] >>
 endobj
-496 0 obj
-<< /Font << /F52 226 0 R /F50 224 0 R /F85 396 0 R /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R >> /ProcSet [ /PDF /Text ] >>
+511 0 obj
+<< /Font << /F52 230 0 R /F50 228 0 R /F85 403 0 R /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-502 0 obj
-<< /Filter /FlateDecode /Length 1840 >>       
+517 0 obj
+<< /Filter /FlateDecode /Length 1945 >>       
 stream
-x��ZɎ�6��W(�H8ŝ�zS�b���)�}��K~?%�z��ZݔH��_�
���ߟ����D��4%�dG�$T���ӗ���/�>v@�5���7�$��0���}~�#7J���Ǐ�]S����BY"4M=����Vv1��Q�5CA/�-�]�~o�5*�˯�F��_��{�mQt���{���1����|ƺ׷�����;�;��'��N�N���w�q��
�!ܙ�k��ѷ��;��}xg����������G�Jͻ�ᦲ*��C��:e�Z����>�Vl�C`�0�ý[��Ͻ�S�9W�f����zCN.x�
��C�h���~����`�<M�P��V{?)b	�����0�繗���j��Ě0��r�?���P�Wt��qqX��)�X�#�ieE�C��bS)M�y�Ӱ�<v�$��Qo���xp3������>$\�4��2�Х��ƀj0�>�����,��7��*rf�g2b$'��e�o
-����x-k\Xn��|�q�SDU�R�ݔx�.������eT^x4�H)G��c+WX�x��(����6����t�aJid�ID2A��Kމ�Bb:��W���1�N ʢ�o
X�x�.#x<�8j7�L
-\s�(�[�o���8J\\����wЁOL��o`�ͥ��l-���02DE%L�.a~'�0����sıϞ�Ip���cl���%
-o��}���8��N)�.h��3���KsօJU��s.�`G�k���@�R��=ML��QK�߮M���O#�$/�r�I�=]Ab_��.(@�+'"�3�ӆ���@�:Vc��2�r���q1�%�s<�%�Ȃ�ڞߏ�j��Tkn�pW�0_>uS)��KD��@I��u�$�ILL�I��+:
����Q��O��Qa�I�MtX����cҡG�#����6��tX�����_��2m�]�2���`Di�4�%�O
��9��U�&�ڼ�����r�� ��j�LkA�0/b��}�V"u?����O�.#Z�-r;(4^ٔjPM5~c�
?d��L�(�M���F�:�,�+i�bU�$�)��D;�DX19�\K�Q���)`�'���zl,9��Ya6M��|��cs��4��g��Z�Ș��T�r�fc��DKf���ڊY��bmAuL(5
-��B�Un�91���%6n�`��AA��tJL���b8���%��e�:��|�m.�b�����_G,%'ܰ�Ҹl=Ӱ'im�iX:`�����Ę�DĶ�vϮ� mz;�Aښ�0)|djI������ޱ����{�-@%k�]�o���P��P(�����px�ژ���.����6=o*C�r��][Jךc���Q ���5��heh��P��`��J��%����\�w���8vy�G�����'��z��0׸�+35��X��� �D�����
-W��8�4�KEH�z]���l�K�V�f��n��7�[G�S�����m��y��ޕh�M���4-���P���]�=�b�V���3Yq���l�~���0�T�V�Y2�kp>�
i���<�ڪm��,s�SCZ`�{�	�P�7�m�/�q�͇��C[�_�2j<�Lk��$wu&�.��fSy,R��S�ƌ�q����?�@��Xf�b�����$�q\��� ����z���!��s�7bt�K��ǆ��e���]�j/�����FQ�s�����Hz��'��`��ם�T5���0@e�N��A]L�V�O��N>�H�J6��@-�t
=�751�T-�N�O�
{
+x��ZɎ�6��W(�H8,��)�o��f��ľć���O�⮥%u��A�J$��W��H��[G�ߟhs��	�N;�4�d�U������/��/�>t�pk�����������>=��%Vv��Ƿ�)��t��]������Ï���ec�)1J�f�^4[�L��1{Ԧh.���~|yz�5��MI	�{���1����{ƺ����w����)��9�\�����p¿�*�7�����;3x�:m��Ρ��q��C}������8��w���dU�10�%�u�*"������ (�b�@�~��U	���19��#n�m��l�/
+�0������
4==���v#��4���Z�Ia4�@?5
��{�{i:I��x/A�	À-�󯿸�Ͻ����aU�Pc��<��]����Ji�pΛ؜�����h#G�M��냛QN��E��)�J��6Зq�.=�6�m�h@�kÿ��Y��Co��ș%�Ɉ��0n���)$�W⵬���m�`q:L�¥zpS��K�htw��',��h�R����V�����^K�����6������aJ!��$"� \�%��P!��E�+�Y֘F'%(�(��y3H��d��2���)�Q��fR�����2X�x����Q���v
+��|�:���A���s!c��9[o~=�QQ	�F8���H�rKB�;�\������Ip���sl��𥕨�d��%��q�N)N3]@��3���KsօJU��k.�N�e�u�~������=ML﫣�߮M���O#�%/�r�I�=]A�X��.(w さ����ᨵ0P���XxF��py{�@\LI��Z�$�,�����Ȯ6��M��	��	��S?������@�(	��$8��	��۸�C�<W��b�O��x����
+�L�l�����tX��H��=��:K�[��H�uOi��%^�+�&��')�-fF��L�X���@����Ρ�7Q��l�?.G{	"��~�v�ϴ�"�*=�gj%©���]\�|,w�Ѣm�[��xeS��P���
~�*~�=���uat#l�~�tPҾŪdIV�휊v��br�_K�Q����b�'���zn,���Ya6M��|���c�{�m��3F[-v}d�nx�k9c��y{�%)f���ڊY��bmAuL(5����91���%6n�`�� ��tJL�����4�T>Q�X.��I�玶`py�H������:�`)9ᆵ��e��aO��m4g�[:�a5m�1)��m/�ݰA��ve��5UaR&��Ԓ�ݛ��ޱ����{�/@%%��T����	�v�C�����px�ژ�����|Xl��7��\�r��]�-�[M�1U��Z �(i�C1j����$�i�k`�!���0�3n���`}�q����QH�=y��l4V\�y�\㪮��P�b�CЂm�+\5�����,!��u9
+[�.�Z-�=޺�~���b8*�2fn�-l;��3����P��w!��Ҵ����0�� �p+�0\�MZ�����Ɋs��g3��[�&�I��Z5g�LDP\���hHs��!�Vm�&f�ߌ��c<rXL����l;1��l>��j�z�Q�ifZ����͙<S�dS6��c����7flt����W���(��������cib���C�~J���G����<H/]��V
��Hm,X��j�>jm��x�_����YP
哴{8�jq��Ė��lg����w����6�Y<���P�f�.ӈ-N;���>��"H��ިm̸�ƽ�9p#��s��b�e�c�
_,���C�}nu�á��Y0h�L_����kRZ�.��s�mÆ�ܲ�u��U�m���P%���j���H|R7����9h��ɦ?�����#m�>1��<�XQ��
 endstream
 endobj
-501 0 obj
-<< /Type /Page /Contents 502 0 R /Resources 500 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+516 0 obj
+<< /Type /Page /Contents 517 0 R /Resources 515 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-503 0 obj
-<< /D [ 501 0 R /XYZ 69.866 758.996 null ] >>
+518 0 obj
+<< /D [ 516 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-500 0 obj
-<< /Font << /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F85 396 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+515 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-506 0 obj
-<< /Filter /FlateDecode /Length 2281 >>       
+521 0 obj
+<< /Filter /FlateDecode /Length 2282 >>       
 stream
 x��Z�n#G�����qw�
,�"ǀ3��-���x�����%���`x�Q��I1}���ӝH��ǻ#&�f�F���;�38291{k�-�R��������� ���.Bh�C�8����	a$~���)�S
 7;-�|��{\����H�{��$�I�9C|WY��,���$I$�l���H��
q� k}��+�Qb�}�G�*$=XZ%⪪D�����(S�����r��"E��Q����\�eE��mw��y�q�>Y�4�*i�-���Zh�شK�6�V�2}v"�U�q�������Q��!��獉�%��x�Q︅y��6�-�-͒���4�Vit�r|�1Xʪ�"���Xć�+1��涍#���]���=zWuSE�bWgu��*�y�1�.Y�1�������1j�w�w&'�@{$͆؈S��t�SQi1���j,�y&�R�
�$v���pj�,�(7R��7S'��p�b��m6��xش��l�K�Ӿ_�&k�cM�ue��X�U�ǥ.�+N�P������*���YiE�7/k٧��J��c���=���
��9~�	u�.e��[���>`���(�T\��!��N�>�����BgK�*�����<�I��p�UM�٢U��[깑�H�9��
��Q��d�W�#/۾�Y��Ni���v#e�s����玫�l��,�`\���<��6��Z['&m���R��|��J��0P�l.m���_R�FasA�pY֪d.�o>�[���Mb�tj]u��Qr�s���=mg#K���Jn�o��t�e(����]���DU7��CV��	ƹ���1�\l�_bF	\r�۹�yA��>>��S�%ga���,��~�r��7���'1��y�|�U�O�����ڑ$i�BO��4�
�v�q ��u�/_?O���
�U��F�$��r�����I��SsP�0E��
 ��"DԢ��N9^o��z�����
 �+1�@
 �8�xi��G7ߴ��n��0Y�� �y�4���Q2�&}|a��:*�Hh�0,��d,��M��r�&�$����:e
�fU��̴UT�:�DʳSS}a�����e���fvތ�����M9y=���n���tc�ܵ7��^���x>�[V�Q{*�T�l��ǴPA�+�E�(�W��9Vks�,�9���ަ�YiW^n��}�O�x��C���0�]�2�;5H)UW�>���lz\�J�k.Y���dR<M�c0(�v����4+"�Ay�����`����w^r3Pm�����"�Z���c5���o�;'ũt��t����h���ڎp���b[�o����������ځ�Y8�2|�Z�D��U�CjAi�Vjib6m��Fb58�zcf�nE�s�������svzdǦ}�hS��{Qȝ%�o�����q��ݬ�{}�It�5+��hA;���,�f��h��R�8�\�v�gL�U3�fuI8J4���B$:���067K	�}�Qx,@I�:�M�OX��f��N�� �,�$+ǟ�J��ę���K�����ѷ��B悚�m$a�ƈKj�v��kX�ߞ��q��H ���q�%L�� �:��H�k�:�b�&�n�s��ţ�D��->�1�z��`UI|���{9�T�p�
���1{1G8�r���M@��Bzrр��׳J��j���ND�"�v�6p�g~M�4�u�j�q�2�8>gܧ!ԕ��2����������<fF�]fF����e�0�\8��R�j��0E�G�K���6�J��L+��L��F���Vf`�VF�,�0P�~���t�����������������T�
-�\��tP��G���L5�Y��(�kВ��]Au#M_��1P��_F��'�KԖ��Q5`
EM9v��yK���~����⤎�5aҠ��-��(y���t�i{�j[P�Ν�d�2B��[�B���!
�!
<��F�]��F��GH����l�t:U�S^�0�0�T��
�.�p˫����z뫜��ːP��H�ͺ[!����]�zeS���.��]����!%��'�wd@��
P���s��]J1xej�X�wR�K���|"-�Fu|��&>�z�G�3q�%?v`?Z�������~zv�?�7?�π��b��������T���g_m׾��/WO��l!	j������^w���;R Rn~mg�3���/WgA��@U�M
����#�Yj�����\s)��ho�������l
+�\��tP��G���L5�Y��(�kВ��]Au#M_��1P��_F��'�KԖ��Q5`
EM9v��yK���~����⤎�5aҠ��-��(y���t�i{�j[P�Ν�d�2B��[�B���!
�!
<��F�]��F��GH����l�t:U�S^�0�0�T��
�.�p˫����z뫜��ːP��H�ͺ[!����]�zeS���.��]����!%��'�wd@��
P���s��]J1xej�X�wR�K���|"-�Fu|��&>�z�G�3q�%?v`?Z�������~zv�?�7?�π��b��������T���g_m׾��/WO��l!	j������^w���;R Rn~mg�3���/WgA��@U�M
����#�Yj�����\s)�|���ǻ����k
 endstream
 endobj
-505 0 obj
-<< /Type /Page /Contents 506 0 R /Resources 504 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
-endobj
-507 0 obj
-<< /D [ 505 0 R /XYZ 69.866 758.996 null ] >>
+520 0 obj
+<< /Type /Page /Contents 521 0 R /Resources 519 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-182 0 obj
-<< /D [ 505 0 R /XYZ 70.866 721.134 null ] >>
+522 0 obj
+<< /D [ 520 0 R /XYZ 69.866 758.996 null ] >>
 endobj
 186 0 obj
-<< /D [ 505 0 R /XYZ 70.866 619.748 null ] >>
+<< /D [ 520 0 R /XYZ 70.866 721.134 null ] >>
 endobj
-504 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R /F84 326 0 R /F75 318 0 R /F58 311 0 R /F61 319 0 R /F85 396 0 R >> /ProcSet [ /PDF /Text ] >>
+190 0 obj
+<< /D [ 520 0 R /XYZ 70.866 619.748 null ] >>
 endobj
-510 0 obj
+519 0 obj
+<< /Font << /F50 228 0 R /F52 230 0 R /F84 331 0 R /F75 323 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R >> /ProcSet [ /PDF /Text ] >>
+endobj
+525 0 obj
 << /Filter /FlateDecode /Length 2232 >>       
 stream
 x��[K��6��Po
�2��
l�] �n���6�4��ҿ�!ER��%�e)����H΋3�q���kǻ�8���A�7�Dg�FwV3�u�Ƿ�O���O|>t�I�c�o�������>>�>�ɼ�_;��D�AC/4+㙲���W���pĵV8���mP�U��#�ݴ ��燷�:!P-l�����P�(*�{��}z����Şsxć��~�������/ѿ�z�o6ali��RU}e�yӷ���������?q���$���d��w���0+��#*�iy��S�>bߦ�f�{�WN7�O!^&���P�N��Yj.wh
@@ -1927,19 +1988,19 @@ x
 �"p�� �̦���(��.C!�%H��K��SX|�e2
P�����	@}r�8�	Z2�0��a$�����L*�H��P�X���q���^�	UᔦԖ*X�����,� �*�xF���L�m�T-�6u�K�Ը��$y*�u��TS,��L)��b��pWS������a)�t�����`����8���A��Nm�%>gm�����7Mp��4�dol(�-����]'�9�
٘
 qfu�~�hKn��(��ƺ(��uN
�������x-����jNɧ%"T)�^r�$زI�M�މms�v��L��͈i@\q�WHOy��r0��H���T�S�)��s�ܸa�q�����x5-�Az�p�Hݥ6?h�j�	|��}d��%��WG�>�Vf�ɉ����ݘ\�0�%G���=(�Z.fK<	��*���VK:d,�t�ClwN:"g�<�N�4VU��~�q	�`\jFLK@`q_��:��%>'�'b���7�_���T�P=W.��7��$[2`�����u���\rh&đ�����h����]���GΔݽaN�	�QȤ�cM�@v#pX*�����Rf�Ru��-%]gw����[J7.�����#3z=bbF��
{�zM{�|�U�y:��2�>��xS:��^��
�s&�1��HW�?���!{�s�C䒗[=���Jܢ+�I�[GN�̈́��
���REO����7�P���4����Z��sl�`d¨�FCn��r07�h��jY��&y(�\���H�Q	1p�#�=����]�@��4��������T��,A�xKAX�4�T�4���1x
 (Յ�\�S�R���?\��������n%���:S��7�x:�n��?O��}'8!�eF��DKn
8A8�NPu�t��a�2�?bwY!$����8������B8t�R�m��n�X�K;�Fv��3��;;S3�\�Tp�]
-O��9kŢ��hw�K2
�U�L���I���GO2��e�����zO�+�ǥ�z������t���S�"z���:-�Bن����
B�K
+O��9kŢ��hw�K2
�U�L���I���GO2��e�����zO�+�ǥ�z������t���S�"z���:-�B�����
4�J
 endstream
 endobj
-509 0 obj
-<< /Type /Page /Contents 510 0 R /Resources 508 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+524 0 obj
+<< /Type /Page /Contents 525 0 R /Resources 523 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-511 0 obj
-<< /D [ 509 0 R /XYZ 69.866 758.996 null ] >>
+526 0 obj
+<< /D [ 524 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-508 0 obj
-<< /Font << /F84 326 0 R /F75 318 0 R /F85 396 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+523 0 obj
+<< /Font << /F84 331 0 R /F75 323 0 R /F85 403 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-514 0 obj
+529 0 obj
 << /Filter /FlateDecode /Length 2740 >>       
 stream
 x��\K��
�ϯ�o٪����ʵU3=ө�-��\>%�/�C|��$�j�z(�b�4=�&	 �%������O�|��I�'�D���t�0cL��oO�����Og*��C�opY
�\������ۘ,�����?:r�gBOf�������������k�adu�헯O���tB�F��������x�H�}����#��.�ۗ߾��v������9�;�0��gh�݁�����I�koF�Ӏ�ħ����ٴ\������xO��߹ǩ_���~���p�JY�K6��5�i}�|<ّ���ˇ�B�xv _�̋�,pAy�E9� 7/�HI)ɤ��h��
Е�s��ɲ;0��_^�J��23�J�R����
@@ -1949,20 +2010,20 @@ x
 �i9[���I��Xˌխ�iN��pP�q���G�S�����Ʉ��g�Wψ@���&^�d�R���hփ��BleD�N.b����#I�ov�[#uuy9�]6�0��;s9�DL�'㪊y�/!�Y	��*c�r�r7H[KtWv�ȟH����%�N�c߇�X�D"
 3R���S�$��VoHsa�꾦�,B���C�3��
 
e��O%4ns)$�t����`J��i�rr{\d�`�����dմ�7ѿPV�����-*��F����rD�]`�?(�Ή׌�ɴ�G�;2�9X!��y9��&^�(*ߵ<fH�Vm
�Z5���G����+�GX��������|Wk�tH�z��'���q+���tA���ʁ��@�}=A��4�
i�Tr�Gv�&�Xeȩ�����ј�0�R2�o�lrq�m݌b�C�'�|=Gm��s�sD��V�S��~�H���zZ�۽�T�y+�|r�D��0�ǳ�|���nZ��cZx��J����Q-/����=g1X浞�h�\ьqǬ=��kS\B�BX{� ]ǳT*(&E����s�U�`�=��RȺ�X
UK/��(��]��!�>���O��O��sA������~4a~�Y�V(��d�w�9���g<H�Y�8��������ņ�<͜(o������SY͔\�ݽ�����.Ⲝj��A�\t>�k�}���G{�c�U��)�w�գ�˟�j��W�uQ3����:,��d���`&`�a��_�����khϚ��T*���3!Ph9�41Ԫ��[R�F��4ptz�]�����jm��"�,3�H�b<L�Ҫކ�����j8	ۛGE<��O>��-�ɹ`�C��̨�9B?@8B˜r�M:�L.ڀ��Ք`��y�
�3�X�<��ÎNT9���hN�i-�Z��Z�Tа�t�[K�3�T�mk��9f����O�.��Y��Bfi�ɣSg�U�$�T��ٴ��Ř�
-U����`nN�Y�Ma=�ꚋ�ji}��U��2��ȼB�
A���b�t�S��Z��j�Ψt�T۰6Fm�~`�a���B���gN��Ӟ9s�k��@INnU'e�9e0BU�f>�k��$Z�\`�^A�@^N�>�W�QŚ�5]�4�eGj�u��{W�x#g�~T�$h�����g>�Lt!}���\�>A���EJ�YY�EM���9��J渐�_b�)���^T>7Lߞ����l�"�第�ũ
�˸���-�ӭ��k���6�
%:�VF������w��͆6Ϻ��m�G���oS1x����2�x2#o]c��l:���֋V��R�e�[�f�\	7K3�02��E_������`wC���e�`���ʸ�b�$��A�RbY�G'��&q��N�0S��k�	��a�A�Lx��q~�Lx����f�������t�f �q�1���lX��R�	����T#�!���^�6~��a"P\�f�.�a�{2z9�O�	A��S��a�"ьƴF��n<c�ޚ\���꽰�fp	��Y����OӉ��؇�����=�X�*�V1���ݞ�}/��Z�Ek�<�tڣ�)��4�B,-C;D��{"�9��sq~xDH��F�F~@����&�Hԙ�>-P2�2�vX{{�����	�?57��><��W��ק��ej
+U����`nN�Y�Ma=�ꚋ�ji}��U��2��ȼB�
A���b�t�S��Z��j�Ψt�T۰6Fm�~`�a���B���gN��Ӟ9s�k��@INnU'e�9e0BU�f>�k��$Z�\`�^A�@^N�>�W�QŚ�5]�4�eGj�u��{W�x#g�~T�$h�����g>�Lt!}���\�>A���EJ�YY�EM���9��J渐�_b�)���^T>7Lߞ����l�"�第�ũ
�˸���-�ӭ��k���6�
%:�VF������w��͆6Ϻ��m�G���oS1x����2�x2#o]c��l:���֋V��R�e�[�f�\	7K3�02��E_������`wC���e�`���ʸ�b�$��A�RbY�G'��&q��N�0S��k�	��a�A�Lx��q~�Lx����f�������t�f �q�1���lX��R�	����T#�!���^�6~��a"P\�f�.�a�{2z9�O�	A��S��a�"ьƴF��n<c�ޚ\���꽰�fp	��Y����OӉ��؇�����=�X�*�V1���ݞ�}/��Z�Ek�<�tڣ�)��4�B,-C;D��{"�9��sq~xDH��F�F~@����&�Hԙ�>-P2�2�vX{{�����	�?57��>����Lyo_���
e
 endstream
 endobj
-513 0 obj
-<< /Type /Page /Contents 514 0 R /Resources 512 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+528 0 obj
+<< /Type /Page /Contents 529 0 R /Resources 527 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-515 0 obj
-<< /D [ 513 0 R /XYZ 69.866 758.996 null ] >>
+530 0 obj
+<< /D [ 528 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-512 0 obj
-<< /Font << /F75 318 0 R /F85 396 0 R /F58 311 0 R /F61 319 0 R /F84 326 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+527 0 obj
+<< /Font << /F75 323 0 R /F85 403 0 R /F58 316 0 R /F61 324 0 R /F84 331 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-518 0 obj
-<< /Filter /FlateDecode /Length 2235 >>       
+533 0 obj
+<< /Filter /FlateDecode /Length 2236 >>       
 stream
 x��ˎ�6�_���`�����׻.�[۽9�M.͡���;�HI3�veK��Vk��<9Oʲ�������<(��F5^	�l㭰�6|{�������F
 C�o�
/8�������_�5E�����׆=����^�\�U��_͗1�3�-��f%��4ß�6y���������F)��*߼|�QQ�#	CNjݼ|k>�$�QRj'�R*�_��?�|D����?	*y�P��Z������%��-���'(/�r����
@@ -1977,40 +2038,41 @@ e
 (eW�p�OK36*\�7�;�pQ6ӌ���F�z��E�k��l���\��p1FzY��س�_�;᭛m���]�����f��~h��^��׏�D0۰ɯ
���1ً�$ݜd2�^o���v��ι��*���IZEl��Kn�V���G�^hZG5�Ǥ.1�q����
 ��w G�]��/	]i�ǟ�ڌ���LV����J� ��(�ܪ��_�вJ�غL}�ĜB�21gt�ybN�y+1�2�?1ϾQy'�����j`�� 3�����0�����nQ`l\`��z׌��^�ᅷ����^h�3nQ*c��P�go��\r��t5�{ӹO˙
 U^ʤPk��ɤ��K�l���t����yiTq���Mt*��YB]�H� ��2r9sr[��f�3
�
-kܙeXѨ1����lE[�P1y*_���|B����}φ�S92ڠ�~,F��4g��zW���k?{����Wkd��_��}f�$kW����m�cؒ�EZd�Wy�����x_=�N+PD6���\�YD�D���~2X�����K�L1��95�L��R�K��B���Ȼ��VO�mSlh��K��GWx~y���
+kܙeXѨ1����lE[�P1y*_���|B����}φ�S92ڠ�~,F��4g��zW���k?{����Wkd��_��}f�$kW����m�cؒ�EZd�Wy�����x_=�N+PD6���\�YD�D���~2X�����K�L1��95�L��R�K��B���Ȼ��VO�mSlh��K��x&+<�<���
 endstream
 endobj
-517 0 obj
-<< /Type /Page /Contents 518 0 R /Resources 516 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+532 0 obj
+<< /Type /Page /Contents 533 0 R /Resources 531 0 R /MediaBox [ 0 0 612 792 ] /Parent 498 0 R >>
 endobj
-519 0 obj
-<< /D [ 517 0 R /XYZ 69.866 758.996 null ] >>
+534 0 obj
+<< /D [ 532 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-516 0 obj
-<< /Font << /F75 318 0 R /F85 396 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+531 0 obj
+<< /Font << /F75 323 0 R /F85 403 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-522 0 obj
-<< /Filter /FlateDecode /Length 2045 >>       
+537 0 obj
+<< /Filter /FlateDecode /Length 2044 >>       
 stream
-x��\K��6��Pn
P0$�O�(���ȭ۽9�M.͡���w��H���J��Y6Mj�|3�M�/~������w:�A3B��$R���כ��O����5ÿ��W���^���>=�X9L/߾U�cE��\�P��F��>�f\K�O��lk��v��8�}�y�����#��>��0+��CMr><}>�B)HJ9�n�(e���������S�>|�������ï�޿���pI���3\P*(w�nc_�ƾ�+J�5���E��ĹP[XŹ02P]0s��R�0Փ�(�R�XM!ف�
-�M��:r�B?���t6u\T��
F-�|P��Ӥ)����"PB��a�)��=}V�m���PC�Mm�/��C���1ֿߧ6����� ̀S�5޳�m4��5M�����0@��z8Sx���ͽ�~��D���Mɛ֦&¹@�����D8X�N��+�U?��.+1�ώ��pD����A�`4LE��bYKT�p8h�b���
-B�l��׏+޳E�g�|�)KkOA'���$�iB-T��9�mEŉ��MV�ӣ��e�a1��i���݊W���ٓemte������t$���+Z��3/�;0D�1xx�b��γ��DH�*rޕ��bQ�`x́U}ܬ�"��Y�b�⥏�wS,�D�R?�,r�^VX��Ffg��f��&?�����*DzԣF���׈A"�������9vAϮ�֡Ɖ���G��"�i���"Qy�WͿ�i�yL%��� �.��'�M�wNPà�$��ǈA�b�#+t�������n'`�?�Ey��_r�Ƴ,�R%qPw�c"�!FԬ�Lݤ���a�z�4K*���Bǻ8"��d�4��q�py8mve�͢��B�?'0�b/x.�$T%�Y|���13=��L���𹔘>2ӈ���+����<�u�|��<�p\U�v���i[(+F�y�B#c��F>�J��N5�P��.�+:�J��1��
c_@-9�\]����z|�s��{�wʄk���ʊ�"�yvWc�
�����g�(�߶�bw��@`�D�XJ��^0Kh#�b��0��*m�
'T�J��hT(��|�V�&�L���)L̶��|�^_R��~�&���u�>�^g"��f)ɚ�E��A�ޤ��X�O5��$=�Q�Y���#����n�׹�h`ň&x�/�F��6��5��G���&��F>�J��1u�r�4�$����:�W�'Yu:�F�ⵘ����z"����bR��'~���#|�7:����5�
���E��.��_R�
-A�����(K�[J���.[����t��e�{�e]~���ֿ�:�|vۼO1�	��7��G\	
-��a:d�5
-)�]�XYqйXY��;.Vֆ�/���~�Ze&[���\�HPL��#]:P�q&�?�\�]�m��rV��~�8��r����3Ոg��'n��<��8�w�V��%�)Q^�zR.\I����93.��v!0�y]Iu���QM�sc�nj^�E��$B��ݤ�f�s�示{-Ϛd[]�� �g���:i׎�ٌ[����D��*/[C>�
2#.�j��U�m�jē.%9�j�tc�����3#v�<ᛧ8�����㬤�ƀ&��d��2�q&��ʫ;�gY;��~�����"-3�#�&hܶ���K��zOž�lԀy�����B�Zv^�mڏ?�����(NK�[�Qf܍�(�T]˦��D6��B&z#�|��D���KT�b�?�A���6̂���`�:�b!P\�[�t0넝*�wՏ"U۱n,���ǭI.�bsv>U�r��n��V?�RU&��n[�%�)�^���j�-������+}��2ŪX��Kܩ<V�վ5n@Xv��*r�µE�b��ut&�ȴ��a:���'�ۅN9j��5z���N�D��z����Og@{*���4у �0�/��u�UM:']�eO6������S��ƶ9�|��R䧃��	w��C�d|��3A������;��
+x��\Ko�6��W(�(��'P؇] ���9�M.͡���wćDR�5W��� �Ks4g�qM���~�����;�wڱN3B��$R��󷻏�p�O�>t��5ݿn�7���Y�vw��>>�Xٍ?�튑������BY"4|����r�H->���8������������c��d�{�2�¬$�jT-�y������ )�x�)e��/�ף��w��0��\7�����")�D�%aJ��HA��$���@�sE��X*J�5�ps���Qr���Bra��:c�'��ha�'�A�(X��C�ez�jku���y�8�9t���pQh�b5���NN�O��P#�@
Z�������Y��a^?�blss�F��iOc����qL��q��?J��j��dMx�FY���(v��`�3����b3,�j�QJeD4�s%�Z����ڂ{�`	�R�V�"���Ĩ?;�z�ۨw(㝆�`����D���F�M��{� �M��E���X�}�W]���$�q_N�Θ&�B1�]P[WU\h�t�<��R�C�ȉϹ�ݭ���p�Ց,yhe(K)��2Jho#����t��S��+��vHΫ���bx�����%[�X4>^J`U�0����
+sf��|�r�q̥ØW�'z��a��!p�ىc�U�q
�m{G�a"=�Q#KLAT�k� Ǹ��������yT&X�&'�K??��W�Oe�H���)E!.Ze��PJf#5�A�]`ԏ����&��A=�EVӈ�N!?Q�x�S���]ۏ��}����9?�vId��r栎�s"�!F���,ݤ���+a�{�2K*���B��8"���4��=�0�rp�z��x�G36���5|�`Ѓ�V�\pI$�B��K
c&v�]��>�s)�|d�����1y��J�N�y.�'���`7]ӺT�Q����W�#��@-�Sͷ�9�k�B�ƀ�0��K��PK��'W7���=���Ú��^��*#GJ�A[QX� Ͼ՘b��Iȏ�I"���:����L�s��o���aDެ1FZB�u���jQh��
+��]d��ث@�$�h�?]j���g��F��m����t�v�Ţ˲�Y�Ƽ�k�d{.CE���`�7��!��c�{7I/GT�|cz������u%�XFQ���R�#���x]%����G��<R�)�҅ʭʄ�#tl�O��t�	��K5�K�W���Bn�Ţb>��(��'�F�,�h�\�{4!���E�Ϯ��?���6D+�?kQ�Z״(;]�E	
++��,�+-�|��-ʲ�XH�n|�
>���E���
+���W�H)n����0�[���Ѭ,$hܬ,��7+K���`�o�W��V7�+	��Tr��JG
+1Δ���K�r�����K��qN�d�z\��와�b���&�[�0O�B,,�q�
+�<g9�vO��m6�=��b"�n��X��\7��Ţ�t08ᦔu�s�A"Q���1LZm&6׋qN컗�,�Aֵ�AL�zb�_'�@��Q��wj��\k��ؕ�5�!%��ŕK�!�* ���K�p�%gח��X�@��&"|͈��F���K���s��a�RX��ey���I �r���Y2��￫4B�c�e�r�����6tb{)�R�%A {�����n7TJ76(�UK�+ްO;�]|�8;��i�ym:J��2�Uݵd�+Y$u�+Y���3WhQq���x�&��03I[4���ۀ��@q	n	��l����>_�Tu����#$�������i����TU�Aj(��B�kX��ku�@4��*�ٍ	���`(VsiqnW\��\��\�)t�Jo����ү��q3c�o�ɝ*W�]3���Lb���B��б��?��[�X���b��{<��I�����
�~{�QA�x�š����&DDF�ٖ��U�Ig�4y'����s��S�JKh�XJp����D)a�?��ut>\H�砇��<f��
 endstream
 endobj
-521 0 obj
-<< /Type /Page /Contents 522 0 R /Resources 520 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+536 0 obj
+<< /Type /Page /Contents 537 0 R /Resources 535 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-523 0 obj
-<< /D [ 521 0 R /XYZ 69.866 758.996 null ] >>
+538 0 obj
+<< /D [ 536 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-520 0 obj
-<< /Font << /F75 318 0 R /F58 311 0 R /F61 319 0 R /F85 396 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+535 0 obj
+<< /Font << /F75 323 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-526 0 obj
-<< /Filter /FlateDecode /Length 2408 >>       
+542 0 obj
+<< /Filter /FlateDecode /Length 2406 >>       
 stream
 x��[;o#7��+6]�y|?À$[���.H����&?3$�/�䕴RR�W����8>̧/�~|����o>��	ƭ���L����������ę
 ~�'R~�GY˂��_�/?W�,�����eZ>
�d'��
L;Q|�s��J8Ԛ3om���i�5�J����S�A]u�/�O���蓮�S�ݾ=|�;3	�2�Mo��J"��A���޾N�>q.^9�x|z���{x$��w^�ܤ�|�}_���l���ϛ�C��]
�������L�	�O��Z����`���ڛ$����A�l��8[�q��� �B��*a�����V�
<$Dm����������۩��<my�~&���C
��7~�p;��1	c�5��>�V����]��>tμ����zɄ�F7�F�o!�p�Ū4�(��&V�c�닽�I�[��F�BL�=�)��T�n��64��sL������0X�&��g.��,xi&�l��&�/�S�L���o��e1B�"��q�J͔vwC�O�cڻ�̱�-\���bMD"��e���[��H����(l]�¶�Ph୍!WFET�!�a}���Ȁ�C->إ�h���Q��O"m=\�+
@@ -2019,271 +2081,277 @@ zm
 ��K��6�˦���w��h�Y(��q�4�Aah4���u-�p*��LEL!|�����	���W�n'.f�{�~P�+����3:+����慪	mQS66�F|�N#�/�����;#0(J�<�{X(���\]�B�bp��BW����O|Ҳ%���1.�3])FM���k�K,6�uE�����0s�=�̱ja*a9+�ޛl�g.�b�����̗>���m�h�-�P��4���M�Nd�EeE��3����G�]�̗���WY�'L`1�J����ME
��o)�b�\�x�7�,��{�jg���P�ku=2zFK���X�G�.�F�<�B�2�#���!o�N'�K���cjH�W��4���M�5,��x�ߵ0[���7&<�2�h�v-i���ū�nR�E9��윤�f�}(��X�| p��ʖ1�I�+�Tx��>?�	M��iH�F����|~�j;��X�-�)��HG��7N�nC�n��K�-N'�^
 NDϏ�i�22V���1ą�A�8퐮����������ڠ����@0��=�~�JnA�hO�7h���r�)Yj};N��HT�G3�ݤ�V�G6Dk�5�B�2GeO#�hu/%G,W4ry �[(�8�(yO2�'?A�0��2iT�%�bm�%f"�E�����}�nQ�I����+���Ԫ�Yʇ�E�]g*�G�޽��!�2iT��Y;�a-�u�9"x�⚆͌��+'�3���/�,�B���6�F��T��&tO�����_;���"�e�R���zE�Ƣq���AANR%�ƭ�iD��1#���*⃷��Q��F�uD���i�*������X���y�/�j�Q&�~j��3�j	��FQ�r�O�&����d^�upV�u-k��@~>�IV��/!���C��)��ȉ�9`��t����Vu2�Jha�h�7���49-�]�Kڄ��٣��s�'7d#��nT�}��i4��%�%�x�'��H��H��6��d�*9�%:���B�
 �/��hIϴ�'2�*|.���0WI���ׂ������%�a
-����ŧ|'KJ��9}�*���k\�F���♂Q�_��Z+���!5�>��ft-��}��U��R^H�����ӫ�q!]su�nHM�Xr}�<?�Xw�Cy}#e�?+g[�!�T����U���&ʶ�EI3�Z|]mg/���%톛�݆[=�[nΥ�+Z5J��\�s�(t��{�JI�t�X��W�rq���+}2|�<�\��;2*S{�
~% ��A�*���� I�ߋ�����c7� i#X���>��,�ƑgI'gwҕ}��^˧:u�������љDZǜ�e*�I�%�u�&nu��K�����x�ж ��-]~+����Y�_Y惽�dA�:�N���J���Hٗ�g��N�x���.��U2
���;)���K��A����<�m�A�\{v-�$�R2]6�,u����ʉtJ���랖�{4��%�"�C�E:��W�z�ּ��sV;A�ח�d���z4RЅ����#�+��`������ңX�-�~�o�cea�����"ׂJ�5
��>�כ���(t{���n
���>��
��l��$��(t�h��w��S����2F�c�����F�p�T��K}�����/�8H
+����ŧ|'KJ��9}�*���k\�F���♂Q�_��Z+���!5�>��ft-��}��U��R^H�����ӫ�q!]su�nHM�Xr}�<?�Xw�Cy}#e�?+g[�!�T����U���&ʶ�EI3�Z|]mg/���%톛�݆[=�[nΥ�+Z5J��\�s�(t��{�JI�t�X��W�rq���+}2|�<�\��;2*S{�
~% ��A�*���� I�ߋ�����c7� i#X���>��,�ƑgI'gwҕ}��^˧:u�������љDZǜ�e*�I�%�u�&nu��K�����x�ж ��-]~+����Y�_Y惽�dA�:�N���J���Hٗ�g��N�x���.��U2
���;)���K��A����<�m�A�\{v-�$�R2]6�,u����ʉtJ���랖�{4��%�"�C�E:��W�z�ּ��sV;A�ח�d���z4RЅ����#�+��`������ңX�-�~�o�cea�����"ׂJ�5
��>�כ���(t{���n
���>��
��l��$��(t�h��w��S����2F�c�����F�pl��dg���ÿ�85
 endstream
 endobj
-525 0 obj
-<< /Type /Page /Contents 526 0 R /Resources 524 0 R /MediaBox [ 0 0 612 792 ] /Parent 491 0 R >>
+541 0 obj
+<< /Type /Page /Contents 542 0 R /Resources 540 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-527 0 obj
-<< /D [ 525 0 R /XYZ 69.866 758.996 null ] >>
+543 0 obj
+<< /D [ 541 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-190 0 obj
-<< /D [ 525 0 R /XYZ 70.866 344.288 null ] >>
+194 0 obj
+<< /D [ 541 0 R /XYZ 70.866 344.288 null ] >>
 endobj
-524 0 obj
-<< /Font << /F75 318 0 R /F58 311 0 R /F61 319 0 R /F85 396 0 R /F84 326 0 R /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+540 0 obj
+<< /Font << /F75 323 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F84 331 0 R /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-530 0 obj
-<< /Filter /FlateDecode /Length 2011 >>       
+546 0 obj
+<< /Filter /FlateDecode /Length 2023 >>       
 stream
-x��[���6��+�..��0p�ݕwq�3\%������~�Iq����v���9Ù9�!�V_*Z�{����oZ��0B���"J��ϯ?A�_�(�V�5_~�GK�ʸ�ꏇ߻1�SU��ۗ�h�P��H�o��iX����s.����0�ب6���Fm�j__k++�`����sR��D����9�^�V�P���TQ��%�%������o��7�/-7x����׾�yC;���B�د����z��0�lۚG�=�{���}��cm�Ԭ��`2f��6z�y��T��P�Ҩp,�ٳB�	1��~4�G-��%�Vݏ�[���	p��ռ���
-��0�RN]!]����V�[��Z\K}�x+�`�;����������EPA�=�z�+�N�E
h�	�|x7� �
��I@Ѝ�8bF|�i��]�|���{o�yBݚ�K�����<�Xr*5���5J��_-�#*�n�{�r� &@�V�y3F��cQ�%X�ڟ���'$}u��һ��jd���8�M�R�����h�@��haݾs��iz� �т-�g�`=DV��ly^n�2��Gv��@�g���n����?�؈��p[�괈��F�G#.���;D���0&J=����Tn4f��*�B�:�(ۼ�]၏��Rd?V�M�
��r4ó�3�~H�Xx�Y�Ѻ���	K���c*<5�@cK]�yxU�>��V
-�0�γ9C8Z��G5����k?�c
	�P����^#�/L!�)ݞB�Q����M�s�|�Ɩ�8����"X�Շb(.Ux�[0�F���RU�u��[,�Q
-�<px�4�>�#8MY��n��,H�=�
-���T�0��B�C<_Z;w�@�:ʘ$]󙠌:�Bq�#-O6��K�s�0���+&xf<\�)�w�L�nj	l�qX�^�B����Gaq��mCf@�
G�?㱾�
-"<�rf!�p��{�
-=�ed�^(#�LFf��2���C�Eq��\�r�<�K��n+���X�i*�Ѐ��yM�6�]Newc;�ÓM-�ؚg�h�$N#7�[��Qn	�/�cs�A�[j����C;�w�]+����P��  �#ֺ�xg�K�CAa��`P��S�B=x��֋��5m#R�gv^ͨ-	`�����'�p�K5�"*����X���(S@��97�Z
-G�)% <�]�k(���%8ZʍG��@���x�?�ݓ��z{r�"��5�I!��T#K2�(�9�v�Ed�,Uč��w�w������B�aA�ID�ō�_�HA�kܑ�o���w�ћ�|��3b��;ar�_���u�72�U9�P HA�g;M�;�u��2^�z��L
-ٶN�v,Q���3�ͪ,$±��	��K�q�G7��M8][�^��%�E/U�!u�Adh�7�/�2�|��;y8}ר�~�6�Y��"+�1��=�8��D
-�_�)4���M^|���^�}~cC��z0�9�/V�W�.����e�B.���Bc{��2m<��8\`�X����j
-�/��v̅�M��%��\Dq
�M[B�ѝ]$�׫7n�
T���iJspם����t���.�u-��8��H�V�c-F�XF�-����5,Z��m��ene,ح�T(�0��oe����2���-ָ�Q"�(�Ǜ	_s�)�KI �$W�|��Q�1X��4���rSk�MI,}�da�QR��~E!mLg�?bb[hx�
r���(�)�'�q��S*�xfgW���J|��
�+@�������̿���3�!͵�(��}\�8�Y�}�K���\x`��.��س��W!k����)��J�%�kV�lŌ�u�ī���QI�:u��@O�X��������d}F�v�c[s�k-�/>�%F�Y�`T��w,0��R�B�=�����9rU��wn��<e)_���G��ϓ岢��z���u�[�ڝ�F~��U�������� �Fĳ~��z������Ц
+x��[9��6��W(]$\�`���wq�3\%��������$��P�J��.���"�qf93���kE��4{����M+VF�V�QD)U�����gh��%������o�h	_W�]���[?&q��|�Ze-3y�k��vD|����J�VF;as	�J��ha��}}~x����y(f��/���p�r^=�>�����C����K0xKJ�S��w�jJ�¿,������w_7���y��ʄ~�(x�h4hЋ���kk�������q�Qs��N�ʴ�U����3j07R��ϡN�Q�X�9�B��Q�?Z������I��G�-j�o��`�j��@Z���t�	�.�.�
I�_+A�ͱv����K-j����9���Є�"� ʉ��G-֕�f��P������
�xݨ�#n���@C�h��)hW�;<����X�<�n��e��%��2�{_r��7�Z��W����[c=zy��'@�^�5j
+��ǲ�KА�Пƺ`'$}�u��X�EX�	52�1�:l�<�24^$\�-����-��w�X��� ��{���-X�%��X�-��x��^��g��� {#���fQO���]�c=Vp�m6��<.XI����wc�8�aL�8���Tn�����\�:�(۽�]၏��C�f��{3����h�g�g���)���un��	K�<�1��@cs,�,��Uke+ ��h�	�r
+��Z`?�����y������?�A�_B*xS�?��ڣ2!�շ��ܛ׏-�q����mVʡ�T����P�3KU����ٶX�Hh&S0��WJCҧ	N[�0�C�_�9��sO���Q[��r���ޟ/����O���1J��3AuP��GZD/l�(\;4	���7�J}Y&x�<\�ɥ�ӌ�nl��zk����4a�����ح���" 6���ņ�����3qɕ+g��'�c^Fb�2��d$�[)#���-�C��5*���rE�.v_�ŗ�2���B=�}��"��,�(
+9�-����OB���ck�]F�&qZr��
+��m `k
+�X�%��.5� �"G���ٯh�S:Ȧ�:��n@�%
+H㈵�����$�җX�i�
2+x0g�q"�ޣ!o���z?��md�T♝W3�JX��Gy�c#���'��ƜG�,~�!Vv��$�Μ�e]
+G�)% <�"�/�d�+�]ʍG�u�JO��E�(�'�����c$�x�?P
Y��G�Oij��Q�̖�*���.���㛫��x#9.�2�rq��,�ӥ������{Kk���͉X>HÙ�e��Av��*���J��P��iP�}� :�0�i�� �N��KQ�m�Ia ��1�%�XWz�I��@8�4�e�	����8~���y�N׎������� F�s��R�Fe����uVF��m�G��Z���q�zE�J���Yi�qf��� �$R�W|�&C��6i�ƪzf��
�F�Q��Xu^94�ҩ�ة�i
+���;H+�}��i�����K�>��[M����X�X�;@/�����<�k�h��� ����"��޼q3m��2�NSڃ���Pd�(�3$%2�p�5���㴡 ��|l�QL^�`�7_Կ�
+�d�z�n�$r�}Y�n��\��1Ez+K�H�ȴ}l����!E(o$|���HI(�$W��ՓVc�"�iin?;�$F�MI,}�d��ɤ��~E!l�g��1��z����R���3����)�x<��+�X�B%>3��Z� �@H��p�!=�̟is-;�(gW�+�xVg_q�җ�8g�xFT�!�@{���&�`���ɝV�.�]�
+�e/g� �?'�l�����a�sLpj�r����=�����"t�sks��[5Y���#š����'F���W��d|/�$�ٳ����b"���B��	��Hq��{�?��=��'�	]c}�rv�:A~I�2�O�w����vQB�Rڟ���?J	knu�z�$}E#<=?���7�
 endstream
 endobj
-529 0 obj
-<< /Type /Page /Contents 530 0 R /Resources 528 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+545 0 obj
+<< /Type /Page /Contents 546 0 R /Resources 544 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-531 0 obj
-<< /D [ 529 0 R /XYZ 69.866 758.996 null ] >>
+547 0 obj
+<< /D [ 545 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-528 0 obj
-<< /Font << /F84 326 0 R /F75 318 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+544 0 obj
+<< /Font << /F84 331 0 R /F75 323 0 R /F58 316 0 R /F61 324 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-535 0 obj
-<< /Filter /FlateDecode /Length 2621 >>       
-stream
-x��[K��6���Pn;��CV�	
� �L����s�\��o�)�������l�&YU_�Ȣć�>��Ļ��>	���`\�AY�P������~�?�E��g������'���>�2NʜƏ����oAh	�v��F
-�{��&1�3�u�FH�*ߒ������{2���/މFA$
�6~T�B�����b� ���^�,�X�	�E
-^���8�K��8��/�?j�w�Ùs�����~�u�kۀ�CG�%�����7���o��=��c���,�ǝr�.h4�����~V��¦�e�����)��������9��L���x%0 DxΑ%���j�����4�q���Xo���$y����D���eŜ1�]ۊa@�S�~�#�^��a�K4#~�R�i���jL`VJ��Se7^��s��e6�"0\F���Q$PA�x�󝒍�f>tb��#8[��m(�$����%:G Z�fZ��������-�c	]>��G2 �O��x��ޕ[�kDM���6LoW��!��ݖ�`�z��H�~�����	�2�r�C�DH�y��P���M��WzS=�ބ�r��N�
-�<jz�<�c\D���Z�"_�>�iM�{���Y�g�$�!L�쒲�1%X3���O��WzA=�����_9ŤR=fyAC�<eMJ"���$Ə`;M˷Ѻ�+r�QE}l�)������x������Gti�DC-p^�*�$�<R�Tty�{m���(R�4�Yc�)O)AB\�F~/ms��K��pI�ZB�۵��J����bd�rGJ�����K�#�q�r�!}�2�.yT����'�Α���S���|Oϋ7�M��4Y;�����~~
����sG�5ZF�xэ�*t�(���\���"�m�3rCsr|���s��)���M������V����A���F���1*t\�9��d'2��ZU�V��7�e���E�9I3n���4t$hiNy�I�Z|�:[9�<�`�����ŁY[\Z���"D~��&�T��ٌj�
-�+R�]�h�L�R�6{��s��!㯨\�B��F�2^���0�D=�E7�E�3��o���w2s�g��vͮ��a���:_pt��IJ5�,L�X�ٹ�f�/�"���]��$�� �Q��
g�Sԉw*�Z"�%��{e�]�T�]g�s>	MT˼9����ge{8�N�.&��I����y0�4
-Q^��/�؜.0UF��F�Q��iF�:Ż�n�`�r�hU��P��Z��f�&S:	im*Մ�h�2�quO�]f���
K�z�L��S����eXa��uI�`�2~j7I�Ysh�5e!�'�)[����
-�PX
-����8E���҇�(�d��&[��E��;�L˧�o�QS����[E�T �(�&D^�l��)_�"[d�뱞i��o�cs��~ V��Ja�r�60�Ps�̥�����[+�ڲ�:W�Tȅ�Wʬ�Lٕ��K�u�M��;��[�U��P%�{���)T`n��ުB����ܲ;D��ɎIiU�!>"0���x��X]6���:�ɋ^� �f;��G&���}�B�
-訯���w��oiDS�w��[/M,Z
�J���F�{���z����<[u�m���UEІ�%�����*,��J|�7�\5-~�2[S��P���:jq��f	J�o.����r��4�~�E�u}�|��8F2vTW�4ٜ;}��(C���L�k8Z�۬���V�k����N�E�\���L�k8B}y]qm��P]Uq�>U�!�?x�i�����J#�r�c�p\�n���������ӿ�C$�,���-ei�&k��i���GH��G��砂Ҿ@ѫ�!k�v(čr8�I(e�"��40+�X�m����6���~�����(����?��:y�:`�9ڦ#ܝ�]�c���S|
-�<�����)���ƍ�[%��Ǫ:�H�%|m�����P�����F��r�׸�)y�R��hL	L���"�"�@>[�j�Bm�m-Ъ�6H�R����pr��ؐ��Ql�ʤ���K%鵲/�H�� uO��u�~�����³j��Q(Q=(����)}Hg�z�D�H)�Jy��}����7���B3�v3�n�m��
�q�V��"Fq��ܺU��3*Vg����Y[&���ww������;�C�I����e�t�ݴ\���&U��U��GC�.#�[	py��'K�ظ��Ǯ�Ri��l�s�T"l�/�S����A_(�<j�U�o�Š_-��	m��:
-���ӓ�B�VP�Odɂ6��V�-g�l'�	y;��p�щ�
-�$vh��m,�n�\C����.�/$���Ayl�V��>n�P��j�H`6	M�G}�i�/,N$2ka��cR�����a5/���3�<�J�>WK�"Uz\/J���j_�|\�1���Ch�X0V�z����`�w�z���,�"/�n�4X����:���W��6XIu���1D��>��y^?6-zG�6m�GǏ��;���#������>�J���kK����f�_��Q�p%�z�mo@Z���K��~��~��Ay��<��VݶøÖj��X|���	J72�_�	�痧��c�
-
+550 0 obj
+<< /Filter /FlateDecode /Length 2436 >>       
+stream
+x��[��#7��+ڙ
�\V��tp��dG�o`'�}�&��i�ջ�V�-��W����ᯁ?>����'�o>�`�q�%�q���>?}��~��>?
�	g��B�Ͼ�q��<�<
+eN
�?��5to>v
+�U��K�4P4����g�bj7gVk�rs�nImQ[���{"TI�W�
M☤V$#�Ѽ!4/O.V�3��Yl�����8�|>�89��?�s�з�<���SQ��b�5��i��N9���3�c�T�Ȍ��͛�UFE��?�m��fC@��u�H�?��^$KŌU��@>����0�!}FD���X~�j�%Ok��.��������F�=m'i��+��YL91I5�nJ9�Ȑ�ҷ
�(�Hn�.)Pz�l�'�ܼ��㫔���&�R�>A��\��\�EJ��貏�<��J6g2�ߊ�L.�E��������$����)���=N<�CS�{�>��&�2����9���Bp*�����MŅs��s4	T\�.y��=��@���r��2�m-��M�:ʱ
��]�TvP�C�u��4�}��w����6�.M�o�k���
+-,3D�L�KE
����SD-�`Hb��2QK��v�(x���R�A#�-����K�j~�U��Q������ı@������3/ąL��&�#�b�r(����6�r��Rl�|%���61N�4�si������u]�Q�w�*�o$T��m�RN1�T����c�g�C0L��Z��_U�k������X3��{E��2`E��=&}�������v�64=Z���
+��1�s�N'�Ā�gwL�iv%��.��HZ�ى��|fH�Rg_4��� ��0AeR���G��?<H���g-�NyJC$��a����6��mi�>�D˴��﵋�
+��"����ƞA���)-���D���g�>�o�A]��%��~y��@G$Ŭ2���ta�o�m�	��򀐦lF����W𧻓���5>{+b�0�O�`B\\Ftc�
+����ٜ�+�`���;��2�5$���h��R�O�3jz6�?��_�[���'��d��/`��O���e����DF��[���|�:�Íq��z�ɖ$I�ں�H
+1�:��i��n3>Y��\�6�b�C����S�PZ���"D^X�IN��pN�:� d��jW
S*�C��io��dN�x2���)$�6z���Mt�qT��-�I&z��V��7_�ј[�5�
�]%�1��Ԓ/:8�o.`RQ��¬=VvV
+&EYHȉ���ɔ��G*}�L�iD*;�Ie���_�cr���KT��m�9GJl�Z�-�YdSY���	qY�.9��U\q��<�r5�R^��2/�l.��2gm��U��nP3ҷЩ�Mc:�Ŭ*O��R�:�m�I���Ik�vMm�\W�TX?`Ƨ}Qݘ$\o�IC�T�bui
V�5��Q�`�2~j7�S�h�D5x�4�! We�b�I�/�$,N1妽�a�K\rJŔ-K��_Ý_����9�����﬿�����7����t
+��8᨜�O8����	G>�X�{ւ7�MP�1Tد�B5oX��t�p�,���,����^�D�[�W6\�2eq6�ed��j�ߌ;�3��Y^�%˯�5Ȼ�y��Bo���ܳHd%��[#�8��}q�kY]֑}�fB�ͧE7��["���1%i{�#�촯$��]'���P~�qqz��`h#X6|�6@6�Ȼ�g��'�J0a{[7�\�G��e8�)�%��qU������_�rռ���f+�t���".M�ޛ�ك��+��{+ſ�H��|�Q2m]��(L���N�*�&�s�Te�Z�����Z������F�k���>����@����"�/�׶�*p��U���P
"���[P͛80\����6�"�^y����j����x��}(J��z�YZ�ɜ��#�~/Ԟ|êG��ǡ@SuЇ�!s�;V(�
+S���1�)�i:`��A�r3��"۰�V�9	�_�@�
+h"��0����hPY�A����o����/!�[8���5Ц���w���^U���/�b{<{{3��j'���Hx?��{[�K5R�ΣU0�I��0����.����#��i������G���b5�����0��VCc�.������j�@m���Ǽ���R!�g'H�g��׊�P��|��Z��v/CAu5�խ���i��L�I��2V�c,�d&lX��F�،֍�Mi�h�"�n}_ؿ(��Z��m��<���X�<��e�`�m蜋I�y:K��I��f�2�1|7ona��`�pȫS���]��L��X�3�5�[׏���]�ֲ͎���`�}�� 0u��,�@�G���F],��
+�=�-'\��?\Z��JIɀ�|2���.��7�/O�[���
 endstream
 endobj
-534 0 obj
-<< /Type /Page /Contents 535 0 R /Resources 533 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+549 0 obj
+<< /Type /Page /Contents 550 0 R /Resources 548 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-536 0 obj
-<< /D [ 534 0 R /XYZ 69.866 758.996 null ] >>
+551 0 obj
+<< /D [ 549 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-194 0 obj
-<< /D [ 534 0 R /XYZ 70.866 575.421 null ] >>
+198 0 obj
+<< /D [ 549 0 R /XYZ 70.866 532.084 null ] >>
 endobj
-533 0 obj
-<< /Font << /F84 326 0 R /F58 311 0 R /F61 319 0 R /F75 318 0 R /F50 224 0 R /F52 226 0 R /F85 396 0 R >> /ProcSet [ /PDF /Text ] >>
+548 0 obj
+<< /Font << /F84 331 0 R /F75 323 0 R /F58 316 0 R /F61 324 0 R /F50 228 0 R /F52 230 0 R /F85 403 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-539 0 obj
-<< /Filter /FlateDecode /Length 2312 >>       
+554 0 obj
+<< /Filter /FlateDecode /Length 2355 >>       
 stream
-x��[;�#7��WL���I�8,�����]�*�&W���P�5{l�
o1k[#�?��Di������x����O>��
-ƍ�w�7����������4p&���U��L3�3�����)�z�|�{�J�t�
-��x�����9�Պ��͙�z�l{To�uE�^�I�Z���ZkX9��5HɗƱ3������y{��wzM������
-�4)�0�}~��9(|��oo?]�ԧ���&S�w��Vp�5>(Vh�
I�"����D��x0���xô5�T�sɃ���c�PɌR����Ù
-�?�.)�2j�2-L��r׏/g�1#^y<8m��A����ˏ��34No�h�`:�ϞK+�\n^�o2�Y/�RM���h��=sx�@m�p�c��g��M�R�P�,�ZOF`/�,�')�tC�?�vXk�+��B0lMt�n���)  d���>Or�|F�Ƈ��<Փ��Z��Of��1��a�=UH�׃`��L�N{�U�
?�2�z=�r�%�F�X�O�LKs_,pqen��M��8�H�h4��Ġ��53:�+N����K����PX�ľe~��v�v���
�yk�Y��e�l�q����q����m�݇%,ѶH2�e�y���!˩��|�4�?��
�V�k���<�
��̢FT����>"�&��ʿI���oj�5�w�4��]V�N����������P��InIk�k	�����>�^vx���wţN�����C{��}gzP�N1̮���G/u���c��'.�m�E�5�2���8�vW�	���Jv��0m�U�T�A��ĺ�$1uPe�������y�h8��AH��-���I�z͟�M�*��R±o!J��f,s�خldD{�i��W��k��4��>�)F�.�;�u�Oa��m������������0G�k�K�b�bG��xᨹ�Զ�+1J�Io��b�q9B�ƕ"E�����[����ʜ�x��U�����i��}����f��b��W���&eBl��r���,t���a/B�X?pj��4�S8+Е*����G" �p(�y�"[��g#?�Q��4J	�4�ZҤǸP��V��)��v����[��_iz�*Kٌ-����G����Ƽ�>x�-m��[D4���o��0��Eڨ1�͜��B~�U���B�8H�8jiu���K~ǳ)oK��y��4e���s�@��Qik�Ie[I�Ozd����`l�B���P�R{�[]B{
��2Z>�?j�E�.j�FT��U���^ҬJ���8�j0����0�D勌�5#fa|jC<��"�7���cpu�1�,|��`��{��Y���K��o�Ѝ��:��&.��@�n_���Ԯ���:����fuL/FE��ˠ���1Q�pl}!���uُ Z�P�ܽM�Y /w��_,��3���u�r�5��<z"߯�k���^�Z.O_�(�,V��9,']�8Jt�3q����
M�K�M�M5�a����^����V#cյ��*MhI�mQ�Xa6cL�g��o����\z'��g{�v7�����D-��\#�,%0p���6\�1u��#�-c�:� ��;� �%Se���6����T�*i��Y�=�B�j�k�0-���w��c;�k�xI�G�q4�G+{u�<I��-�-�4�W�
(����}���H7HI��F� !�ޝ��dN=D��}����(f��k�:�����U�Lqf����L��=��j4wR��j�	<*uM�:<"xj<�pV�G�h4{tR������*x��s�i�8��!7�FC����K��@�o|&řֺ��|����>7�)WW�,�+f��	T�[�kw���/��=��5I�7���S�>����.�ܞ��*9�T�~=�#	o�S'�}s�N�(?���sj>1�H�u��99�y-A���T��S���x����8iAs㧨��؅uWǄ|��_z�$1����nt�M/n��H�+�
-�4�Wu&���K�*����#����<��[
-��1�(�s�"�|�ջ$5��U�%�c4��Z�p/(���28���c���>k�k:��}��bv2g�����D�>�WT���(�^DB���F1u�4��o
!r[�������7�'S����Q�f�t��t���s�X�]F�H�E׸Z��h[�]l8A��ʿ��JF1��/a�V�WX%R������U2�م��UZ�^a���ZX�������*Q�ԝbZ���ژsa�k[,���n�D�
f������0�`P�[��b�z����(]�
+x��[I�7��_1��@�H�V x���] ��������K�~�uD���x�Ƌ,�?�")�|��w�?	|��`��AI���ׯO������q����]��#�b���ᗧ�G��Ɨ���OCI�ve<SVT��>�Ѳ��͙3&����
+�J��s'���V(.ܨ�����BZ>�}7oO��VB������U&�=���8I9�}>�\�r�s)�>c�P���������E�p��k}�}I���9���J��N\�PC�,90T%[��c<��A ���� ��{wX��*
+zmFM�y|�,�#8l^�w2�Y����J7�Zɑ�.���%��w���*�;|6����*�J8&3�Wzw��r�s7T�coϜ�Sm�WCZ1y���&�W��ikF�G�e��y��d8�C����|1�j8H�jΟ����M�2�!��3��dꮐH����_�h���؉���^j
+�}m��Y�({���,Ḧ́���4Xe��	��>�8쵶�*'q��;�Q|2@T�=Rah��4�6��!���<��1�
+��UKa�s����2&҆���%ͽ�k7h���&Zq�IK���H#�88�� ����sx�:8�E3 q8|9�)$�a��8U�4�zbh�y
+#���X�X(�yJ�2�Q�6��뷚��_X�=tla��X7L�xp��j��<�&
"f]�]��NQ��W헕ʺ	��S��@UI�	�}�4M�)ia:�I�/g�9/��ax���չH����N��Q���7h=��JiZ	�(��4��X���	�:�˅X�
��@1�h6E+A��V��q]0�z<J
��c&�J�Y�R;Y��<��L��z��A%:��t�WZ=���Rx�/
i,���qV�*�<dMD��w�j��s?IZ֍���Ψ���E�(�%#q�1��#q*ْ�8��h$ީs�H�Z�?����/���e��G�D�D�b̮�Z�h�u�M�a�(1f���s]<��X��<$0)�#-"����^kyt\\�q����3�v�y1f��3y�#�cr��VbvP�R����Q��	�����p��cЭ��B�}x���?,$j��R��I��w�Ϧr�ci�H[��/��>�k�Q�@�9�/�V�N�����1�蠸4����i��<3%#:3]�bz�3�-=U�#�ّ�]_r��:ZC��K��{��e\qQbO�����e�7��&�3G-�n���k�\�%��%�>h(�	�������h>EaA��UGQʊ��+�"Cj�9Ӑ��
	y�P�Ō���/������W�Px� &�(@�y�uc���i��5�d2-C/0�1.���W���*�J�q����]�@����\6���i���z�!�1��O����yD�#�Xhk���Ds(�F���h���J�L������DY�/q>��ki5<�~��bd�i��e\ Ҟa�_$PYW���I�,�,�G5Ǻ06�3�4�R�K!Ր�����Q-�qQ7j���ĶRxI�*}��<��8�c2�D��+kF���ԆXF�E������WG
#���wJ���J(!K|�����bkP���1!���,�������%��K����8�LA�,�i��(�y�������LD�
+GBB=f<h�~Qˇ���mz��[ܞ���9�zO]�η�A�.'�[��|�k��$�-q�m��-�r�{]\����c���}��Ѵ����)��&f��
�g�H���QXcЪ+��j���-��%7v$��0�$(��П?�k
+m/���\v��2�:!�(#ڿt��a.ݘ�g�:���Q�e�s�_za��޷�����"��\[��O!�
���O�[E�ݭ�H2�[yM!�r��h!���պ�R٣b���ԑW(�D�$��~_jE$��%P��H=$�<�omJ0#��[�?�h��Cb0�|���u�|G�.�i�9S<�>�	�<����>v�@s'":�y-�Z��G����Y܄D��	8"���IA��^:�uQ�(<�3�	���~�i
1��c9!���T����j�}���>�4�9W׻l���F��d'�5�}N
+_"n�s�֤8��c�k���~(uﻲs{��>�d)�H�z2G��>�Nx���K8]��@�[`�|-(�KIc���9'�5��0�����jr�9Q/(��B�W'#.�狐�)�Sȧ���0���I�jFw�z1������,r�M������Ȯ��;GT1�[�18r�m��Ϥ�ٿ˿K�,��E����w�jP���K0a4 �p��A�����<g��}m���Ą�C(�Z��-�ޞ�㘸Q
 endstream
 endobj
-538 0 obj
-<< /Type /Page /Contents 539 0 R /Resources 537 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+553 0 obj
+<< /Type /Page /Contents 554 0 R /Resources 552 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-540 0 obj
-<< /D [ 538 0 R /XYZ 69.866 758.996 null ] >>
+555 0 obj
+<< /D [ 553 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-198 0 obj
-<< /D [ 538 0 R /XYZ 70.866 488.747 null ] >>
+202 0 obj
+<< /D [ 553 0 R /XYZ 70.866 416.517 null ] >>
 endobj
-537 0 obj
-<< /Font << /F85 396 0 R /F75 318 0 R /F58 311 0 R /F61 319 0 R /F84 326 0 R /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+552 0 obj
+<< /Font << /F75 323 0 R /F85 403 0 R /F58 316 0 R /F61 324 0 R /F84 331 0 R /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-543 0 obj
-<< /Filter /FlateDecode /Length 2703 >>       
-stream
-x��Ɏ���_!�<@F�}
�5�o��f��ؾd�%��"Y�X����Mw�Qk�ڋ�*�ĺ?;����5���p���w����N	�+�~{��7x�/�~�X/����~�Kx���w���c@�����?�f�熠�ø2�W�
-���Q���zgLD#�E�L����G8�v�ʰ��De�����ؿ�|9[�q���vo��K��x�kk��oݯ_�'��.�K�%��Gx~����<�a?��W��p�1��s|&�����@(ܔ?Q(2Be��U�g��X�c"��N�̪v�A!��\�#PS�!��&,�!�KO�-[q8�c<������BF㗳�v�m�[E%c*t�3�V�ř�C��<�.&���@��A�ș�=���堄��V�ҘԷ����t-SsniX��n%x�W�C�ڛ�rq�!��V=�i�������j�,���rl�H�{T�&�#��k8�>9ϗ�v�p���%���o���.�j5���.�������>��B8�3C�Z�썶B��[ˇ1�f�W#��D\
j�
-��Zy���A@���
-��i0��/oiTֹ�f�4��
-pa$�!ne���ˏ�ǳ�met�F��}h(�0�ʡ����r(���
-�083���������'�<�
G�"'�t���T.O5�tx������������7�
�\i�8ݲQX�#K��?�ˮ8�s#�i��G���)��9��n��J^z@�G\��,��Yb4rMF.?��4R�����Bn��4�*��s95Rq�R��L�l�
-
-:�����V��l���Bc������f���!̈[�����wcy�4rd��|G	�ki6���X�&��q�Ø^�����Zn��^��]��>-�Q1��F
���KZؔ�wϿ(K��r-���
gɵ`%�
���Zy��l����^CM�r��\���r-
-�=E��po݈��������I�	�����~��B�@)usކz�H&{��
-	�ռ�>`�ՕF`+������S��[��Jr�c�D	�q�m}[���t��تh��bK�~��]C���&���_he\����?)�z��`��X8U}Oy}��մ�L^��W�\�q�˲����$*���^��8��D}�����K9��a��%���x1�y�D�GO`��
c!qh{�w�Y��{�q��6�eJ݆��<*$��B:՞���=_���_���m�'r^x�Ov�[�5���*��8Q�yi���bT�DR.������h�	�HE��9p�:��܈�Uv@�9�<U���0R̪CCN�����@x_y�����*/���G���sʈM�$Z<*U�>�"�	�c�&C���k�Y��	ĢxR�a!�b��5�#j�F�����T��.�_?�.�~����],{5�N�A"�/�.��H��U� �bJ$�0n|��y�Wp��UHe?@FJ���J{�ev�#���!6�����8Eۈɨ�e-$�e�(�X&^@yN��G/�`���P�%x�-�,�	��
-��1I�%��<�ϫ2r��*�(7�_
-�P0�nQ�g�i3�
-.Hl�=�F�RKf�����B�>�V(���_��I��M�*�g�K��WJ�_.�Fد��o�菮��,h,�=əG	b�H��|�Ӓ�搇������E\fr~v�g#�g�BG�����6K*��TBT��t.9i��4�r=w�bE�*1�?�T�GQ�23ae���RY��`yVVR�lP�յ����S�Ss(�H"EϙV^���-���Ez�3�d�N�0~i��R�ά\4���rzX�T� |^���2�����	��y�ů��
-���5!ȷ�5���j��*��5UA���ra H�L[�֔�A:^�*�Ι�ps
-6.$`�ċk���[5��m�Qk�/r���6���JYn�n@p�knG��S_wX��|��p����U4\�����X8�T�&���-�b��cf��>�%9씢���v��P�W,�Jo�ݯ2�����D�T�{y��Y��3�f���@�c�|�O�ΕƁ���:�9�
-���K20l�X���lM��k�.��ĀO9�m�`�����=M�>$�dw��`�xT6i��-d_��x����N6����Շ��#O�O8H��{2�h����Ggoe<B��ƣ�`)�/J�F��y��a����9�c�aO*�r06^W#�&�cK�������~���̔�ViI~]������L3{�$�M�(i	�s�V�+��l�G¡����Q�Z.�K/�)��U]�2����>DΛ�b;�t�4���Yl6��Q��m�w}�P�syY�är,JxD�[%�}�J�HG�}(��4PNIs�,�~�N�F�ɱ(iJ��̛Q{��1�T\U�����[{)�*�%2�f�?�ux-X�����Г����{z�Z�B��U=g�h��Z�Nx|%�o�D����K��z��j���q���uWvYgSrHx�tt���ӌ$u�����:�-k2�E��:,�QC,�Q���4j�e4�]H�XF�r��$j��Vs�����d6Oə��U&G�n��-�P�����b�C����w"�\�}�N��uѭ=�Q�eǡ��:r���p��!ث��z��������*���
+558 0 obj
+<< /Filter /FlateDecode /Length 2424 >>       
+stream
+x��[K��6�����`���hw�䶓�9�u�9l.��)�E�EY�d�70j��zW�c��������<����'w>��	ƭ�
+�Y=�����_�����q�L?��H�¥�`�
�~z��(�3���s�|��I�/>�60�DQ�������՜yk��NY��VZ��̞5�U������B@�p��U
�I� y)����ϟ9{��\.��Kr�8����W��y�S��E��M�O������(�-�C�N���_��Dj���I�$O ��	%8��v^(�ϒ0#�cA�į�:5��Z��KS�kLT�Iߛ��!&��^$X��
�g?j�ʵ	�ʾϠ�̪�盧�t~�u�Y
+�dB]��}~���ِ_�5hn8I������&�㇃��9�����m��ݿ��V��	<	ǿ�_�c�m���!k�A��Z����#*�B��)D�T떈
+�-U��J����R�{!j�u;DU���P�J,�PS)�
+Pm"��Z�e���PG����[
+����6����U:�,�ϵ?&'�tO}��*a�ղS��ֹ�sG�;6+�I\Б'_b!��P&&Z)��\9��jI�l�)g�����9� ��m�(|[�B ꬚+L�sT�7.L<�Vwqe�����M"4�y����N�8��V��]��az�>@3�vu��Y�eZ6#��}w���e�ų�:�e:���2T閳��I���I�g��@��8�2gYX�[�e�.5�O�I��3a�ۨ�s��N�Id7�-�B(k9.�2#��G�2b��[��R�	ʈ�M��ä�@Y���i(���
+�֛�N$m��`�c���_���_�W�Sø�Z���t�R�@�)�`���
+fqTh�Mg���Z
+�ŧ2���n�O�[����+�_����<n�v6��Y2��lf��<9��[�]M�!��@,����� �
+f,����t�Č`��԰�s|6��)j��}Qo��:h��y ħ�n� �wԷn��
�^�����
͆�j���M���e�
(�T�v��Eᑆ��\C�Q/o�ڈ.k�Ǣ�ָ�:Z��5]�p���]Wg��u͝Z,����S�6�`$���m��2r.��|��3AM�ޢ��Eqg�U}���7�q9���*��\\o4��)"L���r�P'P��L����O����L<�1`gՙc��}��+�����ho��a���r��%��k�{������H��z}���B*��6�V��0�zf�|�����7�G�#)����%�m�)��ƾ��¦��X��7�-��r�5�R����'Ŕ<���>�ث����J�C�8�fJa�S5��O����x�������=5�H��+|���9C���U��$Ac���Bj�g�%���j�����,���o^R�tC9H8n�EBET���FWS��y8u�ϾL��h��@}�-�S��Q��c(�[%d���t��f �G��pXX�6/�̬�jdM������9�1x$�aJS���Ma�SPU_3+PM�HV��������e��p,B�6
u�s�ǰ@��l%������o\/BE�ϟ �)��g%0/�?[�E���pD�"�AF6_֗'���|A�ŵ�|�
��Q^�O�F-��3iz}��M*��+2�8��r��3_��ᄥ�yU1PP60��1��b�m?�!�beyzҏ�k}�|�F.�Si1f9N 5�P�l߲�ų�#��2*�V�G�1��j�U���X�P�hh���6߳l�8�ґ��H�Z5�P���F��+��*)q02�kQ�8M���	"~�94�}�UXk�2�z�mvU�0��������,�gJ��7'<�_tڽ�%-��G�KTc��=;�;����RQ~L$�1M"��SE����&�u#�a�
+N7n�z9��G)��T����%�����)e6ҙ6]ي���RbSz����GEEW�X�I������{�3�da�Nu�?��J��Ό\L���e�>}	X�t�?�⫀Հ�=�:-��+��2�������7^���-k�ο[\^�"kV��Ӌ�%sN􋃺Ӛ�7hɛ}c�4٥���/næ�	;��t�����]�;J[��Z� =�M��8J��~�:���\����N���܂u�G�j�&���_�S����e늿ϠP���	��J��q��ĖZ��=�R@QR�R�V
|&�k��?jl�'���.a�\�K�Ũv�zOn�u��]�+����>5c�g��xMe�k��+�u"u�7ɓ(�JKo�;��[�����'?�h��ϥ;�[�Ϭ��M���3�?���9�1��`)yĢ���<J}����&��#%O�&Of�#�5h�4����+s��MGr����%�桜�Mg"`�_D4���.a��Y�
 endstream
 endobj
-542 0 obj
-<< /Type /Page /Contents 543 0 R /Resources 541 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+557 0 obj
+<< /Type /Page /Contents 558 0 R /Resources 556 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-544 0 obj
-<< /D [ 542 0 R /XYZ 69.866 758.996 null ] >>
+559 0 obj
+<< /D [ 557 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-202 0 obj
-<< /D [ 542 0 R /XYZ 70.866 416.517 null ] >>
+206 0 obj
+<< /D [ 557 0 R /XYZ 70.866 329.842 null ] >>
 endobj
-541 0 obj
-<< /Font << /F75 318 0 R /F85 396 0 R /F58 311 0 R /F61 319 0 R /F84 326 0 R /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+556 0 obj
+<< /Font << /F75 323 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F84 331 0 R /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-547 0 obj
-<< /Filter /FlateDecode /Length 2340 >>       
-stream
-x��ˎ�6��_��v�mY|
��sۤo����\v�K~�/�EI�lˎYn�XU�7������_/�y��"��;�Y��ѝҚI)�߾�|��>_:Τw�_�>`4s\v��~y����y�
��蚑��@	�qe<SV������&1�3gLD#$��[!,���GH�B~��������՝(-l��{Oǡ��(P��Ƿ�����9�����(|�s����w�>��'�Ȱ"�&�'5~�s"�6��w���<g'7�~|A5*���j$#�Ʋ�%�^6+�f�0e�Ђ3��j�w*ӵ�>"]2�se���h��Q9Mx��z�,�g:�
����y9��L��
d~>�i��2�r����l�1��"k�H�3ks�*�07���6��c<��l�W���W��tθ��ĺ�l\#�U{�q]D�yf�H@Ǩ�9A)�
-�'�/}���v�FaZ|���2��\O��??�FL�p1��?}Zd�94��n��G���"惣e^5���^	\���Q*����w{,QȈ�L�c���E�!�b��2�����
-�9���
��8��f�|��%������'���9�*y�Y׃�xC��y�в<>���<^M\����q��?|"'���)�S�G�rBu�\��
-WL���g�Vg�9(&����9Y���B?*��k�����.霠^7�7\?q:���WI�ͺ����M筀������j��t^ϸ,�k����O�t��1���A�R]3�������2u�t>��(��ꋡôQWD�Kt3��f9l���J�$U+�
-����
gk(�0����J��}����j��\-א]�ݦ�$���N.E�R�I�Pm*����|�V1�5`�� ��6@8��ʀŎt-����`,��FToW`��V=�>�pL9���fw
Z��CaHrV��{H�#<�{��q��J�g��Z�˪V2�OKhT�-�QϠ��l�^��\��2%#)�SŅ�������
-�Y�i��y5���L��i&���$����e^�)q$�ɪBE1�lE�ں24��lqv�W��O��z"�=o);F{iC��y��y��dq�}I��/%�����d���S3��xȌi�A�8!�{�1�fpL���M"��7��ڸ6���j����<��XEi���w�oIZ�'@��͚�!qĪ����0�ͫ�P�=A��2OUX
O�����;P0���ev=�B����it+]1Ko�(�����UZ�W0�Y7��2v������<&���Ix���y('���|�mb
��mV7��#+B��E�+D�AE�� v=e��WU�O�#�ȹz(�u��2�>aHw����L�X3�[S�dҘ����\��*���ǐ@C�[bҚ�.BqiB^�4�}��4R�|O!2��U��~\~� ��%��T!{I�VyI����8�k�Jx��xm�>�yv�(���^mz��~R���Q��Z�[��h+�eO�JD�:"dH#�l��"�
-ׇ�q��׌s�3P0��ʹ~��)58�Ę$�͗�"h�`η^�.�DqMu+OX9TʴVs̈eM��WU���ð	oʟ��� Cw���=�]�R�ݾX��>�ИfEo�!����c]`E��"���h�����R�=��<b*A¤�,"�Ό��¹� +�.+�z��η%���p�f�Z�U'7�M��-j���i����i�c�+����+Z-��r��)�jL�4>�߅6�o������rU�P׳��f���*W\aZhe���ˉ�|N�h'��I����{���db���ә�tX��c>���r��
S� >�, �(�a�Q�r7�o�M����Ul�����)s��'S����D��=�I��n�EԳ#�=h�:���ל��p�Y��p1t�+�^�ձ�a����d$`Dl:y��*��'כK�@�G�����qd�o�(��7ZVŮ���7ZΣմ]%>K����x��x�/ᒰs��l��nod��
-��F���'jd�����@1���p�V�62�\C�:��yb4rMFNq]�����£X�mnF���K�b˭��{	�9��픚��va���%�}H�R.�������~��7�fn`���A*)���	�8?�^~�_O\vBGf4ץ+ܱ�d�j��9�T��'6�k���ɢ�h0�8�q�߹K-1�H�Q;��N��n���b�����
�Fo�1Bl�y�)���"M��ZȾUc�6 ���v�K�9���Vt��t�wre��0�z������å�.���/�^���f���8�U��CR�/���$
+562 0 obj
+<< /Filter /FlateDecode /Length 2507 >>       
+stream
+x��Ɏ���>_�� ӯ�����s3|Jl_��%��ꍬj������C������[Տwt���o��yp��`ܚN�����?����+��\?v����&��p)/�rݿ��_�1�d�tß?��'?5�$��k�v����wLa�5g�ڄ��k\k������R��]�1]������řNP��{��'�t�м�����7�řs��+�%��pI���	��~)�3�
+�W�I��峇1��>�F<⒯�+$:�v������e�'�<cVA�p,�FeOQ�D,
B��G]�:�
Y��'������&4T�7�ǧQ7�ynF:OkȂ�9�B�G4$ud��?	�<�o�'�S9Ôl�*�%]\�9۽Z�߮yY��<� ;,3��i�d��R�Ƒ�=�4�7�3��ri��B����Տ/���O#���'P�`R��q,�˴x��?����I
+*�Z'��%�9SM
+��]��I�獄%:�������&�,C`I���nZn�y�^GSڰ�Y���*��B�<b��[JQ�����91�\,="���3ʯ<�]ƛ�L���}yˋ/��ŤZ �HFj�u*�(��X��y�'O����We�#�I��
+��X��:���<�\y悚̠����d}��@p��v1�3��q:wLB����k��b���
+2�/e�uC8XH
XFM��$��e4��.��G,��\c!
<"�,���R3��ׯ� �uJ���*9Ju�Jn��J%�@Ū��`�����A��\�
+�����{O���C��:��.^d�͇��^����8xNz��>�y�{�b/�`v)���o���>�i)Տ�OS�0#�Gql��<2$��
+y��q��I���Q�u��:��b��D��e�(�	��m�����%8H�g,E\q��̼��9�*3�����U�-U�Ml�Y7R�%�`NQ�B�	��(u�<���gd�}噱Pg[�yh<���xx��H1S*n���\%��eq�訴eV��L��aE�Xh VO��f:7�p�+�s"�l�S��Bo��	�5ӹ�E��OI�������s��B��k�t�P�6�7
+Z��G�_��������3�Kw���9�b><�tN��J���|%�����O�=n�s����0��S}�o��1�S��)e���)�USy���M�
�s�URy+�6����i*)hI*�a*���r2�T�]`އ/�ʩ��1��x�TN�����2�z�|�9j*�c��?ꫣ_�=���|V�r�i��-���F��OuP)5��~��*U` A�y��-��%�ꬥj��r-Q+�M��D�5�8��R�ZSk(l�X�Պ1e��D��GsYYB��|�������9�GT?n	��;��졅gʪ�=fr�h0�nl�b��O�G�R2��g����d�O
+���itaՊG,j)��-��G��x�N�R\(+_��F]^�`U,OIK#�$}� n>Tf��,>��F8��^%N�����p��t��HCg������]�M��D����Fz�'Z$��8�Dۿ�i�ئ�{�1˜���v�	b���C=!$�|��
\��^A��i�>픀��{zAAT�e�
+����it�"d��ˬ���������fzܽ���3���qa�<c��/����2L�jy�ޒP��e��uֵ��ݥ#��y�O�o&�i5`��I[�#�d:�[��`�!Z꒱[Y�^�`�Gl�R岾����ָ8{��1�U<.qP�BҠl�z
+�i�P�;i�Ɇ��9�H�뷁r��~��@�2�L!s'C���g�t�7N&�i�yk�ϫɌ��vh72[�2���{eP�hB= ��-?A4�)��ܺW0�q��^�L��٭UY���t�d����
+;U�3�wc�&�Ӷ��չz�K�Mϗ>M�Q+���������gCe��	2�D�Ή*������;�Ě�'̈́ xr��Z9Z'�16X�^=W���5���2��|�i���|����5�Hq��]����w����]F�j���8�'����~����z&S��!'3�P�����ݓc3����7Yc�L������r�jk�ϾH��Y�����Q��q�I�Q�a���dd�}[�"Z?l��,�`>�����]��.���j�
+y�g}�ζ�L���J��ζ��Yk1u4pٹ&~w��g3o�2��S��wt��\s�@��Tk��nQ̷Ln㯸�����P����bb�̐���tx}��c�"�����,#Qķ�e`-l>��(/���O�����s%�u��Z~�$�o
+��^9K�-O�	��(��!�mu$�N"B|�
��*�@o�?�$\���J�7$�=<�T�R��2y16�7>p�*�*���=%`�e�u���56��oF��ٟ��N'�(���\�Q�*��x9�����흽J��g�����˙_�a��@�&+�al�Z��/�n��
 endstream
 endobj
-546 0 obj
-<< /Type /Page /Contents 547 0 R /Resources 545 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+561 0 obj
+<< /Type /Page /Contents 562 0 R /Resources 560 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-548 0 obj
-<< /D [ 546 0 R /XYZ 69.866 758.996 null ] >>
+563 0 obj
+<< /D [ 561 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-206 0 obj
-<< /D [ 546 0 R /XYZ 70.866 445.409 null ] >>
+210 0 obj
+<< /D [ 561 0 R /XYZ 70.866 329.843 null ] >>
 endobj
-545 0 obj
-<< /Font << /F75 318 0 R /F85 396 0 R /F58 311 0 R /F61 319 0 R /F84 326 0 R /F50 224 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+560 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-551 0 obj
-<< /Filter /FlateDecode /Length 2074 >>       
-stream
-x��[;��6��W(]$<��`ؽ�p�:�U������ψ"���r��ڽ�8΃3ߐ<�}�x��/��~��;�Y���;�,��~��������gʻ���+<�S��������<(�}7|��-
-��2۵�L[�9|����Y,�͙3&�N�[C_i�]�{1h�a��Jm��������XT�۽~�|�@ZP(_���k�������9���ߖ�=�����
tP��x��cl��)/�.�K}<��1���S[7�a����{0�U���LZ> ?c$��/i��I�8&zQhC�~�G�w'���-�5����3����
-�z���>B_�}��NW�Gs͸(�!�1<\���G{���j�fEpϼ�7��&9av�?:�@krێa���w��"��R�8�%��8��b��F�9`��\:���ǹ�i�q��<�
���J�UU�/�4��4J��,�rQ�;�X=�H���~�YGI�>��-A��k[ �.�=��]�U��Xf��R,�H�H�c�B)��)a뉘q��P�X�mP%Qx��D��GI�/�(��rRw�n�A�+���+&��j�E��P;O0�!�Î��=�,W
-Y?�
-%�Kj�9�xDN�4re5�c��!�r�Ex8�Bv�3],���&ˀ�:��@�U�-=�DƾKpB)R3�mV�ª!d��2=>�A�R�x>��뙷�1��Lh�!��)��FT)\�B��H��c���,A�Y`�6h97��m<Ъi�)�x 'l�)&%b�
-�DHk�E������l�b���ywJB��܇��^��M�����J��}�t�gh��T�Цd]D�<eN U�}��\1$�C�'�3mj���Ŏf�h3RF�5A�"��T����W�-JQ���m$�O�v�Qcݪ�U$11�iM�ZQPM%b��Ό�"����S�6w�?�§=�	=�	�ط�P	]ݜg㢹�2�q��-� OYyQ��
-̐q�f��
-UE#��[u���z�SS���*���bA��B�&-zj�ղC�
�+/�
-����\������5��:K�}8�i�:Ƥ���a�I���4���͹h�}���W��=`���PC��H�"!բE�(�	jh�s)������)`ʢgU ��QBфy1dh�)�x���S��@	���h�2N#L����F����Y �&8��� m<��l�)h�v�PAXXZ��P!���kq?x\���z`��\��>��@X�u�\��~^ {X��T��
o� ��'��ٔs᜸���	�����	��Fvޫ�q�.��"��3�?�h|D�iґ��.XfI	���|b�A���s��G۝-x
-��>�إ�pQ���Vne�?״b����ڍ�)�C���2|�J����N[#2���m�1E[Fb6���`e!wA�waP�k���2+]!�5�6��$�V���y¢���H����3R̜u���ݸkf��9��<*�jXT�̕9T3��)���+���������ߌ��U���Y<�p_S�T�LٻS�� /�p˟�I���j������]�R�Sp��W�w�K�9_����L�2x���3x�U԰�T���=m�py�䄃���~	uyn�A*_�OK��Px[S
��d�E�x.3J��ExY-4�ǕS��v�?q�`Q�L#�"�N�e����w��7-�qB�SI�9��q�&�ZY6����>^"m8�P4a}�k�<0El�6��~�~���J�����R��r���T;�����|��o��"4�C�F���w��'ǵ e��nǥǘaޞ��Ý���-<t��6���-�)AC���t����"�;'nb�7�L�Ӑ`�<0Ņ1MA�&�����%
-ztYF��F�zH�5%�垎�c�%wM���]���m����qi�)5.;Lq��|���
-�Eo搡�N�p�/�����
24���Ta�:��}`�; �
-٪���G5�U��+NQ�g'g�Vl��J(�1Ve#L�f�duՃ^�����o��B����_�}`�
+566 0 obj
+<< /Filter /FlateDecode /Length 2273 >>       
+stream
+x���n,5�hn<	��/B�df���!Nl�.�>�n�]�v��g�4ɋ�r�]����y���?_|��`ܚ!pƹ~����O��|�8S�ǝ��c�`B��ᇗ��3Y0���߇j��
+�$��em�N�:��1,�6NV��]ȷa��/2�OV������go!�#���[!�C�
6Xf��?
?~Ź�K
��3WN�Gp�z����oN
�HY�~&��ٵ��0Vd�c"E���Xz"O�qm"g��Q8�K�KV`o�LJͬeڛ��PNV����M$~.�uU�R[&y�A���f�ZP-Ց<�'B#�de�j%@T��T�V6|�1tRk��!2V|�������J�C�p��zU���n
���@�z�q�B.
3�ַ���`��^ޑ��x`��v�;���<�I�=qJV���>*�����i�
+b��M%c�ހ{]Ѫ��`��d�bt�x6~0��������&+���'�}�j�z��O_"�de�\��� 6@N|�p���ʀ|�%�rU��|.�}�Ey��c��}&��>�D��K�s�J8%��%����S`@����#�]�Pk-{�>o��2Őctx�-R�
F�b�q��4¨�:1^G4���v�J_'K�ȡ��S��S"�LP�O��cړ������f�=��v�s���؅X�^��^�Qa;��
+����q��sT?s4,�����0����"C�u�]9������k�EMƜ��j`h8�	�icJK�
�'凹��ĥ�3�8�f��$x����a��7
���X:WaA��hO��x�J)�`��	v��BS���犁s;�E��I�䭁�D"�$d/"fH�)+Q���m1Ks�=��]��b�B�l5����b?6��i�>bn��m l��FMem���L�I"Fأ$ޗUTS�9)�iiw��hK-��Q�/b9$��X;
~&�Mm5�)�8Ц@�{I�A8��(�OnP�b^9w�0���W��L��aN!�Y�J�F�X%�#Ra`M�*�i/'EͶP$��^�6�8!
�R�;p��^p�^�g:�;;q`�>(u��}8�Cw��}8��t��}8�v����}��Ю�ն��B�K���8���L�����cr��:}������#V�M�*I��w�y���*&?�j�bɣ�'4m�wP$B��f�L�TV�%7�ϕb�Fi])�ld���g{;��5��u�*���&�U�R@-�X�z�%©1���&�"�4���=����l	���k��i��]��h�����\r��f�)#�����R3�W�������ĩ��f����NK4�+��Ϸ��R��W4Y��J8��Ro��
+;D��d����%����Ѕkl���4Ү���1��ԯ�����
+�c��Д�
+P�M��*��/���bR-V� �J
�첥��>�#E
+���W:}�(��*yq�ЉC��@9���Á^'чe�N�
+ݝ80D�9v�@]pL�CA �p ��ā!h�v�YAYp,)��p
Do��0��_�)�O{2�]!_}K��	Q�Z���w+qh�Jȳv��Ɇ�pJ|�̥ǹ���f��.��]]H�pnq�Z���g��a
+L~����$)!��RlN�q
=f��*�����w*��=�8�pш�H�~Y�������Fy;�F��!�@�|�7��.8ߌ�"���Igl�}��ىC싿�A���w�]W���HZǜ�Ww���q�ky�=_�[��$9���)��1��FN�t��XӴ"E�6���gʪz���5TS�>]۫��?��<K@%�<�'%�k�T(�vW�T�z������E,���>xڅ�ius.��{g�$�ˤ�-�3)���{fg�P]���"0o�x���[��\0��n;�L���R��{��
+�����-�p�M�)
+�K�Qׯ���������H��-���t"�\�^w)�}{'�8+��[\T�k�n2n��]k+��ba�U�c��	DW��m����Շ�@\7.*�$��E���q�(Y�t�\�R���͒��}���J#ztIk4�:s�~8n)
����D�����mgeX���CO �lc,`e��%
ݪ�1/��7��;�
��i��N�ʘ��ȳb��`,>�TB�n�(������}���$s<TG�sż�����DP���߈�N��k�g����r&�8c�$+� K[����?>�o
 endstream
 endobj
-550 0 obj
-<< /Type /Page /Contents 551 0 R /Resources 549 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+565 0 obj
+<< /Type /Page /Contents 566 0 R /Resources 564 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-552 0 obj
-<< /D [ 550 0 R /XYZ 69.866 758.996 null ] >>
+567 0 obj
+<< /D [ 565 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-549 0 obj
-<< /Font << /F75 318 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+564 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F58 316 0 R /F61 324 0 R /F84 331 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-555 0 obj
-<< /Filter /FlateDecode /Length 2772 >>       
+570 0 obj
+<< /Filter /FlateDecode /Length 2575 >>       
 stream
-x��\Ɏ#���W��<����J����]���l�\��ſ��L2�)��Tv
|�.��0^ldR͇�>�����������fJ[9|��"�008μ��#0����m����sn���Up�aH��|և7%�w���4��	�Y�y��K�!�0��g�r�ǧ)�8�g��H�����%��~5C��������iOc<?�O�7������kZ^���~����Lpüq�
�Im�f6�ȷJ����rfb��/enT�J�*)���}�$�Y��jW(e�`��,#���Ɲ@�uY��K��N��Bu
�H��
-"�V��z��sC(4��;d���X�	gFm��F���/0z�Dyb�4ھI[V�G�}�<�J����v�N:�L�NfI����C��99��A�T��J�w!PF3+��\�K4�11
-��
-�SJ\1��h2{�nRf�sie�h��ΐE��X��_�C1D�.�l��b��0��V���m����$�;2�Sd	@����6&>�06�<�3Gt %'�Ҽ͝�#��We=Fn�t�#m���r�32*n��[�S�q�_#E�$���0µ.���#��,P��ʗJ�({�
�9+׬b:��aNi���I��e�R:�ک(�I�ׁ̊c"=OɅ�s	�<�P��">�LAA���c��	2�FGX��"��BY�NR��ȭ�\p�!����\+[1Э p�vj����:�T�)���I�+T���0V��U��`��dI�r4=��s�;?^�3j$�fp�9?����Ͽ��?��i�L?�7M��Q�	���׎���{�l��TC��.�P������	�u
q9Q���5�zQi�?j
C����
-Xh��Ik�F��v���>���L�̢��pJWP/eLv���Jc\I����E��9Ӽ����c��(Tџ�D�\ӼB��C���#L��A��"7J:�ƥ}G���fI#�q�������9癬I�Q�p�#{QFu�6�D�5�1~Nr�j��W?���-���蟍����b�f�t?���r�Y�]�py�#|<���!i�g�INލ|���1#,�j�A����V� $�0���v#_�@if���M�wﰍ��@S{���-WAwΏ���a���%Z5*AW4�3�g�\s���dUk@��g�*���J3�E���T|?�\1q��X�3���hI#��fk�%�eR��X�3ⓀE�!`-��fXH"AeWI�JBn	�+j��9�
-�����o$��1�M�s �=u�+j�&(:��u��׻�ٖ�xe�E��'��
-dq��Ǚ.X�t�эD��̈́��4�2n��y��
���=Ӎque��� t�cXǜ�D[e��!�f�:{��:��Zt�����c���G��'
[)�(�v�*{�l?{+`;���c��ճ�*�8�L`+�{�ל$T�3[ڑ�1�t8n��ꭱU��-Y/]9>]������tt�O�*R�s��D��[mdi����\�2�$�(�֜�a7�~��w
O1�W�B�N'�ʟ�D��d��ō4�K�ϥ����i�Ta�?;r:��R�
-]��J7�.����Y�
-�d6��U�
 �Ga���w#��ؿ*+�ѵ'J�7���9Q�Nwj���05b^M���g�ͼEJͬD�U�;T`��Y�EY���~�I@u�=DkY�N��MZw#��ƜJ�&�qNb��F1ϟbce9Οi+J2{��PSس{�g����WPE�3��MR���fś�S<M:���rŵ��fJ;&�x&>���t��̱@
S�>}��R	#G��/�9�ϱ�N=G���&��8����H>yWXr��^l�Y�1��T%�����"�A����-����>u��$X�;k�ifpg?�yE�h\�o�f�e�-:�~\"�q��2Z�e2t�h)��h�^(�������_dĳa�{�=ͳ�H�=ߏ���[b��g���H�-:������]ɏ���]*���N�;�x�\�kbi����Ґ����Kp!�������7:��z�Eή�����ޡ/3����w���������v��2HGA2��c�P���	�W��7]��L�`��ܗ6a�s�hx��s��q�,M�e�M��d��&�)k=�Я��b����7�Y�on��֑k�n�eV>Bd�Y�2�]�&X����W彸nM�t1U��FS��NF/�qw�{7Z��-Ŝ{?p,U�[�8�s��럆f_�%�rh%��`�B��q/��qf��v��Er�K9�3���g�^���d��}<n�[����$���=~|U�@O��N�	����MX����1�;N�,�n'�I�
-���&�f�r4Kv�-Ѳb�Q��[\��8��7��T�:�q�W�u�Y�WC7y	�Kݬr��չʁ���H��L����%5js�֙�аY5��͇��/W��V�n~W�3�TL��^O�H1�wv��b�-,&-��.&���"�&��:�<MD�SM ���t5�W̬Ĩ湙�(�<�r�jR�
-T��'��&��6Ƿs��W���.�s��4P
�YD�.+�Y��Uv7��g�߱��8-��t��n�e�!ZX�Z��]��:p�L��g$<��D�sc.��,f�z����To�e�^��_V#���=�4��~ a����x��Z|��
d��ﶬw�������K^#ֺ�"[��ct��.�����&ա�fYj	�����0n�D�njV���m�n�/�eF%�
+x��[͏�
��_��Z��O)�L&I���un�=�ݽ�v/���؉3��S���G?�I��,�_1��It��>I����у�v�V
�����O��_t�0��4��~������3�������P����е|��V�oG�F�2	����ϵ�S��8����Z#�Uf��N��I]���&
+���'ؤi!��ޟ�'�$#ii����*YIY2�����}��Y(w���n�B���W�ѯ����W�|1B<�]���'�@��$�%��->o8PoyL-��￧+���S��}��k�Xˬ]M
�e�kY��Kꗟ��&�N
�˰��	�i�`=�8��x0�t��sK=<ϸ��5>Q�%p=Ԝ�.���m�{M�Ĝ)&5JȖ Y�#s�����v���%��K����k���%A��Lc
�I�����A��Z��cʧ�
+�?��/�����Y�)D~C�'�8/�Xr��� J�3I��8RҨ�L�$&���R�e��Y)X�P�t����>"�oh�]ɲ�C��}�B1����{��S�)<���
+�
+e���j��	u��~�<W�9p3*^(b{h��|<�y
�9zS^�*:w���`��T*��(0�a�K������e~�}�2F�V�����;��ՇS�v,5��g�h9]!�z����{-P�%0<-�x:�-�e�ĵ喠@r����aX|�ƞ^o�B4���ijA%y>�G4/��<���ލ�d?�to
+v�&�d�Tt��ۚ�z��1'=�~8Y+?�λP���T�Uy�JF��GAT|�TiP$D�s1��g7�ɹL2Z�2y2�6�i�gCpKvOe8��`�a�����Qm�����7D ����ֶ>�Q�1>�)rLj*#)w�U�����)En�;�&����sX2rR\C׊����)�uRIg�nP$�Gd�۵���d(Et�sSJ�]�K��`�Q�����@ӻ#����Y�ȫ��@JB���H��4%3}�<r��ߗ��W��N�$��c柞��M����SSf�w��b�c��������\�j1)�kD`�vP4�W�]n�f�9�@�Z入W>�
+Wf���U��`��'MULO`�-����$Fr����5��.U}\�jZ���KI�R�=*�<����JKJe�T��n��JW-at9)��5��\߼RYq���u�lU��
+cY	�5PΕ���LIyZ$g�^�)7*�^���T�^kj$��@[\��#�s$sd�4ęJye�x(f���@v�@W�_���j���-���>�D8�Q��#ms�O���Jw���'xy��DDRJIKqdz��Q�Yh�!g�V;�I1�?�o�򭌀&�Qjٍ��k3�lT���7��ތ���'xi��v�l�vZ��.������,ۍ����F0�q��6�ur�6j�<U��Ʌ��%Z�V��f�@K93��_�h&�I�����bW{�KI�a��],-�ql9_�ց#��LEBe�2=�$��n1�U�c ����5�]����h]���e�⚜��բ�N:]
g �I\��_+�M��Hܴs�ݢU�RO�k��0Q-!`K7N���ހ��1���:m��g=CY�^������#��-z���<��Jû�����^m�W�m�s8l��Q�6R�
+l�U��TH�%���5�0.;	�F��*rnK|�m�#ʭ����RI�J+�g*r s�y]��ћ���>S�&~/Bc�	��1H���]��hhA�ٔ�k���V��mj���D~�����DmL7ڇ���$:�K?;G�{>Z��[�N��^�Х	��à�n#|��s�;-ϳx��v�/�y{�i�K�KX_��)o5Ӷ7��%�Y���jq!-Q�xݫ1���aޢR=�:���n�t��M�xn�J�Q	w��H���V�uPO��hкi9�uʩ���
+9��R[
�$6�11:;�h+9����RS*�'�E�n��J�Hw"t��T���4�x�[�I��H���޽:S��(;a���9�[�vr�v����ƹ�gd;�_�9~�#��Z�8�8�:՝踰��wo#�qaȹ��z���y�}�}����躈S
+���>b3��y?������$Isc�{��V�ιϵ[����-���EC��{��+ET�$TخQS��Q��RFMѝڿ��W�Ő��Rl��=ϳ9E�G��i����v�Aa'}q���Ӯ����qvl�ȯ��,̻u2���˱�G6�����I�VSν����ˣ�i��+rr����P&�2�;�vz��ڗ�뎶�C_�A���~nB?eM%���W��?4�c��K4�g}s~S����|�'#��,q��k+�C��߉cm�3_����/=�]�ݕ�VzwM���7�G%�&�
�ד��
ʾ�H��n��RAP}�
����>!iyb�m�u�ȉ\����H1��_<w��<�����>�'�l����޾oN�4���x>W�tCA�h�_f���w���2�t�\���d���t�����Y7��7���b���i\�W_74�:�?���_�
 endstream
 endobj
-554 0 obj
-<< /Type /Page /Contents 555 0 R /Resources 553 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+569 0 obj
+<< /Type /Page /Contents 570 0 R /Resources 568 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-556 0 obj
-<< /D [ 554 0 R /XYZ 69.866 758.996 null ] >>
+571 0 obj
+<< /D [ 569 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-210 0 obj
-<< /D [ 554 0 R /XYZ 70.866 721.134 null ] >>
+214 0 obj
+<< /D [ 569 0 R /XYZ 70.866 647.651 null ] >>
 endobj
-553 0 obj
-<< /Font << /F50 224 0 R /F52 226 0 R /F84 326 0 R /F58 311 0 R /F61 319 0 R /F75 318 0 R /F85 396 0 R >> /ProcSet [ /PDF /Text ] >>
+568 0 obj
+<< /Font << /F84 331 0 R /F58 316 0 R /F61 324 0 R /F50 228 0 R /F52 230 0 R /F75 323 0 R /F85 403 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-559 0 obj
-<< /Filter /FlateDecode /Length 2366 >>       
+574 0 obj
+<< /Filter /FlateDecode /Length 2619 >>       
 stream
-x��\Ko�6��W���r��@ N��u�[�S{��K�~�Oq(+�I���o^Ό�������x�����O>��
-��JyfA�}���W��;^?�)�c�/x)'���_��w?�s2o���?����S�')^h���������g�0��3g18�kTk�+�����ɤF#��m��2�j�D���Oc����Ǔ5�(,#���g%Ixü�(5T���˗�{�%���z���C�}�\<�uJW����w�]�\=)��~}��*j9�HZ����[����w�k��
��3�7��m�Ǆ�z:���U�N2=��}�=0����W�y9ub񺙋#~�����;��?,y������1�)��e�M�
-QpŌW=��LDQ{d}Uϟ�WX��d"�i��	�aBL�ɢ�w��o�W�g��~<�D�����������^QB�/�l71��M��\3.�D���O-S��V�Y�W�w��;�R~Gޓ|�yR�n��t�\'�bg�ق��8)��&��Nn������2pzI0�?�3��F����,J��i�� /J�����V:�h�oˎj"�҃��:���ʚ[��~ѡۑʿ�
�R|i����}��&Za���y�#��r���N�;�s�zkN9f�z'!��FG{��u��)Jj���RQu�c�W��f��m.9�2o�ḹ�k�I$��y��k��\�E>�$�pڥ�Z�"|p�bX%�u�*2��UW��D��*��H捛���eC�r��+J0�ImcGW6��m�nvTW��J�l}�Kk�x�:5�c����^�a����=�z�r�q�߉���B���N�"�k�������{!�gx��;)6Qb/z�{'��@�SB�w�(B�EA{廊t=E)�{�|�Η��񄁪����g󆈳LZA�+}��wi��#\|����%c=}!D3`BS=\юX��TKb�#�a4��B�v�2�&]�юX��Ľ1��0ڐm��ٝb�#�a4�B�v�2��'.�hG,�h��B�vD¸�MϿ�k\q�$�C9>;������`8�:�Ƅik�W��݄U\ɷ���9�/t�q��4w܈���m�*�
趍k�-]��tWkT�$n1��^^�o�����og�g�s���-�)H��e_���`�w�s�H��v�g���]������SJ��h�ݩ���xV"A��i�4�(�{Ϩ\�����*'�L�/���+Q}>��n��o�Q�Yի��ʾwV��/ur�O�'-�B��)ǹP%ۍ���H��E�z�Qԩm�������C��7�X�!����2�L�1��y�%3���v�*���b��6\��C%e1�\�E���
-�[v�Je(���%[����e/L<�E�u� >E��y������uvt���1)Mt<+�H�9�0�8K��X��������.���!���P	ɀח����`9P�Av���р�R���ჲ&�t�+-AM�_�Ɩc6��o�|.#��"B������W0�� [�+����Y?�E�����!>Oi���Y��&}�i<�	�����Ni
�bk3z�ԇ?H�q�ew+��IP�<j�>����QV�w�O��2:T
-�$��w0�J�DV�~�E�t��R�g|�O�P,MI�Ћ�,�WC�B
��"�s=.�/"�df�8>�M�t�,�u�C3�ͣf����
-u(c�)c�X��C6�hf"�ɞ+�q��J�z�L������r�Hy�c�:
-�
-��癃^��ԾeEʶ5�){Vk 0����¤&2P9E�s2�B�Y��ؖ���-d1T�P��U�_\z��9g33ߌ�rA�(�s���2=L�0����I���1c�t*3��ŪɫG=�)R��	Ց������6_<�Z�� �%Es�)�d5�6�	c Fӆ�
-C�4#,3:����T~�Ԓ�FU�<�n�UO&��RM)1�2�U�ɚ�xY��WLL���ȶUB%й�}�*�&1O�3�Y#���>����T�H�K$Im�$V�m�(Z�r�l�u�$��E����<&Q"�A/����wQ�׾�����}r*���$��:E�}�_��zoP'A/ǌo_-!������5��?��n�FҺP�TIг v�E8P�KG��
-o����&��� 5�F�&u������PŚ�:W�
Z����dD�|�k5��FLZ���X$��G?�+��SH.��=\<�t�/��^o:�җ�{�oy�|Ѭ�8iyM�e!����d��w��	\}�i^���0�[�m�מ�E�8֍yT��ǨI�Zd��ba'L�^"�x8m��0���uT���sF.Q���T�U2�	�R��֛�!�0��0��
+x��\Io#���Wtn�
�dq�-��M|r�vysx��笠6YR[-��cڲZd}�����ρ?<p�����|��=H
�;?�����_�����q����C�ᡌc\���O��2����_��W({@){����>��B�͙3�Hv�m�C�q�?a���V�.ܨ����'*�;�ˑ(����vgp�������AԨvx���-�g^�fіRo߆�9��g<^�~y��*ƾ�"8f������E��;���w�Q�Y��H%Qxx<,�!8�^���%D�1P@��,�%�UA��'����؊*3�y=�r�j/B�8PՃ���V�INy�.�P�h���Z�V1�ő���:'qi�aښ���ͯٛ��6y����9~�����������|^4���-u��6}�s�n�a��W�ci���D�!�'����8�Jj����"�#�9yD1�ᗃv�f�Z�lr��Lb
k'}��Oi�'g�X���}��n�z2�sj7N2�cp�#.SK0��uD��KIN�D�zݬ�kpČ����D�u$��~�-e�y&��V��CL{��������+*�
+5D��9Б��ř[c
P4�����5��@�^�d�Rq���SG��/�@��`Ύ�{EH�`X6Ȍ[�PY	S��P3ifjg��E���b�>7�BU�-�\�I�
�R޾��Lt�֭C��RKnV�pa�$��D!�)�}C:*�{S�H&�9��r����赨��c�����7V#��D8R��X�jӎ�{��;7p$�
��;�a1M(�+Gͤ�娝q{9:���b���^-w�sC�-rg���7�6�#7�k7vϔ��{�W�Ӈ��H/+/��cB�
+/�M�6LL���6蘱\X>/&;M��;�����ɻ����-Ppc��"��M��y�R� yT��-��"��A����Z^�����jR-��1�xf,!��^v݄%J�&}*������,��Nҧƕ�����'��L���]�>)���'d��٣o�>���O��m�'E].�`����5{f'-U����r��C�]���֔Zʯ+Nno�7�J��o��J�-&;
:c�6�
+]��BG仾�B:"�ƅ�C߼���:���
+A].�DpwR�:f��~�F���.g(����{��geM�f���-�vh#y��땴]��a������f�e����P�:�ko���z�E��чKD���(���Ժ�,F��VNt�.�{����Zy�*o���z9F�c�^
+l�빋�����k�����W@݇�zf�-���m�ԃ.��SKm��S%.�x6��{'M=��������e4S�N�>�=�!����.g(��w�(��|����\\�yK;�O30���y݌y���L�v�<�f�d&F;cF����hg��h�Й�yͪw&F;cƸf�	�L���V�y݌yMa���Θ���ę�yMz���� w�-���B=^@7������Ę�z	���p0C�Uu	;���� �.n��⎅����(u�µ�����^�%v��m�D_�uu�H�D�<��F�����q��⯻~˷��������f>Չ��\2z��
+�u��F��5��R�c���kh;�g#v�S=My&���09`���-���y�$���|�}����o���'힟����(*�Tƥ?�;���R'�)�g8�R��-���y1�

j۞��3�����.��K\����C93�H�9u쾹r%3���v6�|m�Bc���ʫ$�e|��묂T�*�j�	�d(����S��޹/�"�#�rPt�(<���o��_'gW.�٢&P���'�cq�<�pqr�y�E~k�y��nƬF�c&F;���5���sF��kw0�{�w5�@��9O���Aw?*L�i�SB�<}����ʙ`�0��
s�s6Ƨ�:�|-3��"BN�&��)��o��jd�}R־<���*80��r�9��	��[ڜ��4b�c������ﶟ��R�n�lC3Kß�͸�$,-Ji+�OiP�<k�ޥ3*g!�G]���2��M����Q*�G�R��-���8^��=�5�N��@v��M$�g����9�̨��%��2�,C:S4��Xq~�����Iy�J��k�LE����K������H}Ș!>�8�{�����ŝ���h%(=M����V��d�0.A�J��M	0ޝ�ɬ�I����=��ҙf�
+?u�׻Q0����0s:k@a�7�b��Y�hƗ��n\�<�&s�1=5���%�����y9�����!7��d�6�(��}�'*S����g�,S.�<&���2����Z�7pLS6��^�����s9���W]9�����=g�J���W�5�D���,�{:U
�o�o�s؋i{�qp�g_<�߉'�e։��f�_�Ǘx	�(C�>dZ�j��+����n�D��/��؝��~�3~|�ʦeڸ�{��6�Ʒ�Y��9��cm�+}=�K�
+�{z���v�T�/$�[�~ׂ��e�k�e-���A������ 0GJ���>m~)_O�Zd���;Z���
+�o��}���k��rNw�ܶy�=2��މ�tc�d����yM��zJXV�A�!�
 endstream
 endobj
-558 0 obj
-<< /Type /Page /Contents 559 0 R /Resources 557 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+573 0 obj
+<< /Type /Page /Contents 574 0 R /Resources 572 0 R /MediaBox [ 0 0 612 792 ] /Parent 539 0 R >>
 endobj
-560 0 obj
-<< /D [ 558 0 R /XYZ 69.866 758.996 null ] >>
+575 0 obj
+<< /D [ 573 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-214 0 obj
-<< /D [ 558 0 R /XYZ 70.866 329.842 null ] >>
+218 0 obj
+<< /D [ 573 0 R /XYZ 70.866 243.168 null ] >>
 endobj
-557 0 obj
-<< /Font << /F75 318 0 R /F58 311 0 R /F61 319 0 R /F84 326 0 R /F50 224 0 R /F52 226 0 R /F85 396 0 R >> /ProcSet [ /PDF /Text ] >>
+572 0 obj
+<< /Font << /F85 403 0 R /F75 323 0 R /F58 316 0 R /F61 324 0 R /F84 331 0 R /F50 228 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-563 0 obj
-<< /Filter /FlateDecode /Length 1657 >>       
+578 0 obj
+<< /Filter /FlateDecode /Length 1486 >>       
 stream
-x���n�6��Po]�e83|EǱ�in�=�ݽ4��!EJ"-'�E%�h�4�y�HZv_;��od��g��t�4�'�3�O�O���On;)Ȼ��8�VR�������o���n�����z*xX���x�,���}�B8�:�6qc�Y��E���B�h���2^<a��P<ZH���a��>nnNu�$
�{�2��Kh�Y,�����ĝ�7��}l,w�l�@s��w
-������v�%�]l�����?�O��V���-^�ά����@"H/<v����$.ز���,���v��Z~N�lY�P ��O�̛�kn���T����֨��Gԁ���~��ճ����P%&I4=���O�g0c`��)��L�����#�-zx쀛g�x["��.8	�0� E5� {E�M{I;�R�c�{&X�&A�b)��^�k'zG��b�Kf�{���
-s
-J3bN�^!��"�h��
-{
-Nbˊ씪&�'�+0F���ql��p��)p�L%u�آp(�yR�O�%}��a��+CM�$y�dE��M�����s�GE��4�0�=$��Y ���7��.�T.�J���	����">�Ѩ��l���l��g;#���[�/BZ�1uM}(9a��u�vM�hEwc7Z���� ��#=����S���
L��NvT13\�|�CB�F�M�(pV�K��JQ�oZ��@�+���͊pP�r�8+�'��j΁J���il4�i*{e�ĥٯ(�q��t8��6*ϺލY@RUش�n�Fa�Nk:!љc��\-	��T�;������^*���K�v�b��3C�پM"��\1ecwc���q��{Y�H{�~S��3�#Q�F�q]��hWR��>Y5�⼦�+�D���j�Lᤏ*���k$ak.1��y�K���%XdW����~��+{���*���g=��!yU�^���ĥ���+�B_���@Ϋ��u�.��ȕ��D
-��`�$zV�����ۙ�1�z��5�Ud�rbݔ�@(.PKL��,J3����l�j����w��3͔UC�vA�7Rh]cAf7�SL�p���m��$��^�W���Y�d�i�U����~��R��JE��V��PXU��m�
T߄jjl�i��W����R<0i�?�{*6�|X���=�c��":�����ZB���'��K�4��Më�A��r�����mJXO�d���BNi��KDs�}OIhc'K�3P��]%�e�M�#�usuL{�Jې5��D
�@1��=�I7�6YN��PnD�d˸�UI��uZ�䄁�n�,�7<g�!���%��]`2J���N)N��N����b��w�8 x��Y%v-ϳj�۞g���q�UCx���ϺP�|˫%*M�0�wz�݀"!�>�LI?��Mg��@���7���|�wJ�Z�Қ�\}
_ZCx_:C�Kw�I�����\v7`
32\xH��S2C�s����ĥU�3�L���_��mϒ�k����m޵�^�kI٩���>��d����t�2r^�+f�w���E����&FUg/�r��2xN	�3�1%;�2��F�w-!g8� sr�2��3�Xi\w� Rf��K()��&��=�yg9fV8�U+9h(��_	��.I.@*V�?n� 6�A
+x��ˎ�6잯po]��P��@Q`&�[�s[���s�^���^��8�Ilg�=�Y�I�/t�젹���|�NvV
+0�CM|�����'�����y��G>sS�	P����m�븦�/_�tMχV� .셲r������1� �1�
+
+wk�C�8~n�>ZS+Ɩ���iw�[�I�tki�����	G��3������O����<7�MqC�?��=��ܟƑ���g�x���l/��A��,U������OO�Y����eZ�|��PL�p�c�[�@�[��r:AV�:Mj�@'s�������U�+a@.]"������N��	���d��خ�
4>��>}��e�6�Cxw'u�G�������0�᭢��j�Ik򨑶�'@���-,�����8���4�a�T���H/����ڈ�T�w�v��Z�OEf��������Ļ���	.������AUu�@��'i�Y�bخ����@��ڦN�0	(�j��^.pd�++�w�S++P�1��
+7i|�,�m��9'�b�����i�L#���i13#[p��Z���y��l
!h�.��s Ne�(~F�{k֣Ī�1�j]T���M=
+�
,�*��\�Ft�I�h���bTKs���\J��ҳ����nIQVf��̥#�ev1#s�rJ�̗J���Jr-W���&�F��1B��߽��%GW-�kb���U���0�p_�{���=�����Di]	���qߺ��ls%�3�s2���+�Ԍ���uh���Z΅]Ln����9��z5qq$�_vl��f�j�Y÷H^Ѱ��t�LgV;n��J0���_��
+������Ԝ����'׳���t���﷨佦�~
�d������1����xuȦp&��
+y����&+���6�,�7��N�\�JI'����b�m�N�rx�
F�
+G�sf�U;zG� '�h�ɚ&��Bec
+�u��jF[�X�ꐨ���"�vZZZ�y65�9բ!�ڛ�A�c%5&��*6c0�m�BR(�
&��,J3�������
�Ϥ�g�Ij���]ހкł�~~��{r�s�/y�%v�I�О�V���VQ������kp���@�M�Ee��7�F�
k�ZsXw��30��@	�Tl� ):/PkEQ?�fI���"�	�?	�PsoM3@�����*p��:xރpHc��в�~K1��>�, ��J�jp˘��i�"�"vS�з9[!���x0���J
nͳ�����VZ.*/�5��A�ݚG-��ԫoqd�B�͑�]��:2hf�[nᬏS���'�5x�~��M���v�RJ!9sz6�����?f[WdJ&VSv�W�R�qK3zI&�ZH��ߨ$3�L5c������j2�Q�䏄�1��x�֫Mc��(��7��d�ɜ��TVNL�V���R�8�/�F�qr\c�U;��Ί��)��V!�gB^��m�����y�
 endstream
 endobj
-562 0 obj
-<< /Type /Page /Contents 563 0 R /Resources 561 0 R /MediaBox [ 0 0 612 792 ] /Parent 532 0 R >>
+577 0 obj
+<< /Type /Page /Contents 578 0 R /Resources 576 0 R /MediaBox [ 0 0 612 792 ] /Parent 580 0 R >>
 endobj
-564 0 obj
-<< /D [ 562 0 R /XYZ 69.866 758.996 null ] >>
+579 0 obj
+<< /D [ 577 0 R /XYZ 69.866 758.996 null ] >>
 endobj
-561 0 obj
-<< /Font << /F84 326 0 R /F58 311 0 R /F61 319 0 R /F75 318 0 R /F85 396 0 R /F52 226 0 R >> /ProcSet [ /PDF /Text ] >>
+576 0 obj
+<< /Font << /F75 323 0 R /F84 331 0 R /F58 316 0 R /F61 324 0 R /F85 403 0 R /F52 230 0 R >> /ProcSet [ /PDF /Text ] >>
 endobj
-565 0 obj
+581 0 obj
 [ 6 [ 600 600 ] 10 [ 600 ] 13 [ 600 ] 28 [ 600 600 600 600 600 600 ] 35 [ 600 600 ] 38 [ 600 600 600 600 600 600 ] 45 [ 600 600 600 600 600 ] 52 [ 600 600 ] 879 [ 600 600 600 ] 883 [ 600 ] 957 [ 600 ] ]
 endobj
-567 0 obj
+583 0 obj
 << /Filter /FlateDecode /Length 23 >>         
 stream
 x�cVa���]��N��
 endstream
 endobj
-568 0 obj
+584 0 obj
 << /Subtype /CIDFontType0C /Filter /FlateDecode /Length 2374 >>       
 stream
 x�}V{Tg�a23Tc$�&`F2�ZJ|�/m->�"h�U�$@xBQ$��bW�>j
VE۵�P(T���碇e���j����ˮ�w��n��tw���9s&���߽��
I(I�����1ӣC�W8�����<�}X�=�"�FJ�4h�� 
$�@R2(	I��(Id�&�lz�H�~��S�*V�+��g��-Y�E9�`q��5m����0#���
@@ -2296,10 +2364,10 @@ F
 يV������ͅ'ƃ-
e �[�.����̍勋?�Z,p9�HAs�|^R��X(�uH�T�NJ�H�g�N�7�30�W�ȍ���(C�/��uR.��B���%%��	��T�ߢ!sl�EnN��k�+O��u���H�L�a��}�sV?AP��<�*�x=��}�2�e�a+�u_�h<ޮ*�"2DL�\<�pu�i�?��]��hZjCja0��C��-X�8���q�fa�p:��f*����W�~jA �t�oA1Qbѻ����C1=�xPh�ç5������ yӼd���|}`�s�ɾ�9���g���@?�����H\&@��˧n�_��7���t}һ�<�^D�Ppa�6��vqBUw��x�~o-�.g����&����C����94^+9���dT�����+��G"R��,׸��w��ѽ�]�f���b�*�5w�{���틲�o҇���|c��/�} ?�JV�����o˶�A
 endstream
 endobj
-566 0 obj
-<< /Type /FontDescriptor /FontName /ZYUNCI+SourceCodePro-Bold /Flags 4 /FontBBox [ -208 -454 808 1060 ] /Ascent 984 /CapHeight 652 /Descent -273 /ItalicAngle 0 /StemV 200 /XHeight 480 /FontFile3 568 0 R /CIDSet 567 0 R >>
+582 0 obj
+<< /Type /FontDescriptor /FontName /ZYUNCI+SourceCodePro-Bold /Flags 4 /FontBBox [ -208 -454 808 1060 ] /Ascent 984 /CapHeight 652 /Descent -273 /ItalicAngle 0 /StemV 200 /XHeight 480 /FontFile3 584 0 R /CIDSet 583 0 R >>
 endobj
-569 0 obj
+585 0 obj
 << /Filter /FlateDecode /Length 496 >>        
 stream
 xڅ�Oo�0������86$m�� ���hհ��
�	��	��l����!�����c����[�T��P>r�Nm}6%���m��<�Hwk"E���/���eAf�"�U�`�s]ϊ��'��P��n�3����:����Z����Q���m��h����l�ݍ3��L[���ŏ�s��R��>�}�AtueQo���2Wa�s�A,����J��<�s�ť���}L&,z���3o�D�F���
�Zڬ��4GrF��)S����Y��'b�;�\�h�]��W����cH�6�m�%��>P0���l���i@Z����6���P�{k|��6�|4�zJA	hz-@��8�� �Z�$hy��D�zB��@#V�L� x&pO���a�
@@ -2307,73 +2375,81 @@ xڅ
 O9��]�_A��"%]���K�S�a6��^o�]����d�gc�X�����f��t���n\��������?ӓ3�
 endstream
 endobj
-396 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /ZYUNCI+SourceCodePro-Bold /DescendantFonts [ 570 0 R ] /ToUnicode 569 0 R >>
+403 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /ZYUNCI+SourceCodePro-Bold /DescendantFonts [ 586 0 R ] /ToUnicode 585 0 R >>
 endobj
-570 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /ZYUNCI+SourceCodePro-Bold /FontDescriptor 566 0 R /W 565 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+586 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /ZYUNCI+SourceCodePro-Bold /FontDescriptor 582 0 R /W 581 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-571 0 obj
-[ 2 [ 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 ] 581 [ 600 600 600 600 600 600 600 600 600 600 ] 617 [ 600 600 600 600 ] 626 [ 600 600 ] 639 [ 600 ] 661 [ 600 ] 665 [ 600 600 600 600 600 600 600 600 600 ] 675 [ 600 ] 704 [ 600 ] 809 [ 600 ] 834 [ 600 ] 855 [ 600 ] 860 [ 600 ] 863 [ 600 ] 867 [ 600 ] ]
+587 0 obj
+[ 2 [ 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 ] 581 [ 600 600 600 600 600 600 600 600 600 600 ] 617 [ 600 600 600 600 ] 626 [ 600 600 ] 639 [ 600 ] 661 [ 600 ] 665 [ 600 600 600 600 600 600 600 ] 673 [ 600 ] 675 [ 600 ] 704 [ 600 ] 809 [ 600 ] 834 [ 600 ] 855 [ 600 ] 860 [ 600 ] 863 [ 600 ] 867 [ 600 ] 869 [ 600 ] ]
 endobj
-573 0 obj
+589 0 obj
 << /Filter /FlateDecode /Length 40 >>         
 stream
-xڳ�(���D�#K� �Yҁ�A���S�@
-2
+xڳ�(���D�#K}�ـ,�������)��	�
 endstream
 endobj
-574 0 obj
-<< /Subtype /CIDFontType0C /Filter /FlateDecode /Length 5985 >>       
-stream
-xڕY\M��_{o�[ȎVk����2ĸ�I]uW骢�JRaH2rI��j$�]��/H��1g�q�1�{����pΜs�s~�W���{�ﻞ��>���<kK�=�Db���n���(��5�A�"��#G[ňssca�N=�0�I�#�]z�MzRrF��U�A�1��^�^��_��gB��2�L�����2:T�j��m���qc��1d����8&�?0h�t�q�rc���eA��kW��\ml�had�"�9��A�xD���Ƒt&�8 rMDLtH��1�"��F���1?v�X3�:���cln�$>���)huPt,�RnL�w?n���RF��`�F����bz3zLF��3}�~��2��(���3�Q2*f 3�Q3<c�fL�!�)3��gF0�1#�Q�3��|Όe�1�	�Df��R�@�P�Hj%���Hm�vR{i�4T&
�FKWK�i��!�a�i�����4Kz\zBzE��l�l��K�+�1�;�.�ߜ�4$s$�$��xiq[�G�"��C�ý�u����~���q�NG�c���{ݩ��zJz��l���Z��^o��.�k�����.�1���G#��}�)����ҿ�wz�辥�L�E��6c�g���fW���/�V��
�OV
-���m�~U�)6+(�*��_������*�=F�F�wV	�뒦zx� kR�]#�������w(_z���.�pP�DT��ܟ[B?۰%��9�p�A"҆�ȳb��:�$1��2�/�sg�N���M���{-R-�Fd)&�H&����1����]�h<��l���l 8�s�Ͼ-R@�ϡg#��vq�a>E�}�v�N�3勹夿���Y�w	�������-���<1#M\9�@��o_^	�:+���Z��I�}4$p�u���h��O�����x�a�k����g�|s��"t=����%IYF�=$���;������,1��06���dO��e�gܯBj�`xc1 >&�#��M�4Ij�|Y�B�6#�W�p��or�EAplB��X4my3�7I:�\Y���B9��8�����GpgеC�<�V5¡I���nC���u(Dd�h�@�W+�fU�ڥ*���zWt˓������|-Եh���$���M`,/�hj��8I��"ˤ �	A!�%M�C��Y؅�!R��A��[�7I�gdg�|���#��
w$���Tl�=��d,z�t��k!CA	�=���3���x#]�
��eэ�t����/�x#��z��U`�i���&��e�M��%�%��=C��n��q1����<EO1�TH��4N�Z�({��w{�n���T����GP���*;�h����s�:�VH�)1�v��F�����Ӊ/�����1K�0jà����TR�;����eu�hj�׳b��W;���k�{����b���:�7��)$A���ɾ4��7�l��d������(��h&��E�B�N�R��X�@;�g�$nD�:4�|�9�|jg���0�1,�!/4s�1�B�!ݎ>���%BR��Rю������
-�O-3���o�J8�^�Wa�&�\�@^N�6=�J���P7�j��'M�{b��@�����7ԡ���!���y5�x�<�����_ЫjG�_�&Ln�����T�@�:i��Ӱ;�@��!�(��p����;���S����X�C�W{���,b�Kү�.5K��l)�j�~��@��Gcw��Y��O�+�.��j*	�WW����œ)iL1>���Z��E�{(��yM��\P�v�C�4w�ߋX�'-�W�	x�܁��8�i�����"�Wq��v���7��9�[a,2�3�#a�1G5Ϳ�YvJN�)���O#t����-2\I�7�A_-�yOh���\H����9�_h�L��^˄��O]0����m_�����-�Z��!����z6���
G��ϼ��?`���;P�I���0�u
��H�Z�/�bF$����l�.�$b��0�M���0|K���GS���Fb6u
��$?˒O0��o��e��)�q�3-�6�6z�i�?��@�01"�VzǆQԛOn,�tr!�1���%��@�������VA��M�Ę�,w��Z�G�T��a�U{�ۅA�94雄M�#B�L�W�d$��b�bZ)�:us��]�-���	��Ƅ8��N�$D&p�M�|
-fݓ,(ÿ�!;̞����})���A�0��%}����_7r�.����d%%��y����C�T�@h�����j�k�3Ё�0���4�������΃���xb�jĬ&���~yU+UI����ъ�D����3e����G��S����A�(4JZh|]J��n�V��(��-�I5nQ�o����
�5>g��K�$ض�ƷLwbIL^���3�JiD�������./
-1�8�7C�pq��7�0�jD��Q�-R��Oш���E�`^-���4܃��2� ��>_�GT�>NQ6�	�U���Lݕ��x��}
-��XuR5É��#���`�n\��/����yR�̻_s�;!g���z	��2�Ec�0�>X��N�@�2���T������oB=�TD:ǅp��M�d��MN����l��T��N�9x����^���<��N؇�1�<�O�1�L̉n������%�ԀkL��X��I*W�̂@~�
-�T}_U�����g }�x�{�Urqww����f����SvVq7@���r�IU��tͶ�/�yGr��~bݰ'7%�e[��I��;.V'��uW�u� ��?���HޱX��$pW�<Zį�F�#W�|ó*ҾJ�*�o�����v`G����o[�^9���}�Uxŝ��;_s4xyXxPHv|>/�J��Iɇnr�Z�o�"�#lG�o�"c��5��vْ�*��CY|��=YS��&.�*�_�S[���q�2ᴬ��4�3z����$f@�%p��[�u�K!�ҫ��4����H�._:�U��נ�����T>��3�:��X(B$�{Ǔ�0�j�5v��u��U�w�8�����l�����]9w2�P���1tӎM�����[t��3g�rϨ�����3��m����ؕqDY������[]�}�	���g �V�ڌ��5Rϳ7�?��8��ё��-LM��#5�t�j�ݔ}F@`a6�?%,���������D0��4r�<k���~~X�`FF5����WQN�4�|+����S0!��Q�NUEԩ`0К��a������ﻋl1ip^^"��ϻ9���#��,"��Q���IO��m?t롁��Z�o��'~S��S%�ō۞�K?�W�֝s�*y)��7_R�Vвᗖ�ֻ��[�g�y��pL�~?Ք����9���;�'�
-�Z	��w[7{�������cO�r��i���rn_��..EE���1�g�"Ͳ�!����x/��G�B�E'�.U�z"��G��T��&tȠ6��Q��s���B��~뗪\���2vL���`�-�T���X�ɟ�%úk0^J6����X���wj�4���cp��-��I���5"�Gm��|�M��ZXY�Z�<�r/O�ړ�W�E�@�I9����{~�SZ��
-��$O�$x(��p�J�5�rf�G��oz$'�$�'���Y#����fG�g8_w�a��i	¶�
-��]N�Ϭ���ǎ���l�^����K%�*D�\X�!`�(�䫢��j�=��z��q��׉S�Jv�(٢mpAL�n���	E�4}R�_�M=@K�=�g�v'5��F�b��X
[�8<�ȶnqY�5�
-۶?�9y�#��b�ـ�$�&(�c�/�|�tX�iĽ�*����G�4�)$;�/�>Ɨ
-H��P����iI}�_�\��z�F-V���ݷ�g�W��
-*�XΗ�T���:I��CV�,�]z��|_	Ҏ���cm_�>O���zI�mZ=`�ѣs/
>�w�����D�8%Qĝ�h����:n��J�f��cP=-�Cz���ĕ�����+�;[|&>+<d�
-�ࣱ��;��r���l�y�0�7"b�AWhE����,���$]�0�C��Q�hD���ʤ󬻲B����i��Bw&{���� �AʟlO��utVy��Ҫ35�����1L��b_UC|��`vϻ�};�U�n{ۧb|�[�R�nny�3����3��������.gת�5�����>�чGl��=W9m���.E�B���bM�ҷ��łz��IϩcBe�V�������S0�U�v�vt�;�n����X��&/7?�Ԁ�xs�怤��H����R
�n��)�&������9�2��|��@����6��xi8�r*ר:K���D�q3�Η���^s��
%�Z�\�FI�m��*�Q��F�{Fٸ�s�9{��y������|N��E�,�ᬹ�̢|0���OƁ��S�M_\>u�TeKU}A���j���6S̺�՛�=�B��(���@=-��-��H�o�B��v�d�T2Ҏ������j����o��֕-�0G(8�Vй�R�"[|���BY�務�f��#�kv��e�d�@�[�Q�!O�@G;���ڦ�Yy��܍��`?7�P5�|)��[�oo�(�w������ʙs��E�;]M��y+\�iP��Kᣧ��~�_��H�-;��(�چX��fQo�B<�{�%�ᄡ�G����]-�G4A����h�x�e��t��v��BLb
�6�����⼦�g�.�>���f��#ϩ�;`1��`,����rl*x�8e��|���K�����={v�ڣ�0w���Q��ZiJ���TO0���nE���*x6ʒ|��������^�r(��,��iK8z���GGq}+��`*��\%�����Lq�`�q�\�%��� ��_�_��g�l�S��	����$%��%�B�,���݌6����`"��2xI�s�Eh�#?���gT�?(�+J�~�KI���"�?LQ˻�i��ব��q�r��IJ�ܘ�j�@�ݤ��D�$�i�-8�4]4T�� �]�9x��`c� �;�?-<D��ec����l��A0&�?�HH�BЖ�Vc���k)5��oq�M�.aG/�������|R�):5iy�M������xP�W�z��k�P<�!|�~��d��6�AIˈ�*�2�Nfh(��B(ٷv%�w�V[T���}��j}��Ԝ��8�q#��`��}'�@_�^Nّ��ӧ��U`pS�>���f�zN�ApU~|Ͼup'�ʐ̕�-���+�o���~�]����F�����t��>H҅[�t2�����]�<:���ȼ%�L�����ˏK��E�N����bZO�L�����C��`�2���0,�&\$�.'�ycobEF_Y��d2�~+]�+�.N�(�����U�?z�3*�D|ƣ��P���`�ܱ�C�Ww�8қIx�;z�=�xz�ӯ؆��
-�(���Wpd.�b���	���2p&R�>	��חA��sS��م+rK�WKs�+B�Ψ/_\u���Mƀ\��q4Q�!ӼO��؟���<z�r�[��D�KE�jO��U~��J!�4E�l�������
�p/;���}�R���&��^���RnY��kO*�]Ȏt���ƅ���A�}�NH��D���bH|{-U;��*�;w�x�����i�a]M��ݎ֎`M=�V
c?��`�1�Iv��y^&��`l��2NХev��V�0ׁjj�d"~ȅ�$�D:A�'��o��20B)��� ��
0�b���[_A�7l�03ğ�
-҅u���+���_J�de��?��ґ��СU��S�wK�ǽ[����^��7�
+590 0 obj
+<< /Subtype /CIDFontType0C /Filter /FlateDecode /Length 6025 >>       
+stream
+xڕY	XW����^P�Q�j�K��(�]qC�dPvP��q����b""�
+� ���Q�[���$&��vf�-�3��7��W�u�s�{���S-c�ucd2����Rw[���k���E��EE�����抦��]�?#��F�
d�
��ν��
`��!��Q���f��`�4���F�j�7\����?�;z�C�AkL}M��|��|�BL#V���X`�nMt@�S�p���Ȉ(��Ӡp���S�� ��1�����?��2��3Q�~kã��֌��.*hU`��1cǘK�q�E�h�9����kL�D��� %�y��V���A���'gL71��c���L�'c��b��!ӛ��1,c�p���˘0�5�3����Sf c�b3C���0f8�	3�ɘ3���̧�f,3��L`&�-����Vrk�b����V$���C�Q�5�xy�<M~D~T~L�.?.?!?)?%���D1W�L��V�)�L��ʸ��7&��l�zY��]>N@E�V�G�*���ws�v�Ht=��p,�Փ��}��N�����6�zp=����Ӱ�S�jCG�+����U*��ʽʿ��1��{Z��%}��D��2m�c�fװ��/�V�+�S�O���9��m�~U������Z���g2����3�/M����7d�u�^Ѩ�;����O\;h*B{�!����2N�I@Z���%�A�[�>�Q�$ ]��2#|k��Q-~����!w��ls�m��8�r��d�C��	Y��j��=����f|����xZ�`4+w5��&l2���g���çн��u)\tTث�>o=z������2�WCf�r����	��b��9v�Io���F�v"�ٷ/��Y
�@Ǖ��bQ�����㹎ڼ�RaUZ���ʓ�nO�'�
�0��\�w�PY�n;_��oҟ�d�C��A��A̪����+N���Z��`L6N�/^�{��*f�z�7�#�eF82G��ɠFY����Q�c��&D��r	nf�M.� ���F��%Ӗ5A\��C�Vt��q
�!8������8wp];+�`ui���M^����g_�"܀|DV�M��x�BkW�[�C�g�ƿ�[�t�|�o������3yH�?�O&<4�1���rbe�b�"����F�x	iD��R��N$�||r�
ݪx�QvN<�8���#�4��x��.�#�g��`3�ID&a�;ge_�i
+������������Û�ү�\��~���h~�Oni���9��y|�S���ce�b�B�k�`n<&/�w(	߂�(�����
A ����)&sDI0���^��U�.�
��J{��^����[GT-���9p�@���~{�����{b��!��p�D~��C'��g��LOp��S��bPQӌ�d
+)�I�|�2��o5��1b�]�F��u�5Su�b�����)��h�����}���v��Û�6OM��O�+�'jq��?DvA�حC��z.4���������1�
 �v���̙{��>�zKa��\�G,&��dP����É:��خ�P��p��0z��D�S��[�S�7\�@����yΤ��$S�Պ%Gr��I5������-1�?	�
+�SN	kQbLԶ ~�gN�z,O�^���|͈wk5��nu�^r8����hH��6vg(00���. ��|t��mr1�K|���Ja?z�_���miG�t���&�u�
+:���ϫV0����\f� �S�9e��+�_7���j*��5ˢI�4&[�U�~=H��J�sC�Գm<��v̫�Ӹ��y�;��$��f�+��i�@w�	�p�֌t��E�w	īXYC�P�����8Y���_�6���[�,soV��hH#t����="TM�7-�P��uOl���RLV���9�_h�L��VǄ��N
+^Џ��m�������t�͋Z��!,�5�'ul�5����~-%��>��i��&�G?b��B#��uN��FO���9ީ��mRqD�Xaғd V�%�o��X�{�'W����L�H��9$V�$��HR=�0�2�ʼ�y��LJ�1��������a��B;���	��16�3&���rzS���a��d,iB�L��4����Eh*&��k����j����u��ق�.�ͦI�(�j��b�J"#���)%H���5/�n��h�'R�7�ƈ�c�*:����5�U�{1�8�2�Kq=����8[�ޗ#]N/��pj�d.��d�V���[�S�=��BPǸ��H(�u�
v�?*��X#ѫ�����a>�^|k��/wa;���"8�Y��#f�{��j�k��S��'6p��˃�����Ε�.{�"}'�F��A4j�5���ϵ��-)B+���K�?vQ�o�����ڵ^��˨8pI�os2��Ē�����g���x���1⺌�;=(���m�En_����,�FsJ�H�_>ZD+Y��O��y50�VV�o+$�<�w�b�^�Vy9D.���W�:�+yw� ��}�\�Sc�A�#*҇�#��#M�����zX$�Lؑ �r�C���;]	9����@OLU�L;��|��:D|pJ9z�^v�_pfSt��_��
+vϰ�|��V����m��b%	��q
�\
+���9x�0�o�C�l�m�I<�l1�<�O�1�H,�~���u
��%Ԁk�Q1��yg��<a�
+�\u�/��|��o����])Z�Gm凜\�C]�v2��:D&.�P�W���Ќ�47_n�˲#����y�`O
+��'�
���f~�n��2q�W���$|��*�.�D��gP�	B۷˶��x������uUhC`DB��Q��E��B+<D$o�A�ۓ4�-n��
���|�4�m�+�Bn�ż��+CB�2�r�V�ݏJ�w�PJ��WwT��q"x#�����u+�Mགྷ�d��N(�tYt-OM\�Y�H��X;ce��YE)�i�'�$4&�h�ÍVK�"�_�C�/�8J���_�PXFs�=��r�PF�W��Fl	��O��}qd��X�HB׎'᡺5h[̎
�Փ*��j�p
ȿ
&�ٶ]�����z�tv>���)x����7��[t�H3�s���'S?�Xҙ{�6�G�N�N;��
+����\�੩ܱ������Oy��&7!;�w�������'�u����g�S_S��@
����Z`e���X�FO	K>!}�u1�7��}
�T�����O??,�3'F����]Β��?h��6����g`|�/��V����a<0К���m�<\������`0�-�<<$c�wqXE��YD�N�4�����
+)۾��CIӹ���O<�����J���#Q��~��$D�;�k�����zI�[Nˆ_��[�򹧶m�r���萃>���8�93@�;Q�׈���gk{�KỎ%�c�3�}Oh�E��o_	��)*A+\���5z�v�� �AdI����-z��Sזk�ݑsɣ�G�)*���
+���n֦g
+Q�(.�g�r�)0�6m���B)lF���ʶ�\C1��1dhc
���&����R����6
�R�]og��:�{�r�F�D鑛�8�f�B���s����x���+'�f�����&�RR��E&EE��^\��� 5�X�S1Qn��0\�h
>��s�X�I���Ĳ�[8ob����jϢ�l���lA-Aض_aω+)���>p۹j���-�O�����DZ��K�<6���$Q��Dö%�aIY7r8�s��zijw��b5[�.I��E��@�"�c�W���5ɇh��/�՞����L�	k`+��Ŷ�N�%Ra��4%��TL�!��T�@\��Y��w��=K�וe�5
+�H<��c2�dF��F{S���k�-�LZA/�ga��\��ȥ|L��6n����Od}~�W����ĝ�<�8�˥��h(*#�h��A���"��\<8�ɪo������i�����
+��M��I\��O�PH��&�ޡi�F;�i�l҃�� ΄��*./�|ѹ��Р�U>��c�tu�Qen?]����*q>Nm@���؂�1{kE��g���l~��k#uQ�vR��N;�w�*���.+��J�`%�?�A�?.�8����QS���J*�U�g���^�x �OVI|Qq�F��=�v�-���	�m��q�=J	��eu���ͩ<ca���&9:�Jf
ߤ
^jacG�	��><|�UϹ��=�Mw*��q[��j[����.���zN��(�����,@o$���2u�u�\�vcCG5�@�W9ٹYg���[���%�\A�M$ԗ�aV�L�F6I����O��E�P��7�e���|�a~ƳI��H�U����(�F���Nw�X(��"[�]�7�j@{d���j��$�oh8�@��cg�]��)��)����e�{��
+Pi��y�̤|0�䓁OƂR�W�M[Z2!yׁduse]^_W�r�œͻ��[�=¢�F����P-���%��D�o�B�v�d�2��+�Ģ*��['o��fj�c��E��u*Z�)��y�q����Ō2s����93�p�&�:��o��H��'��a����?���I}�������ŋ}\l�5�󱤌�o}������E��+�g������p�5��-p��A�s6N���6<��~�R"��Jܪ^�$Ȋ_@�9�I�1�a��h���C{��ܟ�k-�V"�(���]��iC���lժ6��L�b��6����x����T`3.�9�����a}�2#.h�;a$1�m`��xsl�{�8u��\��~���Z����}�v�ާ�8w�ݧ���Zir��)TO ���.E�H*6Ғ���?��#��^Ir(��,���9z���GGq}#��0XA/�
+�5�9�+�c��bX��$�5���Z=3f)��5N�U��%)�$�d'FK�ft�tt�{3��R�K2��(@��X5�=��|A�YU��7�cobO�dX%1�~�F�H����Ć/P�{HT�0G�F?Ը��o'$*5q�DSď��0�v�R񢝂v��d�ZD1��<� ������]wDZ��av�[D2�/�����"A]AW:��7l��<TO׿�)Z4
;��D��r0jT���J��իN��Ok<T{���
���ڜ*�!>�EC�D��Ak����6-�H_A�xʼz�����1Z�f��G���YmA͖��A��'����/'���{�7�_8p���2ʎǝ=+\��[j�y��W�5Cq�z�+s�����+1�V��i�jv&�%���h�o��V�x�ś����)t�:��Rů�dw�;:ߣy��B%'N��u���@!Z���0�AЃ��0�[xj̀9w����)X�<�'�"�������H?2	u��.�q�`+k�Q>T�ӊ�=�i�h���C|����
�A�؁� b���n���D��=��d=���v����r� �����sd.�b�+��E�z��ǻ
+p$r�:>
��o�����&��We����d7ԕ���\)\}εpqZ�l�qQ�AS=N����W���,j�z�K�����wϳ~�>m�)�K �d�t1�R
s���;�7�ýl
+z��J�F̜@���y4	K�Q��?�h�~)3�a��N��9�����w@�Th� �V���ɺ��Wa]y
+'�p�WG�
+�*b
�6����k�A�*�!�E��L2�}��R��c���r�>-�k�t҇�TQk'��C)��*$Q���M|��	ڋK��M�ݤ#rh!Uw�:�ڨ��#�	���r��	n'��5�?q�}<�g����
+�b�ק����ٌ��:x��]o�7:�a����nw��S����R+R��^N�$,U�F��HwM�ʿt�eг�����
z�/��9�H�
 endstream
 endobj
-572 0 obj
-<< /Type /FontDescriptor /FontName /WXMGLQ+SourceCodePro-It /Flags 4 /FontBBox [ -282 -454 814 1060 ] /Ascent 984 /CapHeight 656 /Descent -273 /ItalicAngle -11 /StemV 200 /XHeight 480 /FontFile3 574 0 R /CIDSet 573 0 R >>
+588 0 obj
+<< /Type /FontDescriptor /FontName /YROZNT+SourceCodePro-It /Flags 4 /FontBBox [ -282 -454 814 1060 ] /Ascent 984 /CapHeight 656 /Descent -273 /ItalicAngle -11 /StemV 200 /XHeight 480 /FontFile3 590 0 R /CIDSet 589 0 R >>
 endobj
-575 0 obj
+591 0 obj
 << /Filter /FlateDecode /Length 735 >>        
 stream
-x�}�]k�0���+��Bw��:��f�@���G��m���v���s�?Y���XWh�ӣ��9r�7w�Ѽj��\��?��������!��X��i���������;u׵����r�Yn��o���T����Ou�gȰ����?F�||���h���a�hӏ�a�}�?����BU�RUq�o�;�m�N�<��?VM�h���1��j|��X7U����!x�EUu�'��}h�0y�r��~�<��͍
�c߽��o����]�<��W2�1�����<*�f3U�ǰt�ѧ�ޫ񠿩B��_^�ß9�/�$�&pF��w���g7�	�Lݬ��,�M�W=g��#�?^�s�����B��E2�@�@4�,T@��5�)t
͡)t͡t-�����ZE�9��t�"��+����i�
+x�}�]O�0���+�$v�5>nl:�J�R/EӶ���&U�^����q�4�!��p��>ǩs���v4��G?2�ru���+�h�ew�..�my�������չz���vm����\l����?����|9U�<�߃n�s��2�.��я�����m,,�ʇ٣M?ʇYu�F��
+U�NU�e���X��g�?�y��j�E���8�T�s��U=�3-���>Q�[�C�����c����ͮ���>�}�S��w]建yV��dc�����yT��f��Oa�У��ޫ񠿩B��_��ß9��$�&pF��w���gס	�L]���,�M�W=g���?^�s�����B��E2�@�@4�,T@���)t͡)t͡t-�����ZE�9��t�"��+����i�
 �4~~�?�_��Ư�O�W��+����i�lʂ�MY�)~�,��%��g�"�Y�~�,��%��g�"�Yz-�Yz-�Yz-�Yz-�Yz-�9z-�9�?������~?�����g�s��~?����E?�@&��5����1�fak
-[c!l���5��&&˚��XSPZ��6�OE��w!����H���]Ci?�L�~d��'���W�U҉�Z:�TK���<���K�rb�`����,I쮙$��]Sp*r�N,!v7��Ʀo�j����n�߷ny�p��k?ޫÍZ7������Y�7�Rί��>��_���
+[c!l���5W�&&˚��XSPZ��6�OE��w!����H���]Ci?�L�~d��'���W�U҉�Z:�TK���<}�p��d8,8M��4���O3IF�����J�K��L왱�{�(��j����n��[�<u]�r����F���f8��aV������m��u��7�f
 endstream
 endobj
-326 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /WXMGLQ+SourceCodePro-It /DescendantFonts [ 576 0 R ] /ToUnicode 575 0 R >>
+331 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /YROZNT+SourceCodePro-It /DescendantFonts [ 592 0 R ] /ToUnicode 591 0 R >>
 endobj
-576 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /WXMGLQ+SourceCodePro-It /FontDescriptor 572 0 R /W 571 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+592 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /YROZNT+SourceCodePro-It /FontDescriptor 588 0 R /W 587 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-577 0 obj
+593 0 obj
 [963 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1222.2 ]
 endobj
-578 0 obj
+594 0 obj
 [ 2 [ 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 ] 876 [ 600 600 600 600 600 600 600 600 600 600 600 ] 913 [ 600 600 600 600 ] 918 [ 600 ] 923 [ 600 ] 935 [ 600 ] 957 [ 600 ] 961 [ 600 600 600 600 600 600 600 ] 971 [ 600 ] 1106 [ 600 ] 1152 [ 600 ] 1157 [ 600 ] 1159 [ 600 600 ] ]
 endobj
-580 0 obj
+596 0 obj
 << /Filter /FlateDecode /Length 34 >>         
 stream
 xڳ�����*F�ztI0���O	�
 endstream
 endobj
-581 0 obj
+597 0 obj
 << /Subtype /CIDFontType0C /Filter /FlateDecode /Length 5750 >>       
 stream
 xڍYw\���evfP�aW�՝�`�A1*��H����7���b,��Ũ�DEl�"͆KY`E���#�X����>��E�{���_>?�`v��=��
@@ -2407,10 +2483,10 @@ $
 Q'�ja
��ݣ��@�����|���B��}����5>��P;0~�~��K
ƍ"b��g�	F)���C�U�w����1Z��+d�c��n0`�� ���A*����K����̂
 endstream
 endobj
-579 0 obj
-<< /Type /FontDescriptor /FontName /NQENEF+SourceCodePro-Regular /Flags 4 /FontBBox [ -193 -454 793 1060 ] /Ascent 984 /CapHeight 656 /Descent -273 /ItalicAngle 0 /StemV 200 /XHeight 480 /FontFile3 581 0 R /CIDSet 580 0 R >>
+595 0 obj
+<< /Type /FontDescriptor /FontName /NQENEF+SourceCodePro-Regular /Flags 4 /FontBBox [ -193 -454 793 1060 ] /Ascent 984 /CapHeight 656 /Descent -273 /ItalicAngle 0 /StemV 200 /XHeight 480 /FontFile3 597 0 R /CIDSet 596 0 R >>
 endobj
-582 0 obj
+598 0 obj
 << /Filter /FlateDecode /Length 728 >>        
 stream
 xڍ�Qo�0F��+��J�%��
B�k�RM{���E����?�Ǵ�4U�DՃ���ܔ�շ��`^6/n�oS��N͹-� ��;&WW˦8\��;W��z��mSl]'���rSWݍ߼����t�]�޴poU����G\?�_�����z�
yS:z�����];H���U��G�c��[�W[D(�ӵ���M�Կ��˼9�§dC��E㵪�6&/�G"�(���~߹����Թæ~m��T��թk�C��d�Ж���7q�UP�q{>��%�d6�{��}��w'�}76�_���/��y����
@@ -2424,22 +2500,22 @@ ZBkhH
 OL/���3�y��,y��,y�<pC?T�c?��۸�7�,!�(�7�����S-�ؗ%D�C���Ռ�&T?����1P�s��i�z�����j�1��ͱ?^�+��m���:�"��Y
 endstream
 endobj
-318 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /NQENEF+SourceCodePro-Regular /DescendantFonts [ 583 0 R ] /ToUnicode 582 0 R >>
+323 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /NQENEF+SourceCodePro-Regular /DescendantFonts [ 599 0 R ] /ToUnicode 598 0 R >>
 endobj
-583 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /NQENEF+SourceCodePro-Regular /FontDescriptor 579 0 R /W 578 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+599 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /NQENEF+SourceCodePro-Regular /FontDescriptor 595 0 R /W 594 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-584 0 obj
+600 0 obj
 [ 76 [ 796 ] 82 [ 531 ] ]
 endobj
-586 0 obj
+602 0 obj
 << /Filter /FlateDecode /Length 13 >>         
 stream
 x�c`�;)
 endstream
 endobj
-587 0 obj
+603 0 obj
 << /Subtype /CIDFontType0C /Filter /FlateDecode /Length 648 >>        
 stream
 x�uRML�`��6*PA!*q�~!�nAM�9%!c&0�BRطuc�[�9�����7�N#H$!��ȁ�c�'O���1ѳ1��Mz���b�sx�߼?y(`���ZG�|�7��I�B�s��QA6sr�t`�%m��S��!��@�Acy�X���r��ӯ���q�)7�~e:��>s��טNp�P���@��T=�D����n����#�g�pHTa��}�a�����))�L�����'��h��bp�B4� ���0� Y�!YJƕ�<�a�$y
@@ -2448,10 +2524,10 @@ x
 ��H�Q���ƿ�O������4��(���Y`��,ƿX�����+Zcuk䋆��J���j)WV�c��	{bgǹ��G�~���Ț�郅L�~p5R�]_��ϹB:�0sR_�x������..��8]�4�����d�&��m��b:o���)����6�����]���}����H;�O�9�.�g���Ĉ�G��k\|��y:��s�b�6��
����8��5��:B�L�Ap�$K����Y�H���Zv�u1u�i��?4�2&r�����)eW�٥��b.���
���
 endstream
 endobj
-585 0 obj
-<< /Type /FontDescriptor /FontName /VGNMHW+LMRoman8-Regular /Flags 4 /FontBBox [ -456 -292 1497 1125 ] /Ascent 1125 /CapHeight 683 /Descent -292 /ItalicAngle 0 /StemV 98 /XHeight 431 /FontFile3 587 0 R /CIDSet 586 0 R >>
+601 0 obj
+<< /Type /FontDescriptor /FontName /VGNMHW+LMRoman8-Regular /Flags 4 /FontBBox [ -456 -292 1497 1125 ] /Ascent 1125 /CapHeight 683 /Descent -292 /ItalicAngle 0 /StemV 98 /XHeight 431 /FontFile3 603 0 R /CIDSet 602 0 R >>
 endobj
-588 0 obj
+604 0 obj
 << /Filter /FlateDecode /Length 368 >>        
 stream
 x�}R]k�0}ϯ�{(�֏n0��m7a���n�6���&���$�-�uB4�{�͹GG��ya��l���)�N��j2-$�*��!��vJ�[��"��hr"h�1X�^���F1����_��m�~�t>�{Ye���cѕ�r<�:�������p�
@@ -2459,124 +2535,116 @@ x
 ��L�����w�S�p2Ɖ㴽 {���ʈӾi�JD.I��O�M�z�zBܭb��(`|ǣ�]]�h��G�溵�h�U�?a����^7͡����a*6uFQe�@���"����{g�)?CM��P�ߙ2}c�G?��AO�E3?:�,��7Q]ǡ�Rz��5l�r���kY�]�[
��A���R�
 endstream
 endobj
-315 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /VGNMHW+LMRoman8-Regular /DescendantFonts [ 589 0 R ] /ToUnicode 588 0 R >>
+320 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /VGNMHW+LMRoman8-Regular /DescendantFonts [ 605 0 R ] /ToUnicode 604 0 R >>
 endobj
-589 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /VGNMHW+LMRoman8-Regular /FontDescriptor 585 0 R /W 584 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+605 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /VGNMHW+LMRoman8-Regular /FontDescriptor 601 0 R /W 600 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-590 0 obj
+606 0 obj
 [666.7 ]
 endobj
-591 0 obj
+607 0 obj
 [777.8 277.8 0 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 666.7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 500 500 ]
 endobj
-592 0 obj
+608 0 obj
 [726.7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 379.6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 967.1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 445.6 0 660.9 0 0 769.7 0 0 0 0 0 0 0 0 0 697.9 ]
 endobj
-593 0 obj
+609 0 obj
 [481.5 481.5 0 0 0 0 0 0 0 0 611.1 ]
 endobj
-594 0 obj
+610 0 obj
 [597.2 597.2 736.1 736.1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1055.6 0 0 0 0 0 0 0 1444.4 ]
 endobj
-595 0 obj
+611 0 obj
 [531.3 531.3 531.3 0 0 0 0 0 0 0 0 0 0 826.4 ]
 endobj
-596 0 obj
+612 0 obj
 [826.4 0 0 531.3 ]
 endobj
-597 0 obj
+613 0 obj
 [380.8 380.8 0 761.6 0 0 0 489.6 489.6 489.6 489.6 0 0 0 0 0 0 0 0 0 0 761.6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 272 0 272 ]
 endobj
-598 0 obj
+614 0 obj
 [610.1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 843.3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 361.7 0 553.2 0 0 644.7 0 0 0 0 0 0 0 0 0 598.1 525.2 494.2 ]
 endobj
-599 0 obj
+615 0 obj
 [272 0 0 0 0 0 0 0 0 0 0 633.9 0 0 0 0 0 0 0 784.1 0 0 0 0 0 573.9 0 0 0 812.6 0 0 0 0 0 0 0 0 514 416.3 421.4 0 453.8 482.6 468.9 0 334 0 509.3 291.7 0 584.5 470.7 0 0 441.3 461.2 0 0 0 0 556.4 ]
 endobj
-600 0 obj
-[ 27 [ 734 490 ] 30 [ 556 ] 32 [ 490 762 693 544 ] 40 [ 272 272 707 435 272 272 748 544 ] 49 [ 666 435 490 762 272 639 299 490 490 768 490 ] 61 [ 762 734 544 326 353 272 ] 68 [ 299 761 517 612 272 ] 74 [ 897 816 734 544 490 816 762 490 490 666 544 381 381 ] 88 [ 272 762 762 517 ] 95 [ 721 381 544 386 272 490 490 490 ] 104 [ 707 381 490 490 734 544 734 734 517 1006 707 734 517 734 517 598 435 490 571 ] 124 [ 816 544 544 ] 199 [ 762 ] 502 [ 272 ] ]
+616 0 obj
+[ 27 [ 734 490 ] 30 [ 556 556 490 762 693 544 ] 40 [ 272 272 707 435 272 272 748 544 ] 49 [ 666 435 490 762 272 639 299 490 490 768 490 ] 61 [ 762 734 544 326 353 272 ] 68 [ 299 761 517 612 272 ] 74 [ 897 816 734 544 490 816 762 490 490 666 544 381 381 ] 88 [ 272 762 762 517 ] 95 [ 721 381 544 386 272 490 490 490 ] 104 [ 707 381 490 490 734 544 734 734 517 1006 707 734 517 734 517 598 435 490 571 ] 124 [ 816 544 544 ] 199 [ 762 ] 502 [ 272 ] ]
 endobj
-602 0 obj
+618 0 obj
 << /Filter /FlateDecode /Length 32 >>         
 stream
-x�c``��������>����02�M
-
+x�c``��������>����02�MC
 endstream
 endobj
-603 0 obj
-<< /Subtype /CIDFontType0C /Filter /FlateDecode /Length 8637 >>       
-stream
-xڭz\W�����`D�0��sg-�+�-��%�ް ���t)��mw��˂�b��T4&��hL$5�&F����^��}�.�=1�������-2s��)�s���ս;%��mش`����K���T�s���/"�3�vr�0@�П�	����yw��(>�ؽs*��_�Kަ(Q�����7�8��o�����9�C9t�DT?
-P?R�Ѣ^"g�w�u3v������?�p��ر�Gپ��玑/����� �Tx���/#_E�ȇ+�;|�=}����>�J�0��/,8"D9|�|��R�KN~���x*}��
-o�0y���|�5k����^>
-��|�h�\��#�����7&8��ŗ�Q�v-R�خ�`�򵣗.r��|��1���r��0��O�g@�r�_������ ��9��;|y�(��c(��Q�����j 5�F
�FP#�Q�hj�B��\���j"5��L�KM��RӨ�Lj5��Cͥ�Q��B�=j1��ZJ-��S+���*j5��ZK���S�M�;���S��ʋ�|(_ʟ
-�vR��@*�RP�TJ�QJ*���"�(*���b�8*�J�>������l�6؜ܝR�D�E��yq�������u�to�p�)%
�ޠ������g{$����Oϵ=�g9��x���^I�~r�x}�뙯���7��iu�\�$3��v����,L�A��}��ѽ5���������d��e��>�?��ۃ������A��O
�8n�A+�
z2����C�rz蚡C��v��h�U�%�X��J/����<VtH%yZ}F<�����ا�Q����%�~
-�����W�V��V^�f@�9C�$�,=��ỈP�c�6�0��M7a
Ws
�+xQ��I��F⤱&D���=���r>�)�q��	�oeg#�Ы���>�t!@nt<����o+��,]�XQ_QY\\����C'Jr�>��{�x�n��,3{��a}|���R��lI��w�+?W�%�'��
��s�����/��WoN�/#���4��pN�%��h��r#P%z���K�ЃH�-Jt�C,���
-��If�^A�+�����B�(17^��I��1�M��
%����ƫ�<	��$;�`L��U��r��
\��$��I�8vw�� ���z�A<�*�����B��h�7��hz�E��g4dry�E��.C=���۹͂D�P�%DY"-Lձ}+C�xb�h�����
-�����}�M�7��,$�+d�R�n�*����8~��a��8��!W��A�4�g|U��-���1�jjy�J[�p m߼,;�4v4~�q��k����g�F5rw� ���Ȣ�!>x(���������ܷ�7�ش�/���h֩�@�J:duF�ŵ�{K�3�h����/q�|�4?*�r�:�̈́;�-�V�I���}���W�vQ
-�Q�<C�����c��lцw��w�I�ߔy g������=��l��9��-��g����h�<�w�.\�P�A�vxai;2�3���3�G,^�%�3��h�	�*���$+ŝ�|d��/I|+�\���c$�ge�=�~ 0�+p�ο�:�O(���,�A3J�)��W��p��%⵼S'����6��C���4YÈg��0��8�Og_#g]��gq��}�O�����Hk��U��@PV�;�(N�3Tj�&��I�a�n>^sE���#�~���(�MP����2��hV���T�>]rv��:�%ؕ�ێ�Ht�E�л� ԍw��j��n����Ѷ;ba�)�N��$�.Q�E�Z�-�z�BS�nnm<�]�'�����ʊ����'��cag����H���"�l��c�~,��X��
-��:�<L�'���R^���9f3��H��������|d��":"4�4��07Ӝ��LBS��]��c+��z-�1k���#8����}S�s_ܱݓ�u�#��
-�/��E��!�@���F����s���;&s��%����Mz�c�E���yp�kYT���3t����vb�<��&g������ЅCik����v:��̣�[����~���1P�:��&.S9h�������޲�{[��u�v��y�P�:U�@�`E
D��������-bhٲ)�T��38��y`Tܧ�
��[PQ�t�*p$�tϣ��
ݻ	.�-�P��t6�a��?B�Mڅ����m�r�mG�癏��-���d�F��C���<�Ԡ9|)����	����!������H�zET9����$)�u�����< ��;EV�J��tn>݌*���1��Z4U����'@k�gǾb�N��e،�crj@n��p�݁�tĶM��e���g$~p�7��*[��j��fB�WU{����.np�NF6k�i�q�4����_�D�b�钪�c��Y��h@�Dm�$?����8���4>�D�B^�FZPKx���ߞ��r!7�+:����FF�{����m�X�I����J�|=%����GF��Z"�j�-��IK��j�ؽ�ւ[z-<������Ǥ��oa-Wwꭼ5���ϫ���0g��.LV�
ƣ��8xS�[��$)<�V��UE���M1z��,k�n�?}�;�����o��Tp��$Ԝ!w���F�Q�\}.��M$�b���;v�9�P�����$^������O��k��V�!���{���Q�?U?�������m���Ӟ\�
-������r9$�v� ]oN�n��$-P�٬��Ԣ-eY�<��<%M!�Sw󸕎��yf�1/4��l�NW��f���Fw������~�aܢ�G?������+��9P]��=�r%O!�w"�VAI�s�c��M�Y���Rh�O���d��:�$
-T4��ի����E��|n%7]N�^d�a�i�^���iE	Va�=�O�%��B��և.�Aܮ	��+������%4�ⷒC��'P���\���R<�
-����+����Ex<����'�`�*.6�J����`FOX[S��"p�hݙoʑ�ܹ�%�
-r�[�-~-L��B�0BV_��{W\<�����ƽ�p��:ߜ$�y�J��Qx���²4`k�V�
삇�	%�����=)���6n���)�4�W�$�����9 ��A��}w��/��w��6�f.���<3q�Z�Q����Ģ������,�
�� u$7�TQ���ykimN9�\P݃C�yDu�&[9k��ȹ-�g���@�%^�� 	�`�c)�©(�mᡸcJ�e�3c�@g#2I2�F�&����������՝y8I�SR��Lu�iƂ ؖ�f32Y;C��G�>E\�^��j膴�k��*�+�@4�������2V�S��n��F��6�r�����t�t
-��r��0��eU��E]B^��τR�6��Z���`M�>Фؒ�M�Z���R���R�)��������Y,I�Gy��������6.8�U��U��M���ń������u�>��|��%FW�X#��/�uXl��H�!h����KS�6=�!���]O�z���я��=�u�كRm������i�}�ߦ���TR���Li'��r�A���F�IV�,U��ɵ��$}��xI䁃B�}�;;.�?^r��Ae���Cm�^϶�i0�V5g�;ax`��Y��,ӽ"�D	��a�1��m���}p�>�8;��n赗f��"��5�����]aQSCQa?��557M$c(�6.t%4��
-��P��ƥ$ED|���v�%~��?���*a+iw�l31EOV�ͥIQ��2\����b��3i��T6؏4 �+>#WT�B}�3���mEJ�~��<tD�@��cmHɑ�]h��W><b,7�$�
-%�
��%y�!/i7&S"D��g� qq:imk^�w���0�Ff��٘xM�mݞX�6A�a�)�I��v�4T����Mx����������N1�sA���?J�P#��Jt�i@�>��h���H�b���&��|._)��������+rv�F�ɤ���+�˒����H��-���v�VrNwE�(x��i����$����.�d.�����Ib�.�#UT|I�h��*'4��q����TA>����8�w��]�޻�/��Y�r#|ލJ�
�H�']��Ņ+�W��?�1�_��RWI�o�@{E]�����JP�nQ�h�T�#�O2#8<Z!=��T�M��o�i�
-:+���~��[g�mJ7y�
-m��[��?�����K[=��g+³o?(�Z�.U��o#�m1�ӷeȂ�_���p�}q��~��v�줱j��
-����0�!���g4������ݱ��Z=�7Q/�����&���4�di��Oe��U͠����ƽo��×��Iq�J����	�F-6B�A�aBc�5��6��4�Ηm�x<^z�H�`:ds��T��L�$x�ߟ
-~ɾ������:4^:��?m<!��*+9� -?'
�F�%h�ߟ
-~N�S�t�}˕vq	]���n��5����{�£�+����x�Xh$���B4�G[��B���]F��������w>A�D�Nj�� JDZ��v���vʐl�/�;�`뮟�DR����3��-��B����[�}8�mE-
->��Cl#�h�Qp��HQ�t(ƻ�߻.�e	gZ/�|E,��c
���HO |DW@cQn��)�VXu��,��t��A IՏ�~n�P_����C������c��b	���0�w��_MM4hi���O��>��p���hOW+�P�&A,�Nh
-^oE.egUY����ڙ7^NqD�,���e�+�����w��w�2�+i�~9��{.�03P�_�+��jlS�Rh�-L-���������g�Q������Ҡ)��i
����h�q4
9��@N64���6a��pw�qH��κ����;~p{q.!���?퉢ɦu�$kW�7� �WN���}��c��C��)x;��o��(^�$ۤ6լOϿ}��p��
-D!G��ov��-�خ�D5���]��`í�yZ�UZKb�a��/l�_��KcJJaq!@d%��ۢ�x��/-*�˄}��ȵ��?�>��:��e90[�SR�-�]��mo���K*DDr�D�/A�
�����2���;E��㘙*�Èܲ����1x����bn�b����H���S5��"�<�
�5��Ș���])!����Jk����˶�ɵQ���m����l��_���e��"���}�Hc����p�A�I�d~�DmG����x٫�F�|K�+F|�|tqܬ�I��x���o���w�Yf���e�ՙ�
-�����r&���,f��0Um+4��]�V���ɼ�zJ�v_��~���굅L쓊/.|�>ƒ;x0��^w�����s�I2�ʹ65����>0r;���
-���n��-籿0A6a����PZ��Ӂ1��qL�[�7�#m�%�����.f����&��.0�ڐ��.ѩ���RƊ�W�?�h&%J�,�����E�	�A�vwj��P���W�Zi�W-j;����qN,ģ������G��z��n5}7�ok��^={�zׂ'm��&���r���G����NR�����ypX<�����
^|�w]ҧ1u�g�R���]����~޻ۖ�v�Ni��;�Ԑ�����6:jr2���L��C�W���o8���#�h5'7�������(���Y��}I��ooHƢ<�����{ˏ}�DcM ٘	Ӻ���.krtDR
-����ý��UA�F~_���cIgT��=Q%q�������mn|�%d��Z����o��l4�s&49lil���o���������O�f^��)�5�λ��Ɩff���?��|6HM��M�6���p����O��3�X��*p*�{��K�|�4o�X�i%���s2����K�Q��!�]���׫�l��4^��_�͋�{��se���qV:.3�����D�3�e�ߘo�����4�@�l#�U_�
-*��T���3PQ]����z�����b�5�q�׀�2�<B}�)F�F�-�l����-O⓪�-Wٛ�O�;c�
(E�p�v�	�D>���	o�����|uFr�.)AC��?�J4	/8E~ZNn:@�=s���m���"a�����n��Yc��=��`%�r-ȮG�\I
�g�e:cb8�i�`�p�G�{�/��k����V?z8,8�Vz�)�(�����C��~��6�+$܂���Qο\�����7t�!{?b-����^�
����2�R�h��cuQ����b���/�?��ot�F�*Q���������o}�H(�Y���\Y��9��
��s#��Y>0�l��Ʌ啅����zhЁOB3��R��h��v>�#@bp`���$��!�U�a	8&��q�H��B��S*>V���ڧ��Գ�i�5�,X����\$�\�z޻��?E�"�չ��w�C�;L������.6y�z�CR�j�m�e�ꊏ.VU:0��($?{�	��_�"�p�čG`ZyUᾚ���.�Kێ]�>5�e����KH��� ����v
{~�~�m�#݆b��F���|Ԕ�+Dn@����<N4~9D�o��O��D�1��0u��pP��X��}���h<�e�]HO�6o����}��L;Թ�x������͉'��}�z��V5BvQ���:�p��a�oI��V璎e���r.zI�&h
-#��4G��P���N5�$K�ٚ\ԭ�p`l#�YD���]#��EЋ����NI�٤)�
-�iř<R
-����a�G�tn�0�**�łFb��"rSLD�Czs��M�k4���K*V�DS`�f������
�C����gs��y<���o��ef�l7냾̶�Hn���d�'�#VQ�3�.n�C�x�.�G��]Q�F��OxT�O�Đ��JckO�J�TeQQ����t1�|fBf`�Թ�/� h��6�X{V�.�eC���J�ӽ�8s�6��2��y4U(B�:��A�.��NO�\)|Ut�Z�Jd�H��}�Qb�WqǼ.K��KM�iT��B��U\8�K4y6!j`�M��D�q������\ޜ��33j����H4�Д�V7�R�Q7����=��ݕ�#gbTUzZJ.�mN+2�OQ�S�(�Md��r�2s�yD�[�GL�D��m��c��c3��J%���N�hkq���#c?��Up#��kԬ�WƲ*mjj��x���.]�ifʋ�햟n��.O]���*��
-\��c}l�.�q�w L�s�I��R��4%�)fs͙�
E��$��d$
Z61&��`k� L��S�D�>֚dE�m��#�0�BQ`}�i���o�9��Cm����5���u�ǻˬ����p66I���L���=5;��0���F���̃���"{<Zbҧ��X݌����C���@dj4���wz��V�iآ̬�|p��m��8�� ���;�.}U��B��)���z��е�'(�:�n��5x9j�����2��4�]ç�&���i����#?�H�7�+�|�������h�6�O$���;�������vaU���\z^G���?�.n�M�}vx�-�	T��f	3�	S`&d��Uu��&T�;j��sY��|
���1����bC|�q��%��u�ulet~`
-c#�
-���g�[.T�h�0Sr�����s��v����'������u@J�^��R���IV>4f�
:�JU]%Z����ġ	�N�?^��
@p�=,�at��Z�&�&<��'M�݌{'EV�MIv�-�rS`,���~��~?g�6 �����mI�	Z�)%t�cn���e�ۆ7���hn���;;�,�ܪy�w}���e�bGdέ�f5]Q��:g�믛~<��ʓ�g���-zi�?�kq	E�a�U�6���p�����\����|�j8}!����9 ������7A���YV�_���'FBט	lq�
xL�˓����
�-�����/J�����9�z��@���ׇ�8x�,D���f��mLP���E��?�<#��>��i��z2JDt�}z;u��i8kH�Ԥr��i�@~@ ��M����yU,�F����?.�w��j���l�9�,hU�h�
���6?}�t�K���d�c�G�G?�3��J��UI���lrZlqR׸��WA��u��!�m�Ye+����N�x2��i3
z���z=��p���w?�9a���@��2���2o��-Gg�ům��f�q4�e�2v�����a]~�.����x��� ����+'`^p�����U��U��|���/97ښ%�߽������ݰ��"��z;eA����·�z�U�A`L1j�x8u��d���L��9�'�[�o�㐷)��f{���sd��V��s�ʥ`��@�б���K=K�&Ұe�M-��s�ɔ����KH�����l+
+619 0 obj
+<< /Subtype /CIDFontType0C /Filter /FlateDecode /Length 8734 >>       
+stream
+xڭz|���
+!�i�"�dV�Ыi�SBL1���"w\$��l��"W���6`	-B��I  $��Y3���H��wC�=cڝ�=�����+�:w�D"Q�Ջ֬Y�q��e����G���	�s��\ ���@�G	o��I�ł�3v�.>ܽs�T���$�9��MQ��7�g�Uo
+����� ہ��ߡ:Q"�/���E����I���m�G�;�% 02x��o��y���#m�S�sG��{�
+W��!wWx���/-_N��
Pȷ{���y���k�6�C�^�J�Op@h�r�h��Jyx@�.9�7����]��)Uxz�C|��׮^#_��/���Pz�G��˕^^rߐ���cƄ�����M�(��u,R��]7j���kF-]�2����C"B���rO��~��Q�������xl�Z���I���G��Sr�?5�H
��Qé�Hj5�C������j"5��L�KM��RӨ�Lj5��Cͥ�Q��B�=j1��ZJ-��S+�����*j5��ZK��6P)Wj�Fm�ܩ��IyQޔ/���I���(JAP�TL)�*�
+�©*�����*��H�1q.��@m���3�9�F�̝�vz.����۝�u�+�,�*��|+��^���9����T��]��:��n��w�^�cH��==8�~C�F�������7�S7�dƉy�kr��^�d�d�{�׻��woY��}��]�J�:�����߰~���|[�v���2��<�����d�3���N�V:��apԐ�C
�|h�/ǇЊ�Y�
ᢶU����H��cE�T��է�p1z�n
��ڟ�lZb諠�3I��C}o�ݍ�%Ul*4�s�Q�W����x���:�
+�TzVqUנ�����p�9��%�klJ.*��E���O��K��XP|#;��^�L���r�[����wk�7e��Ŋ��Ҳ���F�,=^x�;���8ރ�[t�&�d��ӿ
>��ǋS�9e��!��l�<�|R6l��m��k��>hfv�	8N�Co|(���Y�ע	�O�
+Ee�
T�ߐ8N@kJ ��p��H������y(
+�}��_��ȃ�ԡ<3;\���Y�և1�FI�NU��X��k�(램3��"=��3���⬂\G�(Lp	]D�3�e�<uJ��W���$;5#%�͏ˊ�i`|�=G�I�����c�b�y5��'��J2��D.&N�;:S���рX�@���fwg�r��0#���b�T�������qeFK���A�P�1��l�!������Q7u1�V,�m�N-$��r/!�far����J�s�G�x(�7㕨��6�=o*���#IM��w�$i�&D���
������9��h5���<�B}�l968'�~�OSWͣ$�b0����M�2��cGី��Q�|t����_��^����Q<�0G
1!�M�5h;7g�7H�Dg��^w,t�>7"X�f��+���	�V�
+�/}�8�9�%��s���r�����������IQ,�����۫ho���+��R�9�=���З�E���b��A&�>S�a����j'����]���s��K�O��#+ZA�<y&�8T],�)F;8U��K[������O��Q�c����;��f0��(�O�]��'���Ļ���E�?>E2~�Q���]���=�6rk}k*����R���4�����Am�akW�k^�;��8��s�9ԉQ2N��
�����G�:ە�C�>�Ƭ��ߝ�{ӎìmc���G��B�&GѲ,��R4�����s����:��?|��o���b>�u�\.L.摚.�fu�A�N��%g�.�!pX��ɟm؃�X���
D�xG쩶 ����m�+��n��D��7��t`^�Z�����H4������ў�|�7ܮ,�*+-ʵ^�|*�2z v½��$�O5�a3|��OE����R��U��+�C���#J+-%U����c6��Ck���K����·5x*"B��B���̩ 9�!4�h�%A+ܶ�Πײ�Ɯ�z���Whm�XE��[B��E���1�T��+���R4
�h8ZƏ5�VF���w��>��,� mGޯһ{,j�̃�=X���~v��Sg���#+d�|�0!��~���.D[ö�,�Ða�f�C�Z7��h�t����P���*�+6rA��D�e������̝�ٺ;�ώ��Hֱ��+�#z���\-h��l�@ː�J�X�r���~/��!�]��Dނ�C�U~#�X�{M}�hȞ�xPo�!����U��-��l)�Eh}�Su+J8ϜF��jR���Lz/ĝs\���E��f�,&X���Ǹ��D�K���#1�vQ%�2�G��h\���*����#�V)Hʐ�̩�hD#*k�8��	k�T�O.Gf͕����7��1Cg��#sG��k4\uu`>�u�{s��?.�	���,D�Ȗ�����C�����Q��^�����]����͚�~`�&�=��.���c��"��r��g�а:QK[1��OdEQ0�-����0�����B/�����p�D�
+�������yaa~1�ɠ�}�$�h�fp���B#_�G�*B-Aa��A�Њ�K%p�o7�~��^cA�,��[ev�U��cRS�7�����V��OO�g���t�~��sr&�w��Q��)E���p�\��B��Ŋ*�I����=is���X7͟�����P�ķ�*���C�N�;U~j�!�pz�>����X1����mL��'e�!8^����.-��/�Ӂ�[�Uň���~@eJ�ӏ��*���:%,E[d�۴�rȃf�P1�jY�\����7'�L'
+ja�(Wo�qLgj�����YY�����M�I�y�LG¤l�ј�	���5s�*\���O���WlL�6�P�`7n��#�E�zs���Tf������
��� �w$�VAI�s�c��M�Y��"h�I����[�}$
+�?��֫�����־xa%]f��^d�a�i�G�f��Z����?����idVZ��s�&@S ��C�O����X��J��WG
��<|��/K��
+���ZP�X~z$���J��Z0�Ԃş�.�X��*ܷ3�K���<!�
)��������.A��ʦ�+��
+lR7�41�f�pYaL��z�]�1���7ǋ���j�3�y�*.:R�ɮ�	N׀-�[|ֳ�"����n�'~�b�ذN�'¢H^�_T��d��c`���'�W&����h8�\��Sxf�l�V�V�1�q��f�3q'Y$���0n<���6T�֢���������j�*{��1#�6}�nj����x���$p���%�	'�Ew���i(Y��O�L���$�M�!��N�K��$�BV�g�xIx<LLb���&sS�`[^�aLMc�M��?��I���z�t�PE�%g^�V�^9*8�)Ф�Ŵ=���$��OK^�0�w�����e�����K���.���.���|&���1Vx��~�B������D��~1��$e���.���*Z��-�˒8x�G��_��9>w�r��^��_E+[DY(GL�������vć^��~���jok(=���n��I<�O���B�xe�� Ԧ��$U߳�IR�Up Z"������.0�(�֎i�
�Йf>��l�\�H"��UJ;Nl�u���L���je��eN�.*#�?�K����{︻�B��%/�A&�h<Ķ����kTsV���ơ^T�2�K�
+��9���t�ڇ�򌓱m�k�2K��`m���.��y�
u�y}@oWU�29�����[����)�W�%���P6:1>4�����MY�K@�U��Y�[F���d���)z�
+6�h.M�"���ꔤ�D���Ik�$�>��]�����k�w��=��m+R�`��橠#�����h}b�$�BKz:���ac��&yx�mІ�����H0!�!�f>{=�È�I�\�����|��$/,M�F�h�l�ʛ`%w���xG�W&.ۉ�u,f�7���Oך��j:�x���;�>�(�C�lg(��{�����+#u�M�f�Ԏ(�8|������kr#
5"x�h�+�˒�D��?8����Gڃ픭��ʑ�X%/2�(�]%���L.�d�K呶�Qbl�!�#U�~A�h��*G4��q��«TA~	Ͻ۶���Ż�+<w�Ǭ�,�{���~�2?t�Z.�_���>�<|��2a\I�m�u��ASA��Y��/��3�\�o0�rx�Bz�&V��
��o�i�R:=�3���a��L[g�aJ1y�
+m�Ǟ[��?�����+[=��g+³��<*�Z�.��� �1��wdȂ;_�;
���>��=�^�r�줱j��
+��`�*M�צֱ�袠��QA˚��7Q����d��F�_x�(쮇Ǐ�g��FPYXim�>�n��]�hB���uu踍M�&�^/n3���{I�0	�i�˶C</���$)�0�Y	�]�Z�����O�b��KꉇI��?/�Aҟ6��FV�������������O����h�̾��vq	]+m�n��5�G���[��#�3���ax�Xnh�\�B4�G[��<������e =��>�q�8�-pi��[ł�)C�a?c�}d[w�l���O�GN`V��.�6l��⼷�7)��#����ܞ#�P�����`�g%�g\���*�^$�5��X���G�ב�@8G�Bc8Tn��)��fXq�o/�}tq�I�O�>n�wP_���΃e����룏��Gb	����1���vׯ�&4�4���#?��;$\v2�<*�hEK�$�%�M��hL��YU�_.��V��)�h��Z�L�d%WS�����)\��������;�q��f�)r��AHitq�m�S�Y���u��������XܙԳ<�<&\�}���ZY24E�!?�FS�6�;��!�df@S2��1ic��s����s.�vRp��,�"d֔��=Qٴ�d�j�s5r���S.�8�F�����(���3x
+���u��4��@o'��)9w�}�N�T�(ԝ����f�_�.���й:t����:nM/�Њ��\X�l߰�~:�4F��<��J���E��G_XTD{-N-�s1��p�CFM�mV�2a�:&&�K��җ�RwU�3�T�:��䒉�_�f�ǣ����fT�w�s&>���*�ň\2��g�6h�j���bn�b����0���S1�k��"�=��5��ȘU�n�]��ܶ�2k����7�βQ��e���l��O���d/�"�
��GK�#=O��p���H�d��D-�˛ky��uP#@�-��	�>^�hn���I�����o���w�Vl��3��Չ�
+����&����,f��0Im+4��]���S	��Fb�v��������5yL���_��}�%w� �O�.�Jmqg��熓d�in�ST���`�v::�
D�9��̹W�c_a�l��%�7��Q��f��cF5,�N�Q/�GؐK2����N{k�-��M`dՁU�&)�D$K+f_W��<2���p)��cn0�~z����&hH	��I���T�_\�SIji��m\���,Zr��Y����΅	���l���ĝ��l���|��z��M��Oږ
+�LP��eAs!����a��&���%�7-�v�x��;�
+��<�Ϛ�O#kN��&9ٳv5;{ݼw�.�:�
+V��^3�s0��!�P��t$�d�M�i`�G��;��P��O��̬0�\��
+E����#b�+bd-;��g�=�����S��~��)9��RG��@�1
&w��]ք���DP�痺��=|�QA�z_~o��ģ�U9��p����v[��6�?�B�	�)z-�qr���i6K
+�9
���6׷��דgo�L]���g3�Mn���t�]_Nc��RS���[>$%��m�v�:�a���ӽ$�̻+�D�R�D���"-*�Û%ira���,���0�C���x�$�s0~�O���x6�f
+C�.��Eȵ�Q���o�l�˜��g�f���m2�w�7C�w�P�J��7��^���U��Lg*�n��
+͋�(/ȳ��:1�$�>X�q��+�\��D������l��6�DypK�ϔ���ꦫ����z����p�6�G�=��o���m�ujB�.>VC���s%�ؗ�"'93+��]$�{q�H�)Ej�Ǥ�/l�EڦyO��XI샜s3j�3WX��|��¶[�0f�!��g��5��{y�=���+]n��O�t+,�s�v�
���`�hi��ϗB-�����B=e���]������Vܟ�R�7Tj�3b��!��\_�=R,�r�¾��l���{�J4��x*˜>y��Q���G��"��%�vε�^���n����>d���f{�I�+)˫ޫ�
M�C�|R���}���E�V+���n;t��J��Ne��Y���ү���o-�u�4��c�ȩ�ka-˜RYÊ�A1^���Eb������?��S��1�X��~>���$
+K	���֨w;�'�"�&^&�,=w��́y�F"���?���9)��N�p&�T����(
+������%��Lr��j	I8L�D��C��؎a�o�������P�������4���2j������Ǒ�� ��A�-���s��8U7D����W؟; ���_�݅����fO�;З/δB�3���ˠO�NT�w�+���yki~=d�ep?��-��Y�J��0��:�-cƶ�ٕ��n��`��	C^@8��I1�hW�B����ɲA���Sg9�",�9�χ�]Ķ�Kb�&M.�kL.H�R��Q^p���/)�Ya�X��Pa�͢���M1=���*6^��āwp�X5M��يzb"�6x=�F�֞���e�h ~n�Y6��j�ݬ7�"�Rp�"���\?X[E����;�M�8�E������Y�.A���v�$@�!Y���V��e�����by��O��"����4�<�}U<A�m$����]2ˆ��A/"�k��hs�6�K7'�y4U�G����A�.��HI�|_�2������~�����#�Rm��y�
+�$��d���>;0��0A�cp�T+�dۄ���%6!���ٖ���c�xsR�δ�IX�g*�hr�)��l�Ev�n$Fm[{"S�+�GNĨ����,.Ü�o柡�g�^��Ț�e%�e���F�-�
+��/�rK�|q�&����&��;�g�-e��O�}m�W�
\���qP�.GŪ�II�F�y�C�%�m���-ɯ�[~�
\�;<u#S���G+p5���Q��<��^�0ʂ'��K���[^�f��R3�v�IX#I�7�l\d���>ֺI�Tէ���}�5ɊF�"!SHdr�d���H�^%��Bs��[�J�m<�����g�Ys��I!lT�*),�:�g"{RF�j.�6ڍ��%R�G'��D�.x�ĤOIұ��Ա�m��j����To���z��V&k�r�����p���� �� ���;�.}E��B��%n��z��е��*c�:�n��Ux9j�$e�Sْ�d�]C��&�:��i����#�D�7�;�|����g��h3�֣O$���>�������Vae���Tt�@i7�?�.n�M�:}FH�-�	T��f	3�	�rad�55��&T�+j�킳��
k����	}����r~���5)5lYDN$`���
+�:�Sg�.T�h�0Sr�򘱈s�
+������ǋ�#����b����3�B្�hLcs50�/��:J�$m�m4�C���v��
�@p�=,�aT��R�$�&<��'��ڄ�'EV�MIF�-��a���~��~?��!�����mI�	Z�%%t�mn�����چ7���jn�����],���y�w�����e�b��ͭ�f5\Q���5�<k�ko��?��ʖ�g���)bi�/�kq!E�`�U����!�\pb�!��l��">g���uty��p|��x��[� ���$�������uA�������<���������6���G�̂��V쀺�_n>���ѥ���CP.�$DD�Q1yI@
cU,��������0+�/m����l��۪�4�5$�i�8B�4� g���M���WE{�.����y�?.h�����i�A	l�9�8hU��
�}�6?~�p�C���e�c�'�G>�1��2��U�	��lBrTA|Ǹ�ڗ��o|y����m�Ye+����L�x��i3
z���z=��p��O��?�<n�Q
�u~*EYlAaY�������n��g�q4�eGSc�����~�(�����x���@��A�+'`^p���u�
���������.9;��%�߽���sgo��Nft����X�\�od�Cx�l�*s�!0&5�l��`��h�o�ܺ������kO��>�m����^guv
++\�P@fz�,^�7�{ؽ���nEFi�RS��yݻ[*sM��d�)%5�{!�W�[��ە�
 endstream
 endobj
-601 0 obj
-<< /Type /FontDescriptor /FontName /WYFPCV+LMRoman12-Regular /Flags 4 /FontBBox [ -422 -280 1394 1127 ] /Ascent 1127 /CapHeight 683 /Descent -280 /ItalicAngle 0 /StemV 91 /XHeight 431 /FontFile3 603 0 R /CIDSet 602 0 R >>
+617 0 obj
+<< /Type /FontDescriptor /FontName /SITTSY+LMRoman12-Regular /Flags 4 /FontBBox [ -422 -280 1394 1127 ] /Ascent 1127 /CapHeight 683 /Descent -280 /ItalicAngle 0 /StemV 91 /XHeight 431 /FontFile3 619 0 R /CIDSet 618 0 R >>
 endobj
-604 0 obj
-<< /Filter /FlateDecode /Length 734 >>        
+620 0 obj
+<< /Filter /FlateDecode /Length 737 >>        
 stream
-xڅ�_k�0���)��B��ƶ,�-!�:	��4��][���!�~��I2��
--���=�������r0��;���x���Еv�+v��մ-[��O�V�:���KזKۋ�|1]4u�/�rs��iտ=�u�\��8����|�9��_���ۢI���]6E7�����߸��Y)ܴ�lZ���m����A$�q��YS��֛�GC
-Ó�U�TՊ�=JRQ�eO
-˭˖߼<�{�]4�6���E��}w�o��sWٮn���3�n���m�$�h<�]��]����C��E�f���i*.�ގ;+��	$�me����]Ѭm4ri��b4w?��6�_�1v}��n���;}�*�?(yt�q���4iH��q�tJ�/��$q
-��<��AS��$Z4	Z$�M=AY�����T�@P�IP&�@Z�Bx��h��)$8�<�$�ӑ�	$�)�Z��1�P���=��H𠑥2�:�M����A���񨚄{Ƞ:�M�I-���<��@�I� �k�[Q�%G��[A^����!��{H�V��b82$�T�O
���
-4t�4�w��S�B4u�@�h�4$蔌��Z�y�:�ȼ!�m2!��#A��#3��f�G
-�6pdH�%��3�gH|�?3��9���1r�@3��2��2�5����f�(��@��.t4�}g?7���u������uc�_�]���o����t����o���
+xڅ��k�0���Wh��!�mYR[B ���4al���d��6���~��I2��
+-����s���/o����?�@����v����`�m�D77�:?lտX[��<�=����W����lY���[�������U�^�lweu]��۵�1X-������o��aS%�����M;���u������nZ|6-B���ʺz�}�n`^���MvѐB��,}[VEK���k��Teޓ�����7�N]o�j[G����e׷���.���m�j'n?���M��^����Xv�b�:�lV}���-�ӧ��nZ�+��	$�ua�f��vS�l4re��b�p?��V�_�1v}l�n���s��mZ(yv��8KƁ��4iH�A�@�Ɓ�	(A���R�DL�b>�A3��$�4	�$�͐}
+��,A�I*S (�$(��-R!*!5�h�AIƄ#��td@tD��zJz�ze�5�et��#<hT)�����A��!C3x�$�f>�&�2���A��AR<�8?n0<h<H�Z�TzI��!�TPG)�)xH�E�R�Up���	:�SC�!A��
����A����j��M�p�y�!:
	:%3@��T^��4*oH���@G�n�DG�i��ʣ���'��B�
zI������Lxs.��L��t��]G�c~���Ϛ���Q�Ё��1޻���ѿ�8?��{��� ����-+{�^4u�w���9�<�.���Y��
 endstream
 endobj
-226 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /WYFPCV+LMRoman12-Regular /DescendantFonts [ 605 0 R ] /ToUnicode 604 0 R >>
+230 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /SITTSY+LMRoman12-Regular /DescendantFonts [ 621 0 R ] /ToUnicode 620 0 R >>
 endobj
-605 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /WYFPCV+LMRoman12-Regular /FontDescriptor 601 0 R /W 600 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+621 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /SITTSY+LMRoman12-Regular /FontDescriptor 617 0 R /W 616 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-606 0 obj
+622 0 obj
 [ 28 [ 459 ] 43 [ 406 ] 46 [ 707 ] 50 [ 406 ] 63 [ 511 ] 66 [ 250 ] 72 [ 250 ] 75 [ 772 ] 81 [ 459 ] 96 [ 354 ] 104 [ 668 354 ] 112 [ 485 ] ]
 endobj
-608 0 obj
+624 0 obj
 << /Filter /FlateDecode /Length 23 >>         
 stream
 x�c``�`R`T����p�K�
 endstream
 endobj
-609 0 obj
+625 0 obj
 << /Subtype /CIDFontType0C /Filter /FlateDecode /Length 1667 >>       
 stream
 xڍU
PW�%�I�ڬ�J��Z�)={VƱ#\�Zm�Zd!��ͧ$�+ZPHK�R�4
@@ -2586,33 +2654,33 @@ f
 ��d�/R�'�V�=�l����Bb^�{����5[��J���
���w�BW��^�*���:!�T=�_�dP
 endstream
 endobj
-607 0 obj
-<< /Type /FontDescriptor /FontName /NUBNBY+LMRoman17-Regular /Flags 4 /FontBBox [ -400 -286 1338 1125 ] /Ascent 1125 /CapHeight 683 /Descent -286 /ItalicAngle 0 /StemV 83 /XHeight 431 /FontFile3 609 0 R /CIDSet 608 0 R >>
+623 0 obj
+<< /Type /FontDescriptor /FontName /NUBNBY+LMRoman17-Regular /Flags 4 /FontBBox [ -400 -286 1338 1125 ] /Ascent 1125 /CapHeight 683 /Descent -286 /ItalicAngle 0 /StemV 83 /XHeight 431 /FontFile3 625 0 R /CIDSet 624 0 R >>
 endobj
-610 0 obj
+626 0 obj
 << /Filter /FlateDecode /Length 427 >>        
 stream
 xڅ�Ok�0�����@zp-;��%6N�mZ���i�Ē��C��J���a
6�yތ��5����~Hs�hr�a����(.k6���*��Q���#�Z#���8/օV�Z\:���ߢ�����u`|�_��m�]��~>�LU�d���]Jq�vP�����W�[e���6��GH�9��Ó����&�F!�����wG�%)H%ڞ�ST.-߼�6-V�>�\B�VVMk����_�D��ƷL:Ѿ��zC�Y��ē��rږB�(����z3�ϦõFH'dY�M]
 ��>#[�xˍ�2�Z~�s�:��`xM&CI�)���n��$󔮈&DO���@��j3�
�C�i_�N�@��3ׁf	�&МZ�D�7��ܰP#Z��yF�	������Ztֺ���Y��Ə�P��w�;��\{zٰ�Q
 endstream
 endobj
-225 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /NUBNBY+LMRoman17-Regular /DescendantFonts [ 611 0 R ] /ToUnicode 610 0 R >>
+229 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /NUBNBY+LMRoman17-Regular /DescendantFonts [ 627 0 R ] /ToUnicode 626 0 R >>
 endobj
-611 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /NUBNBY+LMRoman17-Regular /FontDescriptor 607 0 R /W 606 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+627 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /NUBNBY+LMRoman17-Regular /FontDescriptor 623 0 R /W 622 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-612 0 obj
+628 0 obj
 [ 27 [ 850 547 ] 34 [ 800 625 ] 42 [ 813 500 313 313 862 625 ] 49 [ 738 513 563 ] 54 [ 707 344 563 563 884 563 ] 63 [ 625 375 419 313 ] 70 [ 594 676 313 ] 74 [ 1067 938 880 625 563 ] 81 [ 563 563 769 625 438 438 ] 88 [ 313 ] 95 [ 839 460 625 444 ] 100 [ 563 563 563 ] 104 [ 782 438 563 563 865 625 850 850 594 ] 114 [ 813 ] 116 [ 594 ] 118 [ 594 688 500 563 656 ] 125 [ 625 ] 502 [ 313 ] ]
 endobj
-614 0 obj
+630 0 obj
 << /Filter /FlateDecode /Length 28 >>         
 stream
 x�c``�0�/��x_]����0���o
 
 endstream
 endobj
-615 0 obj
+631 0 obj
 << /Subtype /CIDFontType0C /Filter /FlateDecode /Length 6194 >>       
 stream
 xڝy	\S���!9�-� ��9Aq��_�Zg��Eq�9 �(��c��yF&$���S�T[{�uj���j�d�k�ޖ�:����xo{���o�2�}�^�[k�[;"�Ɔ�D��6��ܼ~��禨���sf��
@@ -2654,10 +2722,10 @@ O
 ���*��r�7���E���v �V�/(��%��w��M��*J����Å�G�7��<�-
 endstream
 endobj
-613 0 obj
-<< /Type /FontDescriptor /FontName /RRZMTJ+LMRoman12-Bold /Flags 4 /FontBBox [ -476 -289 1577 1137 ] /Ascent 1137 /CapHeight 686 /Descent -289 /ItalicAngle 0 /StemV 104 /XHeight 444 /FontFile3 615 0 R /CIDSet 614 0 R >>
+629 0 obj
+<< /Type /FontDescriptor /FontName /RRZMTJ+LMRoman12-Bold /Flags 4 /FontBBox [ -476 -289 1577 1137 ] /Ascent 1137 /CapHeight 686 /Descent -289 /ItalicAngle 0 /StemV 104 /XHeight 444 /FontFile3 631 0 R /CIDSet 630 0 R >>
 endobj
-616 0 obj
+632 0 obj
 << /Filter /FlateDecode /Length 636 >>        
 stream
 x�}�_k�0���)��B��F�,�)!�?
d,mI�{K-�3$����|�I:7-��BK�W�ٺ���M��~��x�lc��ܖ6���Mrw����ɺ��Zc͵�=���.��g��j�rU�W�<���v�n�ُ�}�6��_�f�{����Xo���ey:��&�aͮꏾ�m�����#~ڶ�j�Ĳι��̼>��uɐ�����r�%��=XN���������C
@@ -2666,60 +2734,59 @@ x
 �zy*�	��"=�VГ8�3�DHI�L�$H)I��BJ�)IR�=@]���5|jJ	�h���E�4%�d
ך��,����~��z��$N˖avγ��d���n�ϻ�<���(�U�pT�~�fM݄U�7^���8��2��Z��
 endstream
 endobj
-224 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /RRZMTJ+LMRoman12-Bold /DescendantFonts [ 617 0 R ] /ToUnicode 616 0 R >>
+228 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /RRZMTJ+LMRoman12-Bold /DescendantFonts [ 633 0 R ] /ToUnicode 632 0 R >>
 endobj
-617 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /RRZMTJ+LMRoman12-Bold /FontDescriptor 613 0 R /W 612 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+633 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /RRZMTJ+LMRoman12-Bold /FontDescriptor 629 0 R /W 628 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-618 0 obj
-[ 27 [ 814 613 ] 35 [ 580 ] 42 [ 786 ] 46 [ 829 ] 50 [ 558 ] 54 [ 713 536 ] 62 [ 814 ] 64 [ 378 406 302 ] 70 [ 636 683 513 ] 75 [ 747 ] 77 [ 613 ] 80 [ 844 636 ] 83 [ 742 558 ] 96 [ 602 ] 98 [ 458 ] 102 [ 553 ] 104 [ 786 591 ] 108 [ 814 613 ] 111 [ 814 613 ] 118 [ 613 669 ] ]
+634 0 obj
+[ 27 [ 814 613 ] 35 [ 580 ] 42 [ 786 ] 46 [ 829 ] 50 [ 558 ] 54 [ 713 536 ] 62 [ 814 ] 64 [ 378 406 302 ] 70 [ 636 683 513 ] 74 [ 989 747 ] 77 [ 613 ] 80 [ 844 636 ] 83 [ 742 558 ] 96 [ 602 ] 98 [ 458 ] 102 [ 553 ] 104 [ 786 591 ] 108 [ 814 613 ] 111 [ 814 613 ] 118 [ 613 669 ] ]
 endobj
-620 0 obj
+636 0 obj
 << /Filter /FlateDecode /Length 23 >>         
 stream
-x�c``�PRfz<�â��f�
+x�c``�PRfz��â��&�
 endstream
 endobj
-621 0 obj
-<< /Subtype /CIDFontType0C /Filter /FlateDecode /Length 3452 >>       
-stream
-x�}X	t��!ό[�L�40#��1��$e���Zh،-l��lY��&/�%=I��%cc��K6s(�)��(������?$�IS����TN�,C�SIg��Νw�w��{#A)��W/X>o���o�4�Lʘ���=)~�J]j�>�2�/��cQ� ��
-Y �JY��D){�"����p��Ƿ��8�B����@TT,�5dyԨ��7Q�����A�R��SS�1�������s
����4�vr|��WC�7�sⴋ��w�w�k�2R���o�i���j��C�v�.-I�Mkئ]�[�������FCNf��8���lm���CK��:�.)[����H���4�v��U���
&���d]F�N;a�V���i�L��7'N4���������Q���}�/_�z��Es�Z��8S�I��`Ԧ�LI�������ƝI��)���E)�S���"/�N�L�����k�Ϩ�,j65��O-�RK���Dj��ZMm��Rۨ4*��S;)�I�RyJ�z�I�!�*�@���|@�ʣ�Ei�ne(f	�9��=���G���>��`���w��E]��}V��C��9�
~�\ʀ'�_�.)��(qQ���*�5��B���J�Y����M�����c����H+�gי�Z=��14�c��	��٬��U(Kг�����P��VT�WZ%`�_�)� ��f����]U��.�,9��k������$s�MKmÌ��������JKl���U�ٍ�]wHfZVѻ���[4�T�K0�ߺ讣6{��7�Z5^���|IP���������@P_^rn�O�������o]�
-w�+n�V1Ȱ[�,#�cAy��k�(p�S��,�����]��ߘ/�g��ة޴}A�0�I�r�����[�3�DU�=���W�>���2���^T[bF�2��GW��?���x�o ���������N�u����Pƾ�j��ը�B���k��s�x�O�S�f��7���a0qy
�O����ޖ��+V���U�e���J���p�S�`+�j3������y0^�D�1c
-�nX#lܺd�t��[�!Q>�r֞�f>�t׌�D�ԙ�|�$6_�i��^�{��AF�J�=3��Z���ѻ���/��O������yE	�12K�fo��|>d�#D��a�O��|�NW<-��� �F��̪�F�7t�)?����w���D	^��u���;j���+Ϡ���pf=��su����ؗ�zX�og_E{�E��e�^�O �5��R�XaxN�>���k�v�����Ol��;��[���V��=���I���Ps�f��"L��^��.
-'b�9w����5:w���7��G~��0�!��;�ʀ=Ɵ�ΰ��r+{��!�2\���X�f#���f!W��oe�R�Kp/��[��gfKh��)���N�k?�'d����F�ōOۇyff��d,��h��J������J��(:� Ɵ�N���������5m	)eZ_@m�MW@V�4!Kϸ�o'Go���yr
-�g�W	{	�̫r������ �8#\����]�EF�[����@U�!����)I*�}q���b{���6φX��;���I��Td7�p:�E#�y,fM����b���4�cnϕ�Gʊ�����P�Vy��������[R�_��϶U��w��~���|&�4����"�G��b����K��{lj�Z8�pZR����x���~�2�?T/aT��/k}]�]�w����+�&c-�4<�2˞nD
-"�����c�'��@ix�S�
���$��c�����h�dE�"�|�P�D^,(���Fҁ}{�,�߄��%I���9w�I�Mɘt�E�(����-ty����{�uu5"��Sv:�4�$݇m�C���Cq-!n�|E.VKl���h��i$��Rz�Z��O��=YM�)��|G��j�@��b�'�=0x�zK�Jp!m��B�����@�
-�������R���)���S'�x
��F�{L�S`%��I��Q�j܀GM�),ޅ:�����b_����6�TR��
On+��`����0����Ax���h<Ɛw4��B�OM�b��y����H�����^���
w�qˑ6��z���z��$[��F�/)?���~�~�3��"����O��b� �gx`�H�3�I+��I�Og�F��y�n}^����@�����bR�<T�
-XfϫE3���{�GVd�x�o�������j7����²�T{%�W��g+�Z:J.Ut����y����\�|G��G�|���ߡ������z��?eo֬B�BASO�	�R�G�"^
-��)Usq�0�3���u��v��_�eՒ87Kv	�j�#}#9[��9�gg8�,��X�ŔcH�v���Zs�;m�������{��=�_�	�Q#T:����A�}"g���e�=�H��*+��D�J��~+�h��U�ח���l��*��%^ہ�H�e���=s�f}ʚ�Ykj��������#w�+V�U��l
-�,g�
-Q���p�4
-d���6�x"9�γ���7ݸI �������)����יK����~�d]U�,Hc�+����������]�<HCNL�/crxypZ��,yQ���/1�8��������}���[��Ԥ!����c�d"�����G�zL�'����+$1P}�Kpz;�>��g�i
$�ϑ��"o"d����/��|�,�␄�2�xz��к�&P���M�&S�K������C=<w��w�zT�r��>¡{܅>�e��Ƞ�#����!铣�ʀ=�U�Y<Ȋ�h=�m��}���58���gZV��t:�űd�0���A֏N>K0B���S�0ڌk�ސ�p����ő	��]�h���}\c�"❬ms�o���M�
-�������=���<hϕ�����ː�4����Y<fB��vh�9�x}�'�^�Vc���t~�:?kM�G`b.d}cwuz��`�.�8-{�w��F9
-˷��m E$0���URȋa�R���i��3
g&�m���ט�Uf��/	
�&�Ѵw�w	�X+��5l|�S��x\\J�x��{����x�#On�x����TY ]}�*�/�}���V[Z�)�(�$>_������J�W�܉�Ù�3�Ν��l�%�I����ƚ�eA���"İ�\���o5uS&�.�	I���J�4���ڊ�8��ǋy�����"�G�ׂ򄠞1�G��ڋg@�a&��te�Vjv��~A.dT�
-��|��O%��������5���0�����QQ�`iН�-.���t�:�g�QQ��y���r�Ը\Q��c���
+637 0 obj
+<< /Subtype /CIDFontType0C /Filter /FlateDecode /Length 3545 >>       
+stream
+x�}XTSg�>1�s@!uLC�9zN��������E��B��F!<��$$ XD��h��r�im��[��e����\�c�kg�׽�>�O��Ao��&a����s��������
+	�d2��W.�?a�ʵ�=���3���LZ�M��%��QR
+��4R&�$A.iB��pyOx���A��6=��dG^	\����G����8�>�
+Dɨ�T�H�Ȕ��2^6�_��3}�>#ߐ��j�L���61p}K3/Z�,1i�>7kw�&1=Y�,Z�2Z�J�KN�4o��5;������N�:�FMv�֐�I1�3��Ek֥�eir����i�괉Y�dMvz�֠1�j5��'��,ҧ5+Ғ��YZͤIM�V�I53ޞ<٘��7�L�Il�&�FY�ߛ�h��u�V,��pU��hc�Q�So�$k��i���
+�'���=�����ڥ��tc�1�"/�A�N��������,j5��G-�SK�e�rj%O���u�vj��J��(���ST�+'�'�(**P���Y�쯃�]�/��I	)�D�t+C1˙;l~�,4!TӄU�8���9D�/��ѧX�8G�����2�:�>w�7�]^Z�Q*�BC7Uت��Vsq��3��ʌ{�N�
ױ�C=�B-���hlku#��^�-D�p���'Ѯbd���P&�c�Ǯ���^�-(�/�B�A�_�ʺ V�6���H�M(sux�O0W�M\Q�w��)�`�
+�p�h鴷p��"��tժ�!G�]���z_��4�Z��
+x����:a��
+xlP���=0�7�{[ E���Ϡ��r�
xՕ����7^8}Q}i�e,�q���u�w~��Qi^��<��W����Jɜ�:�C`&��rv�c�n����C�u��~�Z�� Ƴ��g'
+
+_{��7�+�{,yD�o�<��؄̥6�)�a��!�4���B��k�?zG�v�~�r��}՘��PI9��Yt׮U�Gsx����mx;�O�5�}#`qyy�z���ߑ��}����:��E�_�ken:L)���M��f�Y:I��o@���ȱ��7���X�;��4��ZsT���G5gy��O�(�uQ8_7}�Z!�'��!Yv��]�k�-��dx�������w�����:{�>w� �;fII�\~!���#Fy��жT,�D&�h��g�D��*)�·y�p�g��`�ݤ�0Y���r�o��Jٴ��9�CP�I�*8��@'8�����rL��b'��������ŒКLl�n?	܌R�>����怠���避~����⫷��
+�V����&]�W��?�}(��^�F]��`"��r��K`~0�20�s7���Jp~ꅮ}?R軫\�Ez��Yֺc���s���[e��U���EZN���������O.�}P<`�e�������fj����ce��?"K���%4Bn|�>�s3S�L!)`�EY��EX�����a�xg�O�!#���Lv�e |g�-���XRg���g����rd1��3u���qr�V� �'%�yf��p��ȂJ�)A
+aO�\��"�C,8�I��]�����rA��P�J�<B<+��C}����!�ؕ�<�S�s����C�S��"k5�\Ư�"���Y�f�:� go���&�I��7���`*����*P�%��0��n�p���������违t��fm'���$M���VZ|!PG��"��+�I�2>8��]��[�(�Ԑ�/��Ġ����ZP�t�QH����w�򚴧DY^:k���X��F�� Ү��X�2&i2�x����̌����?�}�'�8�1ZP^9Q�g�P�%
+cF9�{>�6mMDЗ��l{�s�v��!�.�v�y8����Z�z�P����=�>��W�0��E��I�J��>�?�\V��/>r �3P)��4��&����a sя�c���h�|R�;NZ�j<c�������Â�m��Oc[+�b�yVzR��cV�Q2����ZN.�R�T��X��%�[�i/er�`��,~OR��ˬ(��ၸZ;PE���ٿ�����/�M����5���������>^ờ��{SR�c���QA"яX���S�/��l�ix���0ba3?�Q���c0��{P�������L�8��M*e����;r�Kا�qx�o�qx0J� ����a[�"�5h�x��D����*Л�jrÂ~�;����.<Fw��a�c��S$�����ģx��%c����o^�2Xʒ%$��������Y<��~j��,�m�z�=����3f���T}��a�ǭ/n/�P���ҟ���d4��9����$��>�����®A֝?퐾��r!O��VVů�H�Up�e���B�sG�����ss:C�7}z���ק^�<�x)����(�b�6��+�FY��	(����o�)<�]n��Y�+�*�r>���"Ftx<.�Yӎ���-��"�&� ��-�ſ���a����s�sJ�$M�(ե䓈�y|�sFs�+u����y���gz>�e�]�W8���ۏ\�@�J�6[v!_YYZXe}_��~�tq\]�%�{{�����]��z8'T�2�U՞�M�)y���������ݖl���l�vW�*��\
+s��D�E�����ȓ#���X�U��dv~d����ӛ��q~Γ�\�d�����jM�Ue�*?}:��,Af�6Ֆ���������y������q�+aBv�Bk�ܗ)m$:�M�����i���
�m0�;؉��łJ�jқi�(�O"u�:Ýy�,'o��yշ8y��� �P&��@��+�[���,���>�u�2O��jQ`�(a�p\����&�s�8�7�M�"S�CT����G=��h��� 氳���S�)'�����%�v�,��y�,Ex�>�O��x�q�:�1ڜ!�,5����qD'���+�7��������(2�2�o�?�Eg��D)µin���
63�O�;���W���؍������<��$�W�����5���b���?�MR����.�A����r�#m9�#�q�sR��~�V]�-��L�[Þ|u%"�<��/�I���V[j�՝���ZR�X����*�A��O��ܻW��,���J�g���'G��2iL�Kw�s2l9\���� �-59yj������(�5�!?]ބ���.�.Ae�3w�ϻ+v���+�b��b?��p7���N;�ß)��ka�\�z���G>[�5%{Յ�%��+x�a]YS��;Q}0�tF�ݱ�XTo�l`6����nY\����,yZ�u�wGN8�+|SP���$���t�[��<;���2F�"=�k<��V0�\ޯcL��y���98�����1]Q�,�:�K{E�7��>v$y�
�*��02u
�׹[~�:���0>D|wH��n�;�5�9Ox��n>T�t:��������
+ߓ
 endstream
 endobj
-619 0 obj
-<< /Type /FontDescriptor /FontName /ATGODK+LMRomanCaps10-Regular /Flags 4 /FontBBox [ -496 -290 1501 1100 ] /Ascent 1100 /CapHeight 683 /Descent -290 /ItalicAngle 0 /StemV 106 /XHeight 514 /FontFile3 621 0 R /CIDSet 620 0 R >>
+635 0 obj
+<< /Type /FontDescriptor /FontName /DEMFBP+LMRomanCaps10-Regular /Flags 4 /FontBBox [ -496 -290 1501 1100 ] /Ascent 1100 /CapHeight 683 /Descent -290 /ItalicAngle 0 /StemV 106 /XHeight 514 /FontFile3 637 0 R /CIDSet 636 0 R >>
 endobj
-622 0 obj
-<< /Filter /FlateDecode /Length 513 >>        
+638 0 obj
+<< /Filter /FlateDecode /Length 516 >>        
 stream
-xڍ�_k�@���)f��:�1i�4�"��E��k�\݀��I��ߙ9��EVP���gN����O�O9�S[�� ?{�o4Z�E_��ވ�!ھ�wU��8ۮ����x+�k/hP�[��K)�$�闿8~߭~�?_�u��,oڀ�{���\�ܤ��S�Cʹ�ݓ0[��T[����s��Z���́[o�f���R
-�&g's/�(�Α�-*�I>�ڎ��<��|�&�s�v�f��&;%H�������C�4W2C1�)t���ooyElb��
--��][����Xh9��E-�m�T./�͵<e���I�W�#�tj��8
�P�;W�P�ԅ8���Rf)�SPZ@9�A��i�#����'K��@b� W�j�KjF�	1G8Q�j�@kK3�6 ��Zb)���b�%p"�d	��1Y�*1f��=�U:��3�c���1���~��!�	�>[�x��M�%0���xE���:��v��>��>oHS7&�~��n������;�
+xڍ�]��0���+��s�1�����UAؙ�eokst6-i{��$o:�"+(>��|�퉓o���͙�虳u͠J���&�uS5���H���+۫�<RϦ�n��U���;Y�A��ߢ]+�%1}��D����m���?�M]ȼh�����B�ܤ����S�Cʹ�=�0[�'��j�+�9����yS�w��
�f�1.��M���^2Q��#�Y��9�|�w=�;yi�ł�t��������>� U�+�>T�C���Ÿ�eL�E�׾�5��qc't���m�J<�[b�������-JR�����v��-���y$�_q������(C��B�B�J�<2K��F���2m@��(�rJK@)b�\ދ��[
+נ�� W�j&+j�9�1GnNW�K�AKst�� tO��Zb)�����%��)|I0g�*	&��%}�t���,�d��-b�,u��C
+_R81_fx�����0{���堔�A{쎙��$}ޗ�iM�}ۋ6���zy�>}
 endstream
 endobj
-223 0 obj
-<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /ATGODK+LMRomanCaps10-Regular /DescendantFonts [ 623 0 R ] /ToUnicode 622 0 R >>
+227 0 obj
+<< /Type /Font /Subtype /Type0 /Encoding /Identity-H /BaseFont /DEMFBP+LMRomanCaps10-Regular /DescendantFonts [ 639 0 R ] /ToUnicode 638 0 R >>
 endobj
-623 0 obj
-<< /Type /Font /Subtype /CIDFontType0 /BaseFont /ATGODK+LMRomanCaps10-Regular /FontDescriptor 619 0 R /W 618 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
+639 0 obj
+<< /Type /Font /Subtype /CIDFontType0 /BaseFont /DEMFBP+LMRomanCaps10-Regular /FontDescriptor 635 0 R /W 634 0 R /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> >>
 endobj
-625 0 obj
+641 0 obj
 << /Length1 1539 /Length2 6778 /Length3 0 /Filter /FlateDecode /Length 7790 >>       
 stream
 xڍ�T�[6L7H�H���)�0�C��*�t	ҍ����  ������ԇ������Z߷���y�}]���u�^�¨k�#����� /�@QK��������ca1� ����X��p/*��"B�ٔ@�;�
@@ -2753,10 +2820,10 @@ y
 LX3Y�#�}@�e�!��f� �6e�g���[�	m�,�+&��N_|)��{�R��������!{���h����G����V��'SFG�/+=S$�B:��(X������\?�}��aŲ4:�̷��K���?4_T�س��=jܸ��j������I�&
 endstream
 endobj
-624 0 obj
-<< /Type /FontDescriptor /FontName /EINJPG+CMEX10 /Flags 4 /FontBBox [ -24 -2960 1454 772 ] /Ascent 40 /CapHeight 0 /Descent -600 /ItalicAngle 0 /StemV 47 /XHeight 431 /CharSet( /parenleftBig /parenleftbigg /parenrightBig /parenrightbigg /summationdisplay /summationtext) /FontFile 625 0 R >>
+640 0 obj
+<< /Type /FontDescriptor /FontName /EINJPG+CMEX10 /Flags 4 /FontBBox [ -24 -2960 1454 772 ] /Ascent 40 /CapHeight 0 /Descent -600 /ItalicAngle 0 /StemV 47 /XHeight 431 /CharSet( /parenleftBig /parenleftbigg /parenrightBig /parenrightbigg /summationdisplay /summationtext) /FontFile 641 0 R >>
 endobj
-627 0 obj
+643 0 obj
 << /Length1 1682 /Length2 10742 /Length3 0 /Filter /FlateDecode /Length 11823 >>      
 stream
 xڍ�P�.�������R�݊��ݵ��S(Nq�w-šw�t�����f��$��u}����P(�1��ٙ���l���,l�q ;���������F�l
�[�D�	rt�����B�d��*�0v~5T��ȺX� 7?����
��������#?@��lP`��ق��h���=��ίy�>�L�@>>�?��6 G���-@���d�����fg
@@ -2800,10 +2867,10 @@ jc
 ���������P�=����X�K!�*Y��UdUZMld���9���6n��R~6*&������2���Cgj�G��J��w
u��FH��!����`��$9�s��۶�H_`�@&$o:j�bŽ�]�QοKШB~�����Q��~�/0/��9�|rQZ�gm���!���Zr��u�E.T��hh�S��8���G��/A�&Y��MC���&��$���g�������
 endstream
 endobj
-626 0 obj
-<< /Type /FontDescriptor /FontName /ZTFMHK+CMMI12 /Flags 4 /FontBBox [ -31 -250 1026 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 65 /XHeight 431 /CharSet( /F /N /T /X /a /b /c /comma /e /f /g /i /k /l /n /o /r /s /x) /FontFile 627 0 R >>
+642 0 obj
+<< /Type /FontDescriptor /FontName /ZTFMHK+CMMI12 /Flags 4 /FontBBox [ -31 -250 1026 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 65 /XHeight 431 /CharSet( /F /N /T /X /a /b /c /comma /e /f /g /i /k /l /n /o /r /s /x) /FontFile 643 0 R >>
 endobj
-629 0 obj
+645 0 obj
 << /Length1 1507 /Length2 7379 /Length3 0 /Filter /FlateDecode /Length 8392 >>       
 stream
 xڍ�4�o6�%:�ޭN�ջ���B�����V'zB���E�D�[��Q����_����s��8g�f暹�皛�E׀_�a!�(~!�4@Y[[C����0�#�/7��
@@ -2838,10 +2905,10 @@ O
 s}j�vQ)�y��B+K�ה��R�zx��E��>����wۆ��5�)Sc^8v*_Pg��1K�l}-h�"d5�i�SX��+`�jC~2'���,(��)zL�����[�o4�@`y�7+�qG@m��{�
&����J����o�(Uw�
w�2g�<�S�P6ށ߸��U��ț�ƌAD�$˄��D�����R�#�7[Ѧ�ɾ���?�&DA
 endstream
 endobj
-628 0 obj
-<< /Type /FontDescriptor /FontName /PKEDJY+CMMI6 /Flags 4 /FontBBox [ 11 -250 1241 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 85 /XHeight 431 /CharSet( /N /arrowhookleft /i /k /n /pi /x) /FontFile 629 0 R >>
+644 0 obj
+<< /Type /FontDescriptor /FontName /PKEDJY+CMMI6 /Flags 4 /FontBBox [ 11 -250 1241 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 85 /XHeight 431 /CharSet( /N /arrowhookleft /i /k /n /pi /x) /FontFile 645 0 R >>
 endobj
-631 0 obj
+647 0 obj
 << /Length1 1512 /Length2 7787 /Length3 0 /Filter /FlateDecode /Length 8803 >>       
 stream
 xڍ�4��6,z���3�^g��{��F���%z�-z'zB��5j�AD	������o߷���Y�7g���}�s��3��>���������< 1����������
`(��n|f#�����@�
@@ -2872,10 +2939,10 @@ oaf
 [�u�eQm�[Y� q1���o.�:��X��_3eƬ��ۭ�&���4��KLWo�Tn�ˤ������a�v�U<_�h��䔾�ބ}QY���S��:G�Ne�3kb��i��~��S�I6��"���m�X�3}s=��'`��k�X6"߸�E#i���^�Fٰ�tM��NƇ2�7G>��F���U![
 endstream
 endobj
-630 0 obj
-<< /Type /FontDescriptor /FontName /XTWTHH+CMMI8 /Flags 4 /FontBBox [ -24 -250 1110 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 78 /XHeight 431 /CharSet( /N /i /k /n /pi /x /y /z) /FontFile 631 0 R >>
+646 0 obj
+<< /Type /FontDescriptor /FontName /XTWTHH+CMMI8 /Flags 4 /FontBBox [ -24 -250 1110 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 78 /XHeight 431 /CharSet( /N /i /k /n /pi /x /y /z) /FontFile 647 0 R >>
 endobj
-633 0 obj
+649 0 obj
 << /Length1 1566 /Length2 7096 /Length3 0 /Filter /FlateDecode /Length 8133 >>       
 stream
 xڍt4����vm5�6E�Q{�ګ(I��$� V�Y��jU�j�բ�R�g���=���}����w��������\�}�����*��B"0�b"@y�����8��Iyx�`8�/����v�!���A
a�0
��O���b1iy1y  ���D�4@0@_��D@�Hyԑ(/4��sU��~�@LNN���p��+
�}��zU�L�`����
@@ -2901,10 +2968,10 @@ e
 ���gVϡus+�ɓG�Vq`u�3ñ5C~���1둙����i%���̜U~1�o�I�ϥ�l0��f'
�|N��d]ޡ�u�(d�Z����p~�lZ�&X1�N&0�<L�|�?�yS�b����CvFLqx��3�~����y�뻱Grj���S|n����'Ϯ����2���l�$�n�D��wq�^JR�h���T���q-F�ŕ)ZB���=�j= �|��IQ�ϋ�?��D����CG<��CaU	�1*r�J B��>3��h��yr��^����zf	��Ťn����g6z˿��.�.��Gg� ���(�^�]�&�sɀ(?����6��,�����O��&����p�0�u�p�B"��Q5�����
 endstream
 endobj
-632 0 obj
-<< /Type /FontDescriptor /FontName /MABQHP+CMR12 /Flags 4 /FontBBox [ -34 -251 988 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle 0 /StemV 65 /XHeight 431 /CharSet( /bracketleft /bracketright /equal /one /parenleft /parenright /plus /slash /two /zero) /FontFile 633 0 R >>
+648 0 obj
+<< /Type /FontDescriptor /FontName /MABQHP+CMR12 /Flags 4 /FontBBox [ -34 -251 988 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle 0 /StemV 65 /XHeight 431 /CharSet( /bracketleft /bracketright /equal /one /parenleft /parenright /plus /slash /two /zero) /FontFile 649 0 R >>
 endobj
-635 0 obj
+651 0 obj
 << /Length1 1424 /Length2 6178 /Length3 0 /Filter /FlateDecode /Length 7143 >>       
 stream
 xڍwTZ�.��t�0��1twI7H	�0���0�Ѝ �!-!"%���tI4
@@ -2934,10 +3001,10 @@ g
 �D]M/פ��8Nu�jw�򿝎��LS�}�K~��yU*0��u�gaw���#S�^���V+�V��'~�?�j�L
 endstream
 endobj
-634 0 obj
-<< /Type /FontDescriptor /FontName /BSQGNB+CMR6 /Flags 4 /FontBBox [ -20 -250 1193 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle 0 /StemV 83 /XHeight 431 /CharSet( /parenleft /parenright /two) /FontFile 635 0 R >>
+650 0 obj
+<< /Type /FontDescriptor /FontName /BSQGNB+CMR6 /Flags 4 /FontBBox [ -20 -250 1193 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle 0 /StemV 83 /XHeight 431 /CharSet( /parenleft /parenright /two) /FontFile 651 0 R >>
 endobj
-637 0 obj
+653 0 obj
 << /Length1 1430 /Length2 6354 /Length3 0 /Filter /FlateDecode /Length 7319 >>       
 stream
 xڍTT�۷�K@Z��:D@�RI����E�K�S�+]�tKw
@@ -2961,10 +3028,10 @@ s
 �xU�4u��DX�1��gcwJ�����wJ&�-�Ĕ�K2�Wo�kl��B�D٫��?6r�Aj�����g��[�Nc�qڣM�23Q+?�����G�ڬ3�\	�*���*�
c��tFŧ|��3��8�="��N>]mGI�Є�qR7�F�U~�Nq�]�ԺbǢ!�l���c�m�AЀ!�jW"�]p��"���>�f=)�^F�Vg>;]���3�����)��I�
 endstream
 endobj
-636 0 obj
-<< /Type /FontDescriptor /FontName /CGCFOU+CMR8 /Flags 4 /FontBBox [ -36 -250 1070 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle 0 /StemV 76 /XHeight 431 /CharSet( /equal /one /two /zero) /FontFile 637 0 R >>
+652 0 obj
+<< /Type /FontDescriptor /FontName /CGCFOU+CMR8 /Flags 4 /FontBBox [ -36 -250 1070 750 ] /Ascent 694 /CapHeight 683 /Descent -194 /ItalicAngle 0 /StemV 76 /XHeight 431 /CharSet( /equal /one /two /zero) /FontFile 653 0 R >>
 endobj
-639 0 obj
+655 0 obj
 << /Length1 1518 /Length2 6685 /Length3 0 /Filter /FlateDecode /Length 7705 >>       
 stream
 xڍxT���6�"]�HwHw�t��000� 34H�tI#H#%��҂�������J|���9�=�����Yk���]���u�5�Lz��
@@ -2997,10 +3064,10 @@ f*
 �����I�
 endstream
 endobj
-638 0 obj
-<< /Type /FontDescriptor /FontName /SOARHH+CMSY10 /Flags 4 /FontBBox [ -29 -960 1116 775 ] /Ascent 750 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 40 /XHeight 431 /CharSet( /asteriskmath /braceleft /braceright /element /minus /periodcentered) /FontFile 639 0 R >>
+654 0 obj
+<< /Type /FontDescriptor /FontName /SOARHH+CMSY10 /Flags 4 /FontBBox [ -29 -960 1116 775 ] /Ascent 750 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 40 /XHeight 431 /CharSet( /asteriskmath /braceleft /braceright /element /minus /periodcentered) /FontFile 655 0 R >>
 endobj
-641 0 obj
+657 0 obj
 << /Length1 1415 /Length2 6047 /Length3 0 /Filter /FlateDecode /Length 7014 >>       
 stream
 xڍwTTk�6�)%2� 9�Щ��%!00�L3��"Ҡ��(-!��4�J)J��7�9�����k�������빞���ڛ���DD	�v���QX0$P�3���@�@H�����E���)x�a8%��T<`,S�`�~zh@���Rr`i9 ������B��P���F�0�*h7����/��-��Q����PB�<��@�u�!�!�	������_��u����B� �����0��u�00/��0@���a�����1p��������#<QP�_`��0p���8��q�u60��tE�JG��8:��n�r8�0���.�@P�_����xA������!u%#O�/zG����(��J�?e5T�D�PXů���=`��cǉ��+
@@ -3030,10 +3097,10 @@ D
 ����
�L�d��i	gi�=A|�a8�+@�$��\E�i�$���߃�l�t_�:9�j8�FU���	Kũ�uE�W�̬؊���Eƿv̘_���FZ�^���/'ׯ�O��k���`R��Ŷ�-�˗|jf�on������n�w��=�a�ňh�fP����
��7dm�
���q���M$ج��R����!����{C�J�8�_��0{W**eP�?Uj����}3n��.��i�ƚ;z�4�Wn�p@��V-��4SzC���Hq��N�L3�
������	�0
 endstream
 endobj
-640 0 obj
-<< /Type /FontDescriptor /FontName /YIOMCD+CMSY6 /Flags 4 /FontBBox [ -4 -948 1329 786 ] /Ascent 750 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 52 /XHeight 431 /CharSet( /arrowright /minus) /FontFile 641 0 R >>
+656 0 obj
+<< /Type /FontDescriptor /FontName /YIOMCD+CMSY6 /Flags 4 /FontBBox [ -4 -948 1329 786 ] /Ascent 750 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 52 /XHeight 431 /CharSet( /arrowright /minus) /FontFile 657 0 R >>
 endobj
-643 0 obj
+659 0 obj
 << /Length1 1417 /Length2 6123 /Length3 0 /Filter /FlateDecode /Length 7088 >>       
 stream
 xڍuT���6-N�	et��)��t��m��
���) � J(!� "%
��4�H� H7����{���}�y߳s������\�w���F�J0��.
@@ -3069,10 +3136,10 @@ r
 �Q����|�lud�˯��(�oz�F�Ŷ��Q䓟�ac��V�Qo��M>H=�=��bx��L�H����Z*_mG�5|��������E�ޞ�L`]��Vbg<�#7ǃ�՜K�羶4�'�t�'X��f>����Ym!�����ia����6>H����{��J�a�Yv�94�=�������vs�E��\~��[:씟&�gMQ7�!��|V|
��c\��nb��_����4c�")f��������/"�.����9_:[2��}
~�Z��a%_�����
 endstream
 endobj
-642 0 obj
-<< /Type /FontDescriptor /FontName /XYBSMX+CMSY8 /Flags 4 /FontBBox [ -30 -955 1185 779 ] /Ascent 750 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 46 /XHeight 431 /CharSet( /asteriskmath /minus) /FontFile 643 0 R >>
+658 0 obj
+<< /Type /FontDescriptor /FontName /XYBSMX+CMSY8 /Flags 4 /FontBBox [ -30 -955 1185 779 ] /Ascent 750 /CapHeight 683 /Descent -194 /ItalicAngle -14 /StemV 46 /XHeight 431 /CharSet( /asteriskmath /minus) /FontFile 659 0 R >>
 endobj
-645 0 obj
+661 0 obj
 << /Length1 1328 /Length2 1243 /Length3 0 /Filter /FlateDecode /Length 2088 >>       
 stream
 xڍS	TSga,Lu�Z����Ő� KD�v�"*S�y����%!FP9*�2(
@@ -3085,101 +3152,107 @@ n
 $4���Z������Y׿���3����k���<�YOOT�~M�-��/������J,w���g8F~l��h�
��Fk\FG�F�)��qM14v͟�*8I�F��	�F1�
�v��M��3#W��J��_w�+����;���5��6��X�}+�6�����s��K����8��~֛Y��R�|����Ah�7%�����/�B!Z8�؞�$]���r��"Y\*���q��V�ovw��|��L��%;��������AU]7��x4�����r��sJe�{9�`��H����wZ(�Ѧ$�ĻK�9H-��>�I_z������԰ulM_���։3�H<t}U	׹�LQ<��}�ר�K�1��X�+��ݞ]��&ֳ��rc�Ä[p������7��W$���lHhL��Uq�sa�˫,��˵Ң����wϽy_q(/?sUry�W���]%��Z[���Z�Q�]�~Sl�ty��E���������G��Ɔ�y�Ѧ���GWU�0��?JWy�[����0���fK����%��==>�|VRfZ�v��(�3�m�{BJ�G�&��<~Zy���'��߼W�Hӻ�!H�(^ 1J�"o?�o�M+�
 endstream
 endobj
-644 0 obj
-<< /Type /FontDescriptor /FontName /AICAWW+MSBM10 /Flags 4 /FontBBox [ -55 -420 2343 920 ] /Ascent 464 /CapHeight 689 /Descent 0 /ItalicAngle 0 /StemV 40 /XHeight 463 /CharSet( /Z) /FontFile 645 0 R >>
+660 0 obj
+<< /Type /FontDescriptor /FontName /AICAWW+MSBM10 /Flags 4 /FontBBox [ -55 -420 2343 920 ] /Ascent 464 /CapHeight 689 /Descent 0 /ItalicAngle 0 /StemV 40 /XHeight 463 /CharSet( /Z) /FontFile 661 0 R >>
+endobj
+314 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /EINJPG+CMEX10 /FontDescriptor 640 0 R /FirstChar 16 /LastChar 88 /Widths 610 0 R >>
 endobj
 309 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /EINJPG+CMEX10 /FontDescriptor 624 0 R /FirstChar 16 /LastChar 88 /Widths 594 0 R >>
+<< /Type /Font /Subtype /Type1 /BaseFont /ZTFMHK+CMMI12 /FontDescriptor 642 0 R /FirstChar 59 /LastChar 120 /Widths 615 0 R >>
 endobj
-304 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /ZTFMHK+CMMI12 /FontDescriptor 626 0 R /FirstChar 59 /LastChar 120 /Widths 599 0 R >>
+316 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /PKEDJY+CMMI6 /FontDescriptor 644 0 R /FirstChar 25 /LastChar 120 /Widths 608 0 R >>
+endobj
+310 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /XTWTHH+CMMI8 /FontDescriptor 646 0 R /FirstChar 25 /LastChar 122 /Widths 614 0 R >>
 endobj
 311 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /PKEDJY+CMMI6 /FontDescriptor 628 0 R /FirstChar 25 /LastChar 120 /Widths 592 0 R >>
+<< /Type /Font /Subtype /Type1 /BaseFont /MABQHP+CMR12 /FontDescriptor 648 0 R /FirstChar 40 /LastChar 93 /Widths 613 0 R >>
 endobj
-305 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /XTWTHH+CMMI8 /FontDescriptor 630 0 R /FirstChar 25 /LastChar 122 /Widths 598 0 R >>
+315 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /BSQGNB+CMR6 /FontDescriptor 650 0 R /FirstChar 40 /LastChar 50 /Widths 609 0 R >>
 endobj
-306 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /MABQHP+CMR12 /FontDescriptor 632 0 R /FirstChar 40 /LastChar 93 /Widths 597 0 R >>
+313 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /CGCFOU+CMR8 /FontDescriptor 652 0 R /FirstChar 48 /LastChar 61 /Widths 611 0 R >>
 endobj
-310 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /BSQGNB+CMR6 /FontDescriptor 634 0 R /FirstChar 40 /LastChar 50 /Widths 593 0 R >>
+317 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /SOARHH+CMSY10 /FontDescriptor 654 0 R /FirstChar 0 /LastChar 103 /Widths 607 0 R >>
 endobj
-308 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /CGCFOU+CMR8 /FontDescriptor 636 0 R /FirstChar 48 /LastChar 61 /Widths 595 0 R >>
+324 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /YIOMCD+CMSY6 /FontDescriptor 656 0 R /FirstChar 0 /LastChar 33 /Widths 593 0 R >>
 endobj
 312 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /SOARHH+CMSY10 /FontDescriptor 638 0 R /FirstChar 0 /LastChar 103 /Widths 591 0 R >>
+<< /Type /Font /Subtype /Type1 /BaseFont /XYBSMX+CMSY8 /FontDescriptor 658 0 R /FirstChar 0 /LastChar 3 /Widths 612 0 R >>
 endobj
-319 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /YIOMCD+CMSY6 /FontDescriptor 640 0 R /FirstChar 0 /LastChar 33 /Widths 577 0 R >>
+318 0 obj
+<< /Type /Font /Subtype /Type1 /BaseFont /AICAWW+MSBM10 /FontDescriptor 660 0 R /FirstChar 90 /LastChar 90 /Widths 606 0 R >>
 endobj
-307 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /XYBSMX+CMSY8 /FontDescriptor 642 0 R /FirstChar 0 /LastChar 3 /Widths 596 0 R >>
+231 0 obj
+<< /Type /Pages /Parent 662 0 R /Count 10 /Kids [ 222 0 R 275 0 R 293 0 R 301 0 R 305 0 R 328 0 R 334 0 R 341 0 R 346 0 R 355 0 R ] >>
 endobj
-313 0 obj
-<< /Type /Font /Subtype /Type1 /BaseFont /AICAWW+MSBM10 /FontDescriptor 644 0 R /FirstChar 90 /LastChar 90 /Widths 590 0 R >>
+366 0 obj
+<< /Type /Pages /Parent 662 0 R /Count 10 /Kids [ 361 0 R 369 0 R 375 0 R 384 0 R 388 0 R 392 0 R 396 0 R 400 0 R 405 0 R 409 0 R ] >>
 endobj
-227 0 obj
-<< /Type /Pages /Parent 646 0 R /Count 10 /Kids [ 218 0 R 271 0 R 288 0 R 296 0 R 300 0 R 323 0 R 329 0 R 336 0 R 343 0 R 349 0 R ] >>
+416 0 obj
+<< /Type /Pages /Parent 662 0 R /Count 10 /Kids [ 413 0 R 418 0 R 422 0 R 426 0 R 430 0 R 434 0 R 438 0 R 442 0 R 446 0 R 450 0 R ] >>
 endobj
-360 0 obj
-<< /Type /Pages /Parent 646 0 R /Count 10 /Kids [ 357 0 R 364 0 R 373 0 R 377 0 R 381 0 R 385 0 R 389 0 R 393 0 R 398 0 R 402 0 R ] >>
+457 0 obj
+<< /Type /Pages /Parent 662 0 R /Count 10 /Kids [ 454 0 R 459 0 R 463 0 R 467 0 R 471 0 R 475 0 R 479 0 R 483 0 R 487 0 R 491 0 R ] >>
 endobj
-409 0 obj
-<< /Type /Pages /Parent 646 0 R /Count 10 /Kids [ 406 0 R 411 0 R 415 0 R 419 0 R 423 0 R 427 0 R 431 0 R 435 0 R 439 0 R 443 0 R ] >>
+498 0 obj
+<< /Type /Pages /Parent 662 0 R /Count 10 /Kids [ 495 0 R 500 0 R 504 0 R 508 0 R 512 0 R 516 0 R 520 0 R 524 0 R 528 0 R 532 0 R ] >>
 endobj
-450 0 obj
-<< /Type /Pages /Parent 646 0 R /Count 10 /Kids [ 447 0 R 452 0 R 456 0 R 460 0 R 464 0 R 468 0 R 472 0 R 476 0 R 480 0 R 484 0 R ] >>
+539 0 obj
+<< /Type /Pages /Parent 662 0 R /Count 10 /Kids [ 536 0 R 541 0 R 545 0 R 549 0 R 553 0 R 557 0 R 561 0 R 565 0 R 569 0 R 573 0 R ] >>
 endobj
-491 0 obj
-<< /Type /Pages /Parent 646 0 R /Count 10 /Kids [ 488 0 R 493 0 R 497 0 R 501 0 R 505 0 R 509 0 R 513 0 R 517 0 R 521 0 R 525 0 R ] >>
+580 0 obj
+<< /Type /Pages /Parent 662 0 R /Count 1 /Kids [ 577 0 R ] >>
 endobj
-532 0 obj
-<< /Type /Pages /Parent 646 0 R /Count 9 /Kids [ 529 0 R 534 0 R 538 0 R 542 0 R 546 0 R 550 0 R 554 0 R 558 0 R 562 0 R ] >>
+662 0 obj
+<< /Type /Pages  /Count 61 /Kids [ 231 0 R 366 0 R 416 0 R 457 0 R 498 0 R 539 0 R 580 0 R ] >>
 endobj
-646 0 obj
-<< /Type /Pages  /Count 59 /Kids [ 227 0 R 360 0 R 409 0 R 450 0 R 491 0 R 532 0 R ] >>
+663 0 obj
+<< /Type /Outlines /First 3 0 R /Last 187 0 R /Count 5 >>
 endobj
-647 0 obj
-<< /Type /Outlines /First 3 0 R /Last 183 0 R /Count 5 >>
+219 0 obj
+<< /Title 220 0 R /A 217 0 R /Parent 187 0 R /Prev 215 0 R >>
 endobj
 215 0 obj
-<< /Title 216 0 R /A 213 0 R /Parent 183 0 R /Prev 211 0 R >>
+<< /Title 216 0 R /A 213 0 R /Parent 187 0 R /Prev 211 0 R /Next 219 0 R >>
 endobj
 211 0 obj
-<< /Title 212 0 R /A 209 0 R /Parent 183 0 R /Prev 207 0 R /Next 215 0 R >>
+<< /Title 212 0 R /A 209 0 R /Parent 187 0 R /Prev 207 0 R /Next 215 0 R >>
 endobj
 207 0 obj
-<< /Title 208 0 R /A 205 0 R /Parent 183 0 R /Prev 203 0 R /Next 211 0 R >>
+<< /Title 208 0 R /A 205 0 R /Parent 187 0 R /Prev 203 0 R /Next 211 0 R >>
 endobj
 203 0 obj
-<< /Title 204 0 R /A 201 0 R /Parent 183 0 R /Prev 199 0 R /Next 207 0 R >>
+<< /Title 204 0 R /A 201 0 R /Parent 187 0 R /Prev 199 0 R /Next 207 0 R >>
 endobj
 199 0 obj
-<< /Title 200 0 R /A 197 0 R /Parent 183 0 R /Prev 195 0 R /Next 203 0 R >>
+<< /Title 200 0 R /A 197 0 R /Parent 187 0 R /Prev 195 0 R /Next 203 0 R >>
 endobj
 195 0 obj
-<< /Title 196 0 R /A 193 0 R /Parent 183 0 R /Prev 191 0 R /Next 199 0 R >>
+<< /Title 196 0 R /A 193 0 R /Parent 187 0 R /Prev 191 0 R /Next 199 0 R >>
 endobj
 191 0 obj
-<< /Title 192 0 R /A 189 0 R /Parent 183 0 R /Prev 187 0 R /Next 195 0 R >>
+<< /Title 192 0 R /A 189 0 R /Parent 187 0 R /Next 195 0 R >>
 endobj
 187 0 obj
-<< /Title 188 0 R /A 185 0 R /Parent 183 0 R /Next 191 0 R >>
+<< /Title 188 0 R /A 185 0 R /Parent 663 0 R /Prev 179 0 R /First 191 0 R /Last 219 0 R /Count -8 >>
 endobj
 183 0 obj
-<< /Title 184 0 R /A 181 0 R /Parent 647 0 R /Prev 175 0 R /First 187 0 R /Last 215 0 R /Count -8 >>
+<< /Title 184 0 R /A 181 0 R /Parent 179 0 R >>
 endobj
 179 0 obj
-<< /Title 180 0 R /A 177 0 R /Parent 175 0 R >>
+<< /Title 180 0 R /A 177 0 R /Parent 663 0 R /Prev 103 0 R /Next 187 0 R /First 183 0 R /Last 183 0 R /Count -1 >>
 endobj
 175 0 obj
-<< /Title 176 0 R /A 173 0 R /Parent 647 0 R /Prev 103 0 R /Next 183 0 R /First 179 0 R /Last 179 0 R /Count -1 >>
+<< /Title 176 0 R /A 173 0 R /Parent 131 0 R /Prev 171 0 R >>
 endobj
 171 0 obj
-<< /Title 172 0 R /A 169 0 R /Parent 131 0 R /Prev 167 0 R >>
+<< /Title 172 0 R /A 169 0 R /Parent 131 0 R /Prev 167 0 R /Next 175 0 R >>
 endobj
 167 0 obj
 << /Title 168 0 R /A 165 0 R /Parent 131 0 R /Prev 163 0 R /Next 171 0 R >>
@@ -3209,7 +3282,7 @@ endobj
 << /Title 136 0 R /A 133 0 R /Parent 131 0 R /Next 139 0 R >>
 endobj
 131 0 obj
-<< /Title 132 0 R /A 129 0 R /Parent 103 0 R /Prev 111 0 R /First 135 0 R /Last 171 0 R /Count -10 >>
+<< /Title 132 0 R /A 129 0 R /Parent 103 0 R /Prev 111 0 R /First 135 0 R /Last 175 0 R /Count -11 >>
 endobj
 127 0 obj
 << /Title 128 0 R /A 125 0 R /Parent 111 0 R /Prev 123 0 R >>
@@ -3230,7 +3303,7 @@ endobj
 << /Title 108 0 R /A 105 0 R /Parent 103 0 R /Next 111 0 R >>
 endobj
 103 0 obj
-<< /Title 104 0 R /A 101 0 R /Parent 647 0 R /Prev 7 0 R /Next 175 0 R /First 107 0 R /Last 131 0 R /Count -3 >>
+<< /Title 104 0 R /A 101 0 R /Parent 663 0 R /Prev 7 0 R /Next 179 0 R /First 107 0 R /Last 131 0 R /Count -3 >>
 endobj
 99 0 obj
 << /Title 100 0 R /A 97 0 R /Parent 87 0 R /Prev 95 0 R >>
@@ -3302,699 +3375,715 @@ endobj
 << /Title 12 0 R /A 9 0 R /Parent 7 0 R /Next 15 0 R >>
 endobj
 7 0 obj
-<< /Title 8 0 R /A 5 0 R /Parent 647 0 R /Prev 3 0 R /Next 103 0 R /First 11 0 R /Last 87 0 R /Count -7 >>
+<< /Title 8 0 R /A 5 0 R /Parent 663 0 R /Prev 3 0 R /Next 103 0 R /First 11 0 R /Last 87 0 R /Count -7 >>
 endobj
 3 0 obj
-<< /Title 4 0 R /A 1 0 R /Parent 647 0 R /Next 7 0 R >>
+<< /Title 4 0 R /A 1 0 R /Parent 663 0 R /Next 7 0 R >>
 endobj
-648 0 obj
-<< /Names [ (Doc-Start) 222 0 R (Item.1) 316 0 R (Item.10) 340 0 R (Item.11) 341 0 R (Item.12) 346 0 R (Item.13) 347 0 R (Item.14) 352 0 R (Item.15) 353 0 R (Item.16) 354 0 R (Item.17) 355 0 R (Item.2) 317 0 R (Item.3) 320 0 R (Item.4) 321 0 R (Item.5) 327 0 R (Item.6) 332 0 R (Item.7) 333 0 R (Item.8) 334 0 R (Item.9) 339 0 R (equation.2.1) 303 0 R (equation.2.2) 314 0 R (equation.2.3) 367 0 R (equation.2.4) 368 0 R (equation.2.5) 369 0 R (equation.2.6) 370 0 R (page.1) 221 0 R (page.10) 359 0 R (page.11) 366 0 R (page.12) 375 0 R (page.13) 379 0 R (page.14) 383 0 R (page.15) 387 0 R (page.16) 391 0 R ] /Limits [ (Doc-Start) (page.16) ] >>
+664 0 obj
+<< /Names [ (Doc-Start) 226 0 R (Item.1) 321 0 R (Item.10) 349 0 R (Item.11) 350 0 R (Item.12) 351 0 R (Item.13) 352 0 R (Item.14) 353 0 R (Item.15) 358 0 R (Item.16) 359 0 R (Item.17) 364 0 R (Item.18) 365 0 R (Item.2) 322 0 R (Item.3) 325 0 R (Item.4) 326 0 R (Item.5) 332 0 R (Item.6) 337 0 R (Item.7) 338 0 R (Item.8) 339 0 R (Item.9) 344 0 R (equation.2.1) 308 0 R (equation.2.2) 319 0 R (equation.2.3) 378 0 R (equation.2.4) 379 0 R (equation.2.5) 380 0 R (equation.2.6) 381 0 R (page.1) 225 0 R (page.10) 363 0 R (page.11) 371 0 R (page.12) 377 0 R (page.13) 386 0 R (page.14) 390 0 R (page.15) 394 0 R ] /Limits [ (Doc-Start) (page.15) ] >>
 endobj
-649 0 obj
-<< /Names [ (page.17) 395 0 R (page.18) 400 0 R (page.19) 404 0 R (page.2) 290 0 R (page.20) 408 0 R (page.21) 413 0 R (page.22) 417 0 R (page.23) 421 0 R (page.24) 425 0 R (page.25) 429 0 R (page.26) 433 0 R (page.27) 437 0 R (page.28) 441 0 R (page.29) 445 0 R (page.3) 298 0 R (page.30) 449 0 R (page.31) 454 0 R (page.32) 458 0 R (page.33) 462 0 R (page.34) 466 0 R (page.35) 470 0 R (page.36) 474 0 R (page.37) 478 0 R (page.38) 482 0 R (page.39) 486 0 R (page.4) 302 0 R (page.40) 490 0 R (page.41) 495 0 R (page.42) 499 0 R (page.43) 503 0 R (page.44) 507 0 R (page.45) 511 0 R ] /Limits [ (page.17) (page.45) ] >>
+665 0 obj
+<< /Names [ (page.16) 398 0 R (page.17) 402 0 R (page.18) 407 0 R (page.19) 411 0 R (page.2) 295 0 R (page.20) 415 0 R (page.21) 420 0 R (page.22) 424 0 R (page.23) 428 0 R (page.24) 432 0 R (page.25) 436 0 R (page.26) 440 0 R (page.27) 444 0 R (page.28) 448 0 R (page.29) 452 0 R (page.3) 303 0 R (page.30) 456 0 R (page.31) 461 0 R (page.32) 465 0 R (page.33) 469 0 R (page.34) 473 0 R (page.35) 477 0 R (page.36) 481 0 R (page.37) 485 0 R (page.38) 489 0 R (page.39) 493 0 R (page.4) 307 0 R (page.40) 497 0 R (page.41) 502 0 R (page.42) 506 0 R (page.43) 510 0 R (page.44) 514 0 R ] /Limits [ (page.16) (page.44) ] >>
 endobj
-650 0 obj
-<< /Names [ (page.46) 515 0 R (page.47) 519 0 R (page.48) 523 0 R (page.49) 527 0 R (page.5) 325 0 R (page.50) 531 0 R (page.51) 536 0 R (page.52) 540 0 R (page.53) 544 0 R (page.54) 548 0 R (page.55) 552 0 R (page.56) 556 0 R (page.57) 560 0 R (page.58) 564 0 R (page.6) 331 0 R (page.7) 338 0 R (page.8) 345 0 R (page.9) 351 0 R (section*.1) 273 0 R (section.1) 2 0 R (section.2) 6 0 R (section.3) 102 0 R (section.4) 174 0 R (section.5) 182 0 R (subsection.2.1) 10 0 R (subsection.2.2) 14 0 R (subsection.2.3) 18 0 R (subsection.2.4) 34 0 R (subsection.2.5) 46 0 R (subsection.2.6) 82 0 R (subsection.2.7) 86 0 R (subsection.3.1) 106 0 R ] /Limits [ (page.46) (subsection.3.1) ] >>
+666 0 obj
+<< /Names [ (page.45) 518 0 R (page.46) 522 0 R (page.47) 526 0 R (page.48) 530 0 R (page.49) 534 0 R (page.5) 330 0 R (page.50) 538 0 R (page.51) 543 0 R (page.52) 547 0 R (page.53) 551 0 R (page.54) 555 0 R (page.55) 559 0 R (page.56) 563 0 R (page.57) 567 0 R (page.58) 571 0 R (page.59) 575 0 R (page.6) 336 0 R (page.60) 579 0 R (page.7) 343 0 R (page.8) 348 0 R (page.9) 357 0 R (section*.1) 277 0 R (section.1) 2 0 R (section.2) 6 0 R (section.3) 102 0 R (section.4) 178 0 R (section.5) 186 0 R (subsection.2.1) 10 0 R (subsection.2.2) 14 0 R (subsection.2.3) 18 0 R (subsection.2.4) 34 0 R (subsection.2.5) 46 0 R ] /Limits [ (page.45) (subsection.2.5) ] >>
 endobj
-651 0 obj
-<< /Names [ (subsection.3.2) 110 0 R (subsection.3.3) 130 0 R (subsection.4.1) 178 0 R (subsection.5.1) 186 0 R (subsection.5.2) 190 0 R (subsection.5.3) 194 0 R (subsection.5.4) 198 0 R (subsection.5.5) 202 0 R (subsection.5.6) 206 0 R (subsection.5.7) 210 0 R (subsection.5.8) 214 0 R (subsubsection.2.3.1) 22 0 R (subsubsection.2.3.2) 26 0 R (subsubsection.2.3.3) 30 0 R (subsubsection.2.4.1) 38 0 R (subsubsection.2.4.2) 42 0 R (subsubsection.2.5.1) 50 0 R (subsubsection.2.5.2) 54 0 R (subsubsection.2.5.3) 58 0 R (subsubsection.2.5.4) 62 0 R (subsubsection.2.5.5) 66 0 R (subsubsection.2.5.6) 70 0 R (subsubsection.2.5.7) 74 0 R (subsubsection.2.5.8) 78 0 R (subsubsection.2.7.1) 90 0 R (subsubsection.2.7.2) 94 0 R (subsubsection.2.7.3) 98 0 R (subsubsection.3.2.1) 114 0 R (subsubsection.3.2.2) 118 0 R (subsubsection.3.2.3) 122 0 R (subsubsection.3.2.4) 126 0 R (subsubsection.3.3.1) 134 0 R ] /Limits [ (subsection.3.2) (subsubsection.3.3.1) ] >>
+667 0 obj
+<< /Names [ (subsection.2.6) 82 0 R (subsection.2.7) 86 0 R (subsection.3.1) 106 0 R (subsection.3.2) 110 0 R (subsection.3.3) 130 0 R (subsection.4.1) 182 0 R (subsection.5.1) 190 0 R (subsection.5.2) 194 0 R (subsection.5.3) 198 0 R (subsection.5.4) 202 0 R (subsection.5.5) 206 0 R (subsection.5.6) 210 0 R (subsection.5.7) 214 0 R (subsection.5.8) 218 0 R (subsubsection.2.3.1) 22 0 R (subsubsection.2.3.2) 26 0 R (subsubsection.2.3.3) 30 0 R (subsubsection.2.4.1) 38 0 R (subsubsection.2.4.2) 42 0 R (subsubsection.2.5.1) 50 0 R (subsubsection.2.5.2) 54 0 R (subsubsection.2.5.3) 58 0 R (subsubsection.2.5.4) 62 0 R (subsubsection.2.5.5) 66 0 R (subsubsection.2.5.6) 70 0 R (subsubsection.2.5.7) 74 0 R (subsubsection.2.5.8) 78 0 R (subsubsection.2.7.1) 90 0 R (subsubsection.2.7.2) 94 0 R (subsubsection.2.7.3) 98 0 R (subsubsection.3.2.1) 114 0 R (subsubsection.3.2.2) 118 0 R ] /Limits [ (subsection.2.6) (subsubsection.3.2.2) ] >>
 endobj
-652 0 obj
-<< /Names [ (subsubsection.3.3.10) 170 0 R (subsubsection.3.3.2) 138 0 R (subsubsection.3.3.3) 142 0 R (subsubsection.3.3.4) 146 0 R (subsubsection.3.3.5) 150 0 R (subsubsection.3.3.6) 154 0 R (subsubsection.3.3.7) 158 0 R (subsubsection.3.3.8) 162 0 R (subsubsection.3.3.9) 166 0 R ] /Limits [ (subsubsection.3.3.10) (subsubsection.3.3.9) ] >>
+668 0 obj
+<< /Names [ (subsubsection.3.2.3) 122 0 R (subsubsection.3.2.4) 126 0 R (subsubsection.3.3.1) 134 0 R (subsubsection.3.3.10) 170 0 R (subsubsection.3.3.11) 174 0 R (subsubsection.3.3.2) 138 0 R (subsubsection.3.3.3) 142 0 R (subsubsection.3.3.4) 146 0 R (subsubsection.3.3.5) 150 0 R (subsubsection.3.3.6) 154 0 R (subsubsection.3.3.7) 158 0 R (subsubsection.3.3.8) 162 0 R (subsubsection.3.3.9) 166 0 R ] /Limits [ (subsubsection.3.2.3) (subsubsection.3.3.9) ] >>
 endobj
-653 0 obj
-<< /Kids [ 648 0 R 649 0 R 650 0 R 651 0 R 652 0 R ] /Limits [ (Doc-Start) (subsubsection.3.3.9) ] >>
+669 0 obj
+<< /Kids [ 664 0 R 665 0 R 666 0 R 667 0 R 668 0 R ] /Limits [ (Doc-Start) (subsubsection.3.3.9) ] >>
 endobj
-654 0 obj
-<< /Dests 653 0 R >>
+670 0 obj
+<< /Dests 669 0 R >>
 endobj
-655 0 obj
-<< /Type /Catalog /Pages 646 0 R /Outlines 647 0 R /Names 654 0 R /PageMode/UseOutlines/PageLabels<</Nums[0<</S/D>>1<</S/D>>]>> /OpenAction 217 0 R >>
+671 0 obj
+<< /Type /Catalog /Pages 662 0 R /Outlines 663 0 R /Names 670 0 R /PageMode/UseOutlines/PageLabels<</Nums[0<</S/D>>1<</S/D>>]>> /OpenAction 221 0 R >>
 endobj
-656 0 obj
-<< /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() /Producer (LuaTeX-1.14.0) /CreationDate (D:20220803223528+02'00') /ModDate (D:20220803223528+02'00') /Trapped /False /PTEX.FullBanner (This is LuaHBTeX, Version 1.14.0 (MiKTeX 21.6)) >>
+672 0 obj
+<< /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() /Producer (LuaTeX-1.14.0) /CreationDate (D:20221006210644+02'00') /ModDate (D:20221006210644+02'00') /Trapped /False /PTEX.FullBanner (This is LuaHBTeX, Version 1.14.0 (MiKTeX 21.6)) >>
 endobj
 xref
-0 657
+0 673
 0000000000 65535 f 
 0000000020 00000 n 
-0000029670 00000 n 
-0000340970 00000 n 
+0000030128 00000 n 
+0000349314 00000 n 
 0000000065 00000 n 
 0000000152 00000 n 
-0000033308 00000 n 
-0000340848 00000 n 
+0000033778 00000 n 
+0000349192 00000 n 
 0000000197 00000 n 
 0000000328 00000 n 
-0000033496 00000 n 
-0000340776 00000 n 
+0000033966 00000 n 
+0000349120 00000 n 
 0000000378 00000 n 
 0000000489 00000 n 
-0000042917 00000 n 
-0000340690 00000 n 
+0000043189 00000 n 
+0000349034 00000 n 
 0000000540 00000 n 
 0000000689 00000 n 
-0000047002 00000 n 
-0000340567 00000 n 
+0000047063 00000 n 
+0000348911 00000 n 
 0000000740 00000 n 
 0000001008 00000 n 
-0000047064 00000 n 
-0000340493 00000 n 
+0000051330 00000 n 
+0000348837 00000 n 
 0000001064 00000 n 
 0000001165 00000 n 
-0000051493 00000 n 
-0000340406 00000 n 
+0000051390 00000 n 
+0000348750 00000 n 
 0000001221 00000 n 
 0000001342 00000 n 
-0000051555 00000 n 
-0000340332 00000 n 
+0000051452 00000 n 
+0000348676 00000 n 
 0000001398 00000 n 
 0000001538 00000 n 
-0000051868 00000 n 
-0000340209 00000 n 
+0000055666 00000 n 
+0000348553 00000 n 
 0000001589 00000 n 
 0000001915 00000 n 
-0000051930 00000 n 
-0000340135 00000 n 
+0000055728 00000 n 
+0000348479 00000 n 
 0000001971 00000 n 
 0000002067 00000 n 
-0000056399 00000 n 
-0000340061 00000 n 
+0000060650 00000 n 
+0000348405 00000 n 
 0000002123 00000 n 
 0000002470 00000 n 
-0000061253 00000 n 
-0000339938 00000 n 
+0000060712 00000 n 
+0000348282 00000 n 
 0000002521 00000 n 
 0000002632 00000 n 
-0000061315 00000 n 
-0000339864 00000 n 
+0000065305 00000 n 
+0000348208 00000 n 
 0000002688 00000 n 
 0000002824 00000 n 
-0000066103 00000 n 
-0000339777 00000 n 
+0000065623 00000 n 
+0000348121 00000 n 
 0000002880 00000 n 
 0000003052 00000 n 
-0000069441 00000 n 
-0000339690 00000 n 
+0000069597 00000 n 
+0000348034 00000 n 
 0000003108 00000 n 
 0000003199 00000 n 
-0000069503 00000 n 
-0000339603 00000 n 
+0000069658 00000 n 
+0000347947 00000 n 
 0000003255 00000 n 
 0000003462 00000 n 
-0000069565 00000 n 
-0000339516 00000 n 
+0000073171 00000 n 
+0000347860 00000 n 
 0000003518 00000 n 
 0000003705 00000 n 
-0000069627 00000 n 
-0000339429 00000 n 
+0000073233 00000 n 
+0000347773 00000 n 
 0000003761 00000 n 
 0000003912 00000 n 
-0000073496 00000 n 
-0000339342 00000 n 
+0000073295 00000 n 
+0000347686 00000 n 
 0000003968 00000 n 
 0000004059 00000 n 
-0000073558 00000 n 
-0000339268 00000 n 
+0000073357 00000 n 
+0000347612 00000 n 
 0000004115 00000 n 
 0000004357 00000 n 
-0000073620 00000 n 
-0000339182 00000 n 
+0000076786 00000 n 
+0000347526 00000 n 
 0000004408 00000 n 
 0000004509 00000 n 
-0000077021 00000 n 
-0000339072 00000 n 
+0000076848 00000 n 
+0000347416 00000 n 
 0000004560 00000 n 
 0000004772 00000 n 
-0000077083 00000 n 
-0000338998 00000 n 
+0000076910 00000 n 
+0000347342 00000 n 
 0000004828 00000 n 
 0000004934 00000 n 
-0000077145 00000 n 
-0000338911 00000 n 
+0000079785 00000 n 
+0000347255 00000 n 
 0000004990 00000 n 
 0000005291 00000 n 
-0000077207 00000 n 
-0000338836 00000 n 
+0000079847 00000 n 
+0000347180 00000 n 
 0000005347 00000 n 
 0000005532 00000 n 
-0000081503 00000 n 
-0000338706 00000 n 
+0000083025 00000 n 
+0000347050 00000 n 
 0000005579 00000 n 
 0000005709 00000 n 
-0000081566 00000 n 
-0000338627 00000 n 
+0000083088 00000 n 
+0000346971 00000 n 
 0000005761 00000 n 
 0000005916 00000 n 
-0000090062 00000 n 
-0000338495 00000 n 
+0000091584 00000 n 
+0000346839 00000 n 
 0000005968 00000 n 
 0000006196 00000 n 
-0000090125 00000 n 
-0000338416 00000 n 
+0000091647 00000 n 
+0000346760 00000 n 
 0000006253 00000 n 
 0000006421 00000 n 
-0000093642 00000 n 
-0000338323 00000 n 
+0000095164 00000 n 
+0000346667 00000 n 
 0000006478 00000 n 
 0000006626 00000 n 
-0000096774 00000 n 
-0000338230 00000 n 
+0000098352 00000 n 
+0000346574 00000 n 
 0000006683 00000 n 
 0000006831 00000 n 
-0000096837 00000 n 
-0000338151 00000 n 
+0000098415 00000 n 
+0000346495 00000 n 
 0000006888 00000 n 
 0000007056 00000 n 
-0000096900 00000 n 
-0000338032 00000 n 
+0000098478 00000 n 
+0000346376 00000 n 
 0000007108 00000 n 
 0000007235 00000 n 
-0000122428 00000 n 
-0000337953 00000 n 
+0000126428 00000 n 
+0000346297 00000 n 
 0000007292 00000 n 
 0000007432 00000 n 
-0000125339 00000 n 
-0000337860 00000 n 
+0000131578 00000 n 
+0000346204 00000 n 
 0000007489 00000 n 
 0000007664 00000 n 
-0000128459 00000 n 
-0000337767 00000 n 
+0000134697 00000 n 
+0000346111 00000 n 
 0000007721 00000 n 
 0000007874 00000 n 
-0000131977 00000 n 
-0000337674 00000 n 
+0000138291 00000 n 
+0000346018 00000 n 
 0000007931 00000 n 
 0000008280 00000 n 
-0000132040 00000 n 
-0000337581 00000 n 
+0000138354 00000 n 
+0000345925 00000 n 
 0000008337 00000 n 
 0000008696 00000 n 
-0000138429 00000 n 
-0000337488 00000 n 
+0000144775 00000 n 
+0000345832 00000 n 
 0000008753 00000 n 
-0000008928 00000 n 
-0000138492 00000 n 
-0000337395 00000 n 
-0000008985 00000 n 
-0000009135 00000 n 
-0000141659 00000 n 
-0000337302 00000 n 
-0000009192 00000 n 
-0000009334 00000 n 
-0000144996 00000 n 
-0000337209 00000 n 
-0000009391 00000 n 
-0000009543 00000 n 
-0000145059 00000 n 
-0000337130 00000 n 
-0000009601 00000 n 
-0000009888 00000 n 
-0000148667 00000 n 
-0000336998 00000 n 
-0000009935 00000 n 
-0000010300 00000 n 
-0000151493 00000 n 
-0000336933 00000 n 
-0000010352 00000 n 
-0000010525 00000 n 
-0000156482 00000 n 
-0000336815 00000 n 
-0000010572 00000 n 
-0000010702 00000 n 
-0000156545 00000 n 
-0000336736 00000 n 
-0000010754 00000 n 
-0000010896 00000 n 
-0000170248 00000 n 
-0000336643 00000 n 
-0000010948 00000 n 
-0000011562 00000 n 
-0000175735 00000 n 
-0000336550 00000 n 
-0000011614 00000 n 
-0000012090 00000 n 
-0000178519 00000 n 
-0000336457 00000 n 
-0000012142 00000 n 
-0000012568 00000 n 
-0000181694 00000 n 
-0000336364 00000 n 
-0000012620 00000 n 
-0000012927 00000 n 
-0000184506 00000 n 
-0000336271 00000 n 
-0000012979 00000 n 
-0000013369 00000 n 
-0000190207 00000 n 
-0000336178 00000 n 
-0000013421 00000 n 
-0000013895 00000 n 
-0000193045 00000 n 
-0000336099 00000 n 
-0000013947 00000 n 
-0000014226 00000 n 
-0000014755 00000 n 
-0000014995 00000 n 
-0000014278 00000 n 
-0000014869 00000 n 
-0000014932 00000 n 
-0000245588 00000 n 
-0000240438 00000 n 
-0000232306 00000 n 
-0000229161 00000 n 
-0000335016 00000 n 
-0000017786 00000 n 
-0000017937 00000 n 
-0000018088 00000 n 
-0000018246 00000 n 
-0000018404 00000 n 
-0000018561 00000 n 
-0000018723 00000 n 
-0000018885 00000 n 
-0000019047 00000 n 
-0000019205 00000 n 
-0000019367 00000 n 
-0000019529 00000 n 
-0000019685 00000 n 
-0000019847 00000 n 
-0000020008 00000 n 
-0000020170 00000 n 
-0000020332 00000 n 
-0000020492 00000 n 
-0000020654 00000 n 
-0000020815 00000 n 
-0000020977 00000 n 
-0000021134 00000 n 
-0000021292 00000 n 
-0000021454 00000 n 
-0000021614 00000 n 
-0000021776 00000 n 
-0000021928 00000 n 
-0000022086 00000 n 
-0000022244 00000 n 
-0000022406 00000 n 
-0000022565 00000 n 
-0000022727 00000 n 
-0000022889 00000 n 
-0000023047 00000 n 
-0000023209 00000 n 
-0000023370 00000 n 
-0000023532 00000 n 
-0000023694 00000 n 
-0000023854 00000 n 
-0000024016 00000 n 
-0000024177 00000 n 
-0000026065 00000 n 
-0000024400 00000 n 
-0000017307 00000 n 
-0000015106 00000 n 
-0000024337 00000 n 
-0000017437 00000 n 
-0000026227 00000 n 
-0000026390 00000 n 
-0000026542 00000 n 
-0000026699 00000 n 
-0000026851 00000 n 
-0000027009 00000 n 
-0000027322 00000 n 
-0000027636 00000 n 
-0000027792 00000 n 
-0000027950 00000 n 
-0000028108 00000 n 
-0000028421 00000 n 
-0000028642 00000 n 
-0000025786 00000 n 
-0000024485 00000 n 
-0000028579 00000 n 
-0000027166 00000 n 
-0000027479 00000 n 
-0000028265 00000 n 
-0000025916 00000 n 
-0000029731 00000 n 
-0000029493 00000 n 
-0000028727 00000 n 
-0000029607 00000 n 
-0000033810 00000 n 
-0000033131 00000 n 
-0000029816 00000 n 
-0000033245 00000 n 
-0000033369 00000 n 
-0000333595 00000 n 
-0000333882 00000 n 
-0000334025 00000 n 
-0000334733 00000 n 
-0000334308 00000 n 
-0000333452 00000 n 
-0000334167 00000 n 
-0000333739 00000 n 
-0000334449 00000 n 
-0000334873 00000 n 
-0000033433 00000 n 
-0000216939 00000 n 
-0000033558 00000 n 
-0000033621 00000 n 
-0000214991 00000 n 
-0000334592 00000 n 
-0000033684 00000 n 
-0000033747 00000 n 
-0000036845 00000 n 
-0000036605 00000 n 
-0000034064 00000 n 
-0000036719 00000 n 
-0000207051 00000 n 
-0000036782 00000 n 
-0000039854 00000 n 
-0000039488 00000 n 
-0000036969 00000 n 
-0000039602 00000 n 
-0000039665 00000 n 
-0000039728 00000 n 
-0000039791 00000 n 
-0000043168 00000 n 
-0000042740 00000 n 
-0000039978 00000 n 
-0000042854 00000 n 
-0000042979 00000 n 
-0000043042 00000 n 
-0000043105 00000 n 
-0000047126 00000 n 
-0000046699 00000 n 
-0000043292 00000 n 
-0000046813 00000 n 
-0000046876 00000 n 
-0000046939 00000 n 
-0000051992 00000 n 
-0000051316 00000 n 
-0000047263 00000 n 
-0000051430 00000 n 
-0000051617 00000 n 
-0000051680 00000 n 
-0000051743 00000 n 
-0000051805 00000 n 
-0000056461 00000 n 
-0000056222 00000 n 
-0000052181 00000 n 
-0000056336 00000 n 
-0000335168 00000 n 
-0000060817 00000 n 
-0000061033 00000 n 
-0000061631 00000 n 
-0000060650 00000 n 
-0000056624 00000 n 
-0000061190 00000 n 
-0000061376 00000 n 
-0000061440 00000 n 
-0000061504 00000 n 
-0000061568 00000 n 
-0000060780 00000 n 
-0000066164 00000 n 
-0000065926 00000 n 
-0000061846 00000 n 
-0000066040 00000 n 
-0000069689 00000 n 
-0000069264 00000 n 
-0000066301 00000 n 
-0000069378 00000 n 
-0000073682 00000 n 
-0000073319 00000 n 
-0000069813 00000 n 
-0000073433 00000 n 
-0000077268 00000 n 
-0000076844 00000 n 
-0000073793 00000 n 
-0000076958 00000 n 
-0000078439 00000 n 
-0000078262 00000 n 
-0000077418 00000 n 
-0000078376 00000 n 
-0000081629 00000 n 
-0000081326 00000 n 
-0000078550 00000 n 
-0000081440 00000 n 
-0000198933 00000 n 
-0000084331 00000 n 
-0000084154 00000 n 
-0000081779 00000 n 
-0000084268 00000 n 
-0000087030 00000 n 
-0000086853 00000 n 
-0000084455 00000 n 
-0000086967 00000 n 
-0000090186 00000 n 
-0000089885 00000 n 
-0000087154 00000 n 
-0000089999 00000 n 
-0000335320 00000 n 
-0000093705 00000 n 
-0000093465 00000 n 
-0000090310 00000 n 
-0000093579 00000 n 
-0000096963 00000 n 
-0000096597 00000 n 
-0000093842 00000 n 
-0000096711 00000 n 
-0000099650 00000 n 
-0000099473 00000 n 
-0000097113 00000 n 
-0000099587 00000 n 
-0000102070 00000 n 
-0000101893 00000 n 
-0000099787 00000 n 
-0000102007 00000 n 
-0000104244 00000 n 
-0000104067 00000 n 
-0000102207 00000 n 
-0000104181 00000 n 
-0000106781 00000 n 
-0000106604 00000 n 
-0000104381 00000 n 
-0000106718 00000 n 
-0000109384 00000 n 
-0000109207 00000 n 
-0000106918 00000 n 
-0000109321 00000 n 
-0000111899 00000 n 
-0000111722 00000 n 
-0000109521 00000 n 
-0000111836 00000 n 
-0000114499 00000 n 
-0000114322 00000 n 
-0000112036 00000 n 
-0000114436 00000 n 
-0000117303 00000 n 
-0000117126 00000 n 
-0000114636 00000 n 
-0000117240 00000 n 
-0000335472 00000 n 
-0000119999 00000 n 
-0000119822 00000 n 
-0000117440 00000 n 
-0000119936 00000 n 
-0000122491 00000 n 
-0000122251 00000 n 
-0000120136 00000 n 
-0000122365 00000 n 
-0000125402 00000 n 
-0000125162 00000 n 
-0000122576 00000 n 
-0000125276 00000 n 
-0000128522 00000 n 
-0000128282 00000 n 
-0000125487 00000 n 
-0000128396 00000 n 
-0000132103 00000 n 
-0000131800 00000 n 
-0000128607 00000 n 
-0000131914 00000 n 
-0000135306 00000 n 
-0000135129 00000 n 
-0000132188 00000 n 
-0000135243 00000 n 
-0000138555 00000 n 
-0000138252 00000 n 
-0000135378 00000 n 
-0000138366 00000 n 
-0000141722 00000 n 
-0000141482 00000 n 
-0000138640 00000 n 
-0000141596 00000 n 
-0000145122 00000 n 
-0000144819 00000 n 
-0000141807 00000 n 
-0000144933 00000 n 
-0000146314 00000 n 
-0000146137 00000 n 
-0000145207 00000 n 
-0000146251 00000 n 
-0000335624 00000 n 
-0000148730 00000 n 
-0000148490 00000 n 
-0000146386 00000 n 
-0000148604 00000 n 
-0000151556 00000 n 
-0000151316 00000 n 
-0000148815 00000 n 
-0000151430 00000 n 
-0000153805 00000 n 
-0000153628 00000 n 
-0000151706 00000 n 
-0000153742 00000 n 
-0000156608 00000 n 
-0000156305 00000 n 
-0000153942 00000 n 
-0000156419 00000 n 
-0000159249 00000 n 
-0000159072 00000 n 
+0000008908 00000 n 
+0000144838 00000 n 
+0000345739 00000 n 
+0000008965 00000 n 
+0000009140 00000 n 
+0000147903 00000 n 
+0000345646 00000 n 
+0000009197 00000 n 
+0000009347 00000 n 
+0000147966 00000 n 
+0000345553 00000 n 
+0000009404 00000 n 
+0000009546 00000 n 
+0000151271 00000 n 
+0000345460 00000 n 
+0000009604 00000 n 
+0000009756 00000 n 
+0000151334 00000 n 
+0000345381 00000 n 
+0000009814 00000 n 
+0000010101 00000 n 
+0000156610 00000 n 
+0000345249 00000 n 
+0000010148 00000 n 
+0000010513 00000 n 
+0000159436 00000 n 
+0000345184 00000 n 
+0000010565 00000 n 
+0000010738 00000 n 
+0000164531 00000 n 
+0000345066 00000 n 
+0000010785 00000 n 
+0000010915 00000 n 
+0000164594 00000 n 
+0000344987 00000 n 
+0000010967 00000 n 
+0000011109 00000 n 
+0000178295 00000 n 
+0000344894 00000 n 
+0000011161 00000 n 
+0000011775 00000 n 
+0000183609 00000 n 
+0000344801 00000 n 
+0000011827 00000 n 
+0000012303 00000 n 
+0000186436 00000 n 
+0000344708 00000 n 
+0000012355 00000 n 
+0000012781 00000 n 
+0000189332 00000 n 
+0000344615 00000 n 
+0000012833 00000 n 
+0000013140 00000 n 
+0000192311 00000 n 
+0000344522 00000 n 
+0000013192 00000 n 
+0000013582 00000 n 
+0000198027 00000 n 
+0000344429 00000 n 
+0000013634 00000 n 
+0000014108 00000 n 
+0000201118 00000 n 
+0000344350 00000 n 
+0000014160 00000 n 
+0000014439 00000 n 
+0000014993 00000 n 
+0000015233 00000 n 
+0000014491 00000 n 
+0000015107 00000 n 
+0000015170 00000 n 
+0000253743 00000 n 
+0000248493 00000 n 
+0000240361 00000 n 
+0000237216 00000 n 
+0000343171 00000 n 
+0000018033 00000 n 
+0000018184 00000 n 
+0000018335 00000 n 
+0000018493 00000 n 
+0000018651 00000 n 
+0000018808 00000 n 
+0000018970 00000 n 
+0000019132 00000 n 
+0000019294 00000 n 
+0000019452 00000 n 
+0000019614 00000 n 
+0000019776 00000 n 
+0000019932 00000 n 
+0000020094 00000 n 
+0000020255 00000 n 
+0000020417 00000 n 
+0000020579 00000 n 
+0000020739 00000 n 
+0000020901 00000 n 
+0000021062 00000 n 
+0000021224 00000 n 
+0000021381 00000 n 
+0000021539 00000 n 
+0000021701 00000 n 
+0000021861 00000 n 
+0000022023 00000 n 
+0000022175 00000 n 
+0000022333 00000 n 
+0000022491 00000 n 
+0000022653 00000 n 
+0000022812 00000 n 
+0000022974 00000 n 
+0000023136 00000 n 
+0000023294 00000 n 
+0000023456 00000 n 
+0000023617 00000 n 
+0000023779 00000 n 
+0000023941 00000 n 
+0000024101 00000 n 
+0000024263 00000 n 
+0000024424 00000 n 
+0000026354 00000 n 
+0000024647 00000 n 
+0000017554 00000 n 
+0000015344 00000 n 
+0000024584 00000 n 
+0000017684 00000 n 
+0000026516 00000 n 
+0000026679 00000 n 
+0000026840 00000 n 
+0000026992 00000 n 
+0000027150 00000 n 
+0000027301 00000 n 
+0000027459 00000 n 
+0000027772 00000 n 
+0000028086 00000 n 
+0000028242 00000 n 
+0000028400 00000 n 
+0000028558 00000 n 
+0000028872 00000 n 
+0000029091 00000 n 
+0000026067 00000 n 
+0000024732 00000 n 
+0000029028 00000 n 
+0000027615 00000 n 
+0000027929 00000 n 
+0000028715 00000 n 
+0000026197 00000 n 
+0000030189 00000 n 
+0000029951 00000 n 
+0000029176 00000 n 
+0000030065 00000 n 
+0000034280 00000 n 
+0000033601 00000 n 
+0000030274 00000 n 
+0000033715 00000 n 
+0000033839 00000 n 
+0000341750 00000 n 
+0000342037 00000 n 
+0000342180 00000 n 
+0000342888 00000 n 
+0000342463 00000 n 
+0000341607 00000 n 
+0000342322 00000 n 
+0000341894 00000 n 
+0000342604 00000 n 
+0000343028 00000 n 
+0000033903 00000 n 
+0000224897 00000 n 
+0000034028 00000 n 
+0000034091 00000 n 
+0000222949 00000 n 
+0000342747 00000 n 
+0000034154 00000 n 
+0000034217 00000 n 
+0000037315 00000 n 
+0000037075 00000 n 
+0000034534 00000 n 
+0000037189 00000 n 
+0000215009 00000 n 
+0000037252 00000 n 
+0000040324 00000 n 
+0000039958 00000 n 
+0000037439 00000 n 
+0000040072 00000 n 
+0000040135 00000 n 
+0000040198 00000 n 
+0000040261 00000 n 
+0000043251 00000 n 
+0000042949 00000 n 
+0000040448 00000 n 
+0000043063 00000 n 
+0000043126 00000 n 
+0000047124 00000 n 
+0000046571 00000 n 
+0000043375 00000 n 
+0000046685 00000 n 
+0000046748 00000 n 
+0000046811 00000 n 
+0000046874 00000 n 
+0000046937 00000 n 
+0000047000 00000 n 
+0000051640 00000 n 
+0000051153 00000 n 
+0000047209 00000 n 
+0000051267 00000 n 
+0000051514 00000 n 
+0000051577 00000 n 
+0000055789 00000 n 
+0000055363 00000 n 
+0000051829 00000 n 
+0000055477 00000 n 
+0000055540 00000 n 
+0000055603 00000 n 
+0000343323 00000 n 
+0000060371 00000 n 
+0000060774 00000 n 
+0000060212 00000 n 
+0000055952 00000 n 
+0000060587 00000 n 
+0000060342 00000 n 
+0000065087 00000 n 
+0000065684 00000 n 
+0000064928 00000 n 
+0000060937 00000 n 
+0000065242 00000 n 
+0000065367 00000 n 
+0000065431 00000 n 
+0000065495 00000 n 
+0000065559 00000 n 
+0000065058 00000 n 
+0000069720 00000 n 
+0000069420 00000 n 
+0000065899 00000 n 
+0000069534 00000 n 
+0000073419 00000 n 
+0000072994 00000 n 
+0000069805 00000 n 
+0000073108 00000 n 
+0000076972 00000 n 
+0000076609 00000 n 
+0000073556 00000 n 
+0000076723 00000 n 
+0000079909 00000 n 
+0000079608 00000 n 
+0000077057 00000 n 
+0000079722 00000 n 
+0000083151 00000 n 
+0000082848 00000 n 
+0000080072 00000 n 
+0000082962 00000 n 
+0000206835 00000 n 
+0000085853 00000 n 
+0000085676 00000 n 
+0000083301 00000 n 
+0000085790 00000 n 
+0000088552 00000 n 
+0000088375 00000 n 
+0000085977 00000 n 
+0000088489 00000 n 
+0000091708 00000 n 
+0000091407 00000 n 
+0000088676 00000 n 
+0000091521 00000 n 
+0000343475 00000 n 
+0000095227 00000 n 
+0000094987 00000 n 
+0000091832 00000 n 
+0000095101 00000 n 
+0000098541 00000 n 
+0000098175 00000 n 
+0000095364 00000 n 
+0000098289 00000 n 
+0000101197 00000 n 
+0000101020 00000 n 
+0000098691 00000 n 
+0000101134 00000 n 
+0000103569 00000 n 
+0000103392 00000 n 
+0000101334 00000 n 
+0000103506 00000 n 
+0000105680 00000 n 
+0000105503 00000 n 
+0000103706 00000 n 
+0000105617 00000 n 
+0000108003 00000 n 
+0000107826 00000 n 
+0000105817 00000 n 
+0000107940 00000 n 
+0000110729 00000 n 
+0000110552 00000 n 
+0000108140 00000 n 
+0000110666 00000 n 
+0000113201 00000 n 
+0000113024 00000 n 
+0000110866 00000 n 
+0000113138 00000 n 
+0000115570 00000 n 
+0000115393 00000 n 
+0000113338 00000 n 
+0000115507 00000 n 
+0000118261 00000 n 
+0000118084 00000 n 
+0000115707 00000 n 
+0000118198 00000 n 
+0000343627 00000 n 
+0000120938 00000 n 
+0000120761 00000 n 
+0000118398 00000 n 
+0000120875 00000 n 
+0000123585 00000 n 
+0000123408 00000 n 
+0000121075 00000 n 
+0000123522 00000 n 
+0000126491 00000 n 
+0000126251 00000 n 
+0000123722 00000 n 
+0000126365 00000 n 
+0000128790 00000 n 
+0000128613 00000 n 
+0000126641 00000 n 
+0000128727 00000 n 
+0000131641 00000 n 
+0000131401 00000 n 
+0000128862 00000 n 
+0000131515 00000 n 
+0000134760 00000 n 
+0000134520 00000 n 
+0000131726 00000 n 
+0000134634 00000 n 
+0000138417 00000 n 
+0000138114 00000 n 
+0000134845 00000 n 
+0000138228 00000 n 
+0000141620 00000 n 
+0000141443 00000 n 
+0000138502 00000 n 
+0000141557 00000 n 
+0000144901 00000 n 
+0000144598 00000 n 
+0000141692 00000 n 
+0000144712 00000 n 
+0000148029 00000 n 
+0000147726 00000 n 
+0000144986 00000 n 
+0000147840 00000 n 
+0000343779 00000 n 
+0000151397 00000 n 
+0000151094 00000 n 
+0000148114 00000 n 
+0000151208 00000 n 
+0000154257 00000 n 
+0000154080 00000 n 
+0000151482 00000 n 
+0000154194 00000 n 
+0000156673 00000 n 
+0000156433 00000 n 
+0000154329 00000 n 
+0000156547 00000 n 
+0000159499 00000 n 
+0000159259 00000 n 
 0000156758 00000 n 
-0000159186 00000 n 
-0000162385 00000 n 
-0000162208 00000 n 
-0000159386 00000 n 
-0000162322 00000 n 
-0000165016 00000 n 
-0000164839 00000 n 
-0000162522 00000 n 
-0000164953 00000 n 
-0000167457 00000 n 
-0000167280 00000 n 
-0000165153 00000 n 
-0000167394 00000 n 
-0000170311 00000 n 
-0000170071 00000 n 
-0000167581 00000 n 
-0000170185 00000 n 
-0000172731 00000 n 
-0000172554 00000 n 
-0000170461 00000 n 
-0000172668 00000 n 
-0000335776 00000 n 
-0000175798 00000 n 
-0000175558 00000 n 
-0000172855 00000 n 
-0000175672 00000 n 
-0000178582 00000 n 
-0000178342 00000 n 
-0000175948 00000 n 
-0000178456 00000 n 
-0000181757 00000 n 
-0000181517 00000 n 
-0000178732 00000 n 
-0000181631 00000 n 
-0000184569 00000 n 
-0000184329 00000 n 
-0000181907 00000 n 
-0000184443 00000 n 
-0000187052 00000 n 
-0000186875 00000 n 
-0000184719 00000 n 
-0000186989 00000 n 
-0000190270 00000 n 
-0000190030 00000 n 
-0000187176 00000 n 
-0000190144 00000 n 
-0000193108 00000 n 
-0000192868 00000 n 
-0000190420 00000 n 
-0000192982 00000 n 
-0000195174 00000 n 
-0000194997 00000 n 
-0000193258 00000 n 
-0000195111 00000 n 
-0000195311 00000 n 
-0000198116 00000 n 
-0000195531 00000 n 
-0000195636 00000 n 
-0000198355 00000 n 
-0000199091 00000 n 
-0000199295 00000 n 
-0000205995 00000 n 
-0000199782 00000 n 
-0000199904 00000 n 
-0000206234 00000 n 
-0000207207 00000 n 
-0000207409 00000 n 
-0000207504 00000 n 
-0000213939 00000 n 
-0000207967 00000 n 
-0000208083 00000 n 
-0000214181 00000 n 
-0000215152 00000 n 
-0000215359 00000 n 
-0000216251 00000 n 
-0000215402 00000 n 
-0000215497 00000 n 
-0000216489 00000 n 
-0000217095 00000 n 
-0000217297 00000 n 
-0000217323 00000 n 
-0000217569 00000 n 
-0000217809 00000 n 
-0000217863 00000 n 
-0000218055 00000 n 
-0000218119 00000 n 
-0000218155 00000 n 
-0000218319 00000 n 
-0000218567 00000 n 
-0000218781 00000 n 
-0000228106 00000 n 
-0000219249 00000 n 
-0000219363 00000 n 
-0000228345 00000 n 
-0000229318 00000 n 
-0000229521 00000 n 
-0000231558 00000 n 
-0000229680 00000 n 
-0000229785 00000 n 
-0000231797 00000 n 
-0000232463 00000 n 
-0000232666 00000 n 
-0000239483 00000 n 
-0000233073 00000 n 
-0000233183 00000 n 
-0000239720 00000 n 
-0000240592 00000 n 
-0000240792 00000 n 
-0000244749 00000 n 
-0000241086 00000 n 
-0000241191 00000 n 
-0000244993 00000 n 
-0000245749 00000 n 
-0000253867 00000 n 
-0000245956 00000 n 
-0000266122 00000 n 
-0000254177 00000 n 
-0000274917 00000 n 
-0000266404 00000 n 
-0000284094 00000 n 
-0000275170 00000 n 
-0000292593 00000 n 
-0000284339 00000 n 
-0000300160 00000 n 
-0000292896 00000 n 
-0000307845 00000 n 
-0000300405 00000 n 
-0000315911 00000 n 
-0000308085 00000 n 
-0000323336 00000 n 
-0000316201 00000 n 
-0000330783 00000 n 
-0000323574 00000 n 
-0000333233 00000 n 
-0000331024 00000 n 
-0000335919 00000 n 
-0000336024 00000 n 
-0000341041 00000 n 
-0000341707 00000 n 
-0000342346 00000 n 
-0000343048 00000 n 
-0000344022 00000 n 
-0000344384 00000 n 
-0000344503 00000 n 
-0000344541 00000 n 
-0000344709 00000 n 
+0000159373 00000 n 
+0000161853 00000 n 
+0000161676 00000 n 
+0000159649 00000 n 
+0000161790 00000 n 
+0000164657 00000 n 
+0000164354 00000 n 
+0000161990 00000 n 
+0000164468 00000 n 
+0000167298 00000 n 
+0000167121 00000 n 
+0000164807 00000 n 
+0000167235 00000 n 
+0000170434 00000 n 
+0000170257 00000 n 
+0000167435 00000 n 
+0000170371 00000 n 
+0000173066 00000 n 
+0000172889 00000 n 
+0000170571 00000 n 
+0000173003 00000 n 
+0000175506 00000 n 
+0000175329 00000 n 
+0000173203 00000 n 
+0000175443 00000 n 
+0000343931 00000 n 
+0000178358 00000 n 
+0000178118 00000 n 
+0000175630 00000 n 
+0000178232 00000 n 
+0000180790 00000 n 
+0000180613 00000 n 
+0000178508 00000 n 
+0000180727 00000 n 
+0000183672 00000 n 
+0000183432 00000 n 
+0000180914 00000 n 
+0000183546 00000 n 
+0000186499 00000 n 
+0000186259 00000 n 
+0000183822 00000 n 
+0000186373 00000 n 
+0000189395 00000 n 
+0000189155 00000 n 
+0000186649 00000 n 
+0000189269 00000 n 
+0000192374 00000 n 
+0000192134 00000 n 
+0000189545 00000 n 
+0000192248 00000 n 
+0000195056 00000 n 
+0000194879 00000 n 
+0000192524 00000 n 
+0000194993 00000 n 
+0000198090 00000 n 
+0000197850 00000 n 
+0000195193 00000 n 
+0000197964 00000 n 
+0000201181 00000 n 
+0000200941 00000 n 
+0000198240 00000 n 
+0000201055 00000 n 
+0000203076 00000 n 
+0000202899 00000 n 
+0000201331 00000 n 
+0000203013 00000 n 
+0000344083 00000 n 
+0000203213 00000 n 
+0000206018 00000 n 
+0000203433 00000 n 
+0000203538 00000 n 
+0000206257 00000 n 
+0000206993 00000 n 
+0000207197 00000 n 
+0000213953 00000 n 
+0000207700 00000 n 
+0000207822 00000 n 
+0000214192 00000 n 
+0000215165 00000 n 
+0000215367 00000 n 
+0000215462 00000 n 
+0000221897 00000 n 
+0000215925 00000 n 
+0000216041 00000 n 
+0000222139 00000 n 
+0000223110 00000 n 
+0000223317 00000 n 
+0000224209 00000 n 
+0000223360 00000 n 
+0000223455 00000 n 
+0000224447 00000 n 
+0000225053 00000 n 
+0000225255 00000 n 
+0000225281 00000 n 
+0000225527 00000 n 
+0000225767 00000 n 
+0000225821 00000 n 
+0000226013 00000 n 
+0000226077 00000 n 
+0000226113 00000 n 
+0000226277 00000 n 
+0000226525 00000 n 
+0000226739 00000 n 
+0000236158 00000 n 
+0000227204 00000 n 
+0000227318 00000 n 
+0000236397 00000 n 
+0000237373 00000 n 
+0000237576 00000 n 
+0000239613 00000 n 
+0000237735 00000 n 
+0000237840 00000 n 
+0000239852 00000 n 
+0000240518 00000 n 
+0000240721 00000 n 
+0000247538 00000 n 
+0000241128 00000 n 
+0000241238 00000 n 
+0000247775 00000 n 
+0000248647 00000 n 
+0000248847 00000 n 
+0000252901 00000 n 
+0000249145 00000 n 
+0000249250 00000 n 
+0000253145 00000 n 
+0000253904 00000 n 
+0000262022 00000 n 
+0000254111 00000 n 
+0000274277 00000 n 
+0000262332 00000 n 
+0000283072 00000 n 
+0000274559 00000 n 
+0000292249 00000 n 
+0000283325 00000 n 
+0000300748 00000 n 
+0000292494 00000 n 
+0000308315 00000 n 
+0000301051 00000 n 
+0000316000 00000 n 
+0000308560 00000 n 
+0000324066 00000 n 
+0000316240 00000 n 
+0000331491 00000 n 
+0000324356 00000 n 
+0000338938 00000 n 
+0000331729 00000 n 
+0000341388 00000 n 
+0000339179 00000 n 
+0000344162 00000 n 
+0000344275 00000 n 
+0000349385 00000 n 
+0000350051 00000 n 
+0000350690 00000 n 
+0000351373 00000 n 
+0000352330 00000 n 
+0000352812 00000 n 
+0000352931 00000 n 
+0000352969 00000 n 
+0000353137 00000 n 
 trailer
-<< /Size 657 /Root 655 0 R /Info 656 0 R /ID [ <53AE5338721C480FBAD027E3A8F6AEA2> <53AE5338721C480FBAD027E3A8F6AEA2> ] >>
+<< /Size 673 /Root 671 0 R /Info 672 0 R /ID [ <2EA7022BB7DCD26B2022C6B7EF055674> <2EA7022BB7DCD26B2022C6B7EF055674> ] >>
 startxref
-344983
+353411
 %%EOF
diff --git a/documentation/VkFFT_API_guide.tex b/documentation/VkFFT_API_guide.tex
index 78ef1a8..edba5bf 100644
--- a/documentation/VkFFT_API_guide.tex
+++ b/documentation/VkFFT_API_guide.tex
@@ -37,14 +37,14 @@
 \begin{titlepage} 	
 \centering 	
 \vspace{1cm} 	
-{\scshape\LARGE VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero Fast Fourier Transform library \par} 		
+{\scshape\LARGE VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero/Metal Fast Fourier Transform library \par} 		
 \vspace{1.5cm} 	
 {\huge\bfseries API guide with examples\par} 	
 \vspace{2cm} 	
 {\Large Dmitrii Tolmachev\par} 	
 	
 \vspace{1cm} 	
-{\large August 2022, version 1.2.26\par} 
+{\large October 2022, version 1.2.30\par} 
 \end{titlepage}
 
 \newpage{}
@@ -55,7 +55,7 @@
 
 \section{Introduction}
 
-This document describes VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero
+This document describes VkFFT - Vulkan/CUDA/HIP/OpenCL/Level Zero/Metal
 Fast Fourier Transform library. It describes the features and current
 limitations of VkFFT, explains the API and compares it to other FFT
 libraries (like FFTW and cuFFT) on the set of examples. It is by no
@@ -98,13 +98,13 @@ project.
 \par\end{flushleft}
 \item Define VKFFT\_BACKEND as a number corresponding to the API used in
 the user's project: 0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 -
-Level Zero. Definition is done like:\begin{mdframed}[backgroundcolor=bg]
+Level Zero, 5 - Metal. Definition is done like:\begin{mdframed}[backgroundcolor=bg]
 \begin{minted}[tabsize=4,obeytabs,breaklines]{make}
 -DVKFFT_BACKEND=X
 \end{minted}
 \end{mdframed} in GCC or as \begin{mdframed}[backgroundcolor=bg]
 \begin{minted}[tabsize=4,obeytabs,breaklines]{cmake}
-set(VKFFT_BACKEND 1 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero")
+set(VKFFT_BACKEND 1 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal")
 \end{minted}
 \end{mdframed}in CMake.
 \item Depending on the API backend, the project must use additional libraries
@@ -200,6 +200,21 @@ target_compile_definitions(VkFFT INTERFACE -DVKFFT_BACKEND=4)
 target_link_libraries(${PROJECT_NAME} PUBLIC LevelZero VkFFT)
 \end{minted}
 \end{mdframed}
+\item Metal API: Metal. Sample CMakeLists can look like this:\begin{mdframed}[backgroundcolor=bg]
+\begin{minted}[tabsize=4,obeytabs,breaklines]{cmake}
+add_compile_options(-WMTL_IGNORE_WARNINGS)
+find_library(FOUNDATION_LIB Foundation REQUIRED)
+find_library(QUARTZ_CORE_LIB QuartzCore REQUIRED)	
+find_library(METAL_LIB Metal REQUIRED)
+target_include_directories(${PROJECT_NAME} PUBLIC "metal-cpp/")
+
+target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vkFFT/)
+add_library(VkFFT INTERFACE)
+target_compile_definitions(VkFFT INTERFACE -DVKFFT_BACKEND=5)
+
+target_link_libraries(${PROJECT_NAME} PUBLIC ${FOUNDATION_LIB} ${QUARTZ_CORE_LIB} ${METAL_LIB} VkFFT)
+\end{minted}
+\end{mdframed}
 \end{enumerate}
 \end{enumerate}
 
@@ -332,9 +347,9 @@ real numbers, so they don't require stride management, unlike R2C/C2R.
 VkFFT allows for explicit control over the data flow, which makes
 both in-place and out-of-place transforms possible. Buffers are passed
 to VkFFT as VkBuffer pointer in Vulkan, as double void pointers in
-CUDA/HIP/Level Zero and as cl\_mem pointer in OpenCL. This is done
-to maintain a uniform data pattern because some of the buffers can
-be allocated automatically. 
+CUDA/HIP/Level Zero, as cl\_mem pointer in OpenCL and as MTL::Buffer
+pointer in Metal. This is done to maintain a uniform data pattern
+because some of the buffers can be allocated automatically. 
 
 The main buffer is called buffer and it always has to be provided,
 either during the plan creation or when the plan is executed. All
@@ -889,6 +904,10 @@ pointer in VkFFTLaunchParams.
 calls to user-defined command list ze\_command\_list\_handle\_t. They
 have execution barriers between. ze\_command\_list\_handle\_t must
 be provided as a pointer in VkFFTLaunchParams. 
+\item Metal API: similar to Vulkan, VkFFT appends a sequence of dispatchThreads
+calls to user-defined command encoder MTL::ComputeCommandEncoder.
+MTL::ComputeCommandEncoder and its MTL::CommandBuffer must be provided
+as pointers in VkFFTLaunchParams. 
 \end{itemize}
 If VkFFT fails during the VkFFTAppend call, it will not free the application
 and allocated there resources - use a separate call for that.
@@ -952,6 +971,9 @@ ze_device_handle_t* device;	// Pointer to Level Zero device, obtained from zeDev
 ze_context_handle_t* context;	// Pointer to Level Zero context, obtained from zeContextCreate
 ze_command_queue_handle_t* commandQueue;	// Pointer to Level Zero command queue with compute and copy capabilities, obtained from zeCommandQueueCreate
 uint32_t commandQueueID;	// ID of the commandQueue with compute and copy capabilities
+#elif(VKFFT_BACKEND==5)
+MTL::Device* device;	// Pointer to Metal device, obtained from MTL::CopyAllDevices
+MTL::CommandQueue* queue;	// Pointer to Metal queue, obtained from device->newCommandQueue()
 #endif
 
 // Data parameters (buffers can be specified at launch):
@@ -990,6 +1012,18 @@ cl_mem* tempBuffer;	// Needed if reorderFourStep is enabled to transpose the arr
 cl_mem* inputBuffer;	// Pointer to device buffer used to read data from if isInputFormatted is enabled
 cl_mem* outputBuffer;	// Pointer to device buffer used to write data to if isOutputFormatted is enabled
 cl_mem* kernel;	// Pointer to device buffer used to read kernel data from if performConvolution is enabled
+#elif(VKFFT_BACKEND==4)
+void** buffer;	// Pointer to device buffer used for computations
+void** tempBuffer;	// Needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
+void** inputBuffer;	// Pointer to device buffer used to read data from if isInputFormatted is enabled
+void** outputBuffer;	// Pointer to device buffer used to write data to if isOutputFormatted is enabled
+void** kernel;	// Pointer to device buffer used to read kernel data from if performConvolution is enabled
+#elif(VKFFT_BACKEND==5)
+MTL::Buffer** buffer;	// Pointer to device buffer used for computations
+MTL::Buffer** tempBuffer;	// Needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
+MTL::Buffer** inputBuffer;	// Pointer to device buffer used to read data from if isInputFormatted is enabled
+MTL::Buffer** outputBuffer;	// Pointer to device buffer used to write data to if isOutputFormatted is enabled
+MTL::Buffer** kernel;	// Pointer to device buffer used to read kernel data from if performConvolution is enabled
 #endif
 uint64_t bufferOffset;	// Specify if VkFFT has to offset the first element position inside the buffer. In bytes. Default 0
 uint64_t tempBufferOffset;	// Specify if VkFFT has to offset the first element position inside the temp buffer. In bytes. Default 0
@@ -1029,9 +1063,11 @@ uint64_t considerAllAxesStrided;	// Will create plan for non-strided axis simila
 uint64_t keepShaderCode;	// Will keep shader code and print all executed shaders during the plan execution in order (0 - off, 1 - on)
 uint64_t printMemoryLayout;	// Will print order of buffers used in shaders (0 - off, 1 - on) 
 
-uint64_t saveApplicationToString;	// Will save all compiled binaries to VkFFTApplication.saveApplicationString (will be allocated by VkFFT, deallocated with deleteVkFFT call). VkFFTApplication.applicationStringSize will contain size of binary in bytes. (0 - off, 1 - on)
-uint64_t loadApplicationFromString;	// Will load all binaries from loadApplicationString instead of recompiling them (must be allocated by user, must contain what saveApplicationToString call generated previously in VkFFTApplication.saveApplicationString). (0 - off, 1 - on). Mutually exclusive with saveApplicationToString
-void* loadApplicationString;	// Memory array (uint32_t* for Vulkan/HIP, char* for CUDA/OpenCL) through which user can load VkFFT binaries, must be provided by user if loadApplicationFromString = 1. 
+uint64_t saveApplicationToString;	// Will save all compiled binaries to VkFFTApplication.saveApplicationString (will be allocated by VkFFT, deallocated with deleteVkFFT call). VkFFTApplication.applicationStringSize will contain size of binary in bytes. Currently disabled in Metal backend. (0 - off, 1 - on)
+uint64_t loadApplicationFromString;	// Will load all binaries from loadApplicationString instead of recompiling them (must be allocated by user, must contain what saveApplicationToString call generated previously in VkFFTApplication.saveApplicationString). Currently disabled in Metal backend. (0 - off, 1 - on). Mutually exclusive with saveApplicationToString
+void* loadApplicationString;	// Memory binary array through which user can load VkFFT binaries, must be provided by user if loadApplicationFromString = 1. Use rb/wb flags to load/save.
+
+uint64_t disableSetLocale;	// disables all VkFFT attempts to set locale to C - user must ensure that VkFFT has C locale during the plan initialization. This option is needed for multithreading. Default 0. 
 
 //optional Bluestein optimizations: (default 0 if not stated otherwise)
 uint64_t fixMaxRadixBluestein;	// controls the padding of sequences in Bluestein convolution. If specified, padded sequence will be made of up to fixMaxRadixBluestein primes. Default: 2 for CUDA and Vulkan/OpenCL/HIP up to 1048576 combined dimension FFT system, 7 for Vulkan/OpenCL/HIP past after. Min = 2, Max = 13.
@@ -1040,6 +1076,12 @@ uint64_t useCustomBluesteinPaddingPattern;	// force the sequence sizes to pad to
 uint64_t* primeSizes;	// described in useCustomBluesteinPaddingPattern
 uint64_t* paddedSizes;	// described in useCustomBluesteinPaddingPattern
 
+uint64_t fixMinRaderPrimeMult;	// start direct multiplication Rader's algorithm for radix primes from this number. This means that VkFFT will inline custom Rader kernels if sequence is divisible by these primes. Default is 17, as VkFFT has kernels for 2-13. If you make it less than 13, VkFFT will switch from these kernels to Rader.
+uint64_t fixMaxRaderPrimeMult;	// switch away from the direct multiplication version of Rader's algorithm for radix primes starting from this number. Current limitation for Rader is maxThreadNum/2+1, realistically you would want to switch somewhere in the 30-100 range. Default is vendor-specific (currently ~40)
+
+uint64_t fixMinRaderPrimeFFT;	// start FFT convolution version of Rader for radix primes from this number. Better than direct multiplication version for almost all primes (except small ones, like 17-23 on some GPUs). Must be greater than or equal to fixMinRaderPrimeMult. Default 29 on AMD and 17 on other GPUs. 
+uint64_t fixMaxRaderPrimeFFT;	// switch to Bluestein's algorithm for radix primes from this number. Switch may happen earlier if prime can't fit in shared memory. Default is 16384, which is bigger than most current GPU's shared memory.
+
 // Optional zero padding control parameters: (default 0 if not stated otherwise)
 uint64_t performZeropadding[3];	// Don't read some data/perform computations if some input sequences are zeropadded for each axis (0 - off, 1 - on)
 uint64_t fft_zeropad_left[3];	// Specify start boundary of zero block in the system for each axis
@@ -1055,7 +1097,7 @@ uint64_t numberKernels;	// N - only used in convolution step - specify how many
 uint64_t kernelConvolution;	// Specify if this application is used to create kernel for convolution, so it has the same properties. performConvolution has to be set to 0 for kernel creation
 
 // Register overutilization (experimental): (default 0 if not stated otherwise)
-uint64_t registerBoost;	// Specify if register file size is bigger than shared memory and can be used to extend it X times (on Nvidia 256KB register file can be used instead of 32KB of shared memory, set this constant to 4 to emulate 128KB of shared memory). Defaults: Nvidia - 4 in Vulkan/OpenCL, 1 in CUDA backend; AMD - 2 if shared memory >= 64KB, else 4 in Vulkan/OpenCL backend, 1 in HIP backend; Intel - 1 if shared memory >= 64KB, else 2 in Vulkan/OpenCL/Level Zero backends; Default 1
+uint64_t registerBoost;	// Specify if register file size is bigger than shared memory and can be used to extend it X times (on Nvidia 256KB register file can be used instead of 32KB of shared memory, set this constant to 4 to emulate 128KB of shared memory). Defaults: Nvidia - 4 in Vulkan/OpenCL, 1 in CUDA backend; AMD - 2 if shared memory >= 64KB, else 4 in Vulkan/OpenCL backend, 1 in HIP backend; Intel - 1 if shared memory >= 64KB, else 2 in Vulkan/OpenCL/Level Zero backends, 1 in Metal; Default 1
 uint64_t registerBoostNonPow2;	// Specify if register overutilization should be used on non power of 2 sequences (0 - off, 1 - on)
 uint64_t registerBoost4Step;	// Specify if register file overutilization should be used in big sequences (>2^14), same definition as registerBoost. Default 1
 //not used techniques:
@@ -1064,6 +1106,8 @@ uint64_t devicePageSize;	// In KB, the size of a page on the GPU. Setting to 0 d
 uint64_t localPageSize;	// In KB, the size to split page into if sequence spans multiple devicePageSize pages
 
 // Automatically filled based on device info (still can be reconfigured by user):
+uint64_t computeCapabilityMajor;	// CUDA/HIP compute capability of the device
+uint64_t computeCapabilityMinor;	// CUDA/HIP compute capability of the device 	
 uint64_t maxComputeWorkGroupCount[3];	// maxComputeWorkGroupCount from VkPhysicalDeviceLimits
 uint64_t maxComputeWorkGroupSize[3];	// maxComputeWorkGroupCount from VkPhysicalDeviceLimits
 uint64_t maxThreadsNum;	// Max number of threads from VkPhysicalDeviceLimits
@@ -1077,6 +1121,7 @@ uint64_t reorderFourStep;	// Unshuffle Four step algorithm. Requires tempbuffer
 int64_t maxCodeLength;	// Specify how big can be buffer used for code generation (in char). Default 1000000 chars. 
 int64_t maxTempLength;	// Specify how big can be buffer used for intermediate string sprintfs be (in char). Default 5000 chars. If code segfaults for some reason - try increasing this number.
 uint64_t autoCustomBluesteinPaddingPattern; // default value for useCustomBluesteinPaddingPattern
+uint64_t useRaderUintLUT; // allocate additional LUT to store g_pow
 uint64_t vendorID; // vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002 - AMD, etc
 #if(VKFFT_BACKEND==0) //Vulkan API
 VkDeviceMemory tempBufferDeviceMemory;	// Filled at app creation
@@ -1094,6 +1139,9 @@ uint64_t streamID;	// Filled at app creation
 cl_command_queue* commandQueue;	// Filled at app creation
 #elif(VKFFT_BACKEND==4)
 ze_command_list_handle_t* commandList;	// Filled at app creation
+#elif(VKFFT_BACKEND==5)
+MTL::CommandBuffer* commandBuffer;	// Filled at app execution
+MTL::ComputeCommandEncoder* commandEncoder;	// Filled at app execution
 #endif
 } VkFFTConfiguration;
 \end{minted}
@@ -1159,6 +1207,12 @@ command queuewith compute and copy capabilities, obtained from zeCommandQueueCre
 \item uint32\_t commandQueueID - ID of the commandQueue with compute and
 copy capabilities
 \end{itemize}
+Metal API will need the following information:
+\begin{itemize}
+\item MTL::Device{*} device - Pointer to Metal device, obtained from MTL::CopyAllDevices
+\item MTL::CommandQueue{*} queue - Pointer to Metal queue, obtained from
+device->newCommandQueue()
+\end{itemize}
 
 \subsubsection{Memory management parameters}
 
@@ -1174,18 +1228,19 @@ There are five buffer types user can provide to VkFFT:
 \end{itemize}
 These buffers must be passed by a pointer: in Vulkan API they are
 provided as VkBuffer{*}, in CUDA, HIP and Level Zero they are provided
-as void{*}{*}, in OpenCL, they are provided as cl\_mem{*}. Even though
-the underlying structure (VkBuffer, void{*}, cl\_mem) is not a memory
-but just a number that the driver can use to access corresponding
-allocated memory on the GPU, passing them by a pointer allows for
-the user to query multiple GPU allocated buffers for VkFFT to use.
-Currently, it is only supported in Vulkan API - each of five buffer
-types can be made out of multiple separate memory allocations. For
-example, it is possible to combine multiple small unused at the point
-of FFT calculation buffers to form a tempBuffer. This option also
-allows Vulkan API to overcome the limit of 4GB for a single memory
-allocation - due to the fact that Vulkan can only use 32-bit numbers
-for addressing (other APIs support 64-bit addressing). 
+as void{*}{*}, in OpenCL they are provided as cl\_mem{*}, in Metal
+they are provided as MTL::Buffer{*}{*}. Even though the underlying structure
+(VkBuffer, void{*}, cl\_mem, MTL::Buffer{*}) is not a memory but just
+a number that the driver can use to access corresponding allocated
+memory on the GPU, passing them by a pointer allows for the user to
+query multiple GPU allocated buffers for VkFFT to use. Currently,
+it is only supported in Vulkan API - each of five buffer types can
+be made out of multiple separate memory allocations. For example,
+it is possible to combine multiple small unused at the point of FFT
+calculation buffers to form a tempBuffer. This option also allows
+Vulkan API to overcome the limit of 4GB for a single memory allocation
+- due to the fact that Vulkan can only use 32-bit numbers for addressing
+(other APIs support 64-bit addressing). 
 
 To use the buffers other than the main buffer, the user has to specify
 this in configuration at the application creation stage (set to zero
@@ -1265,15 +1320,17 @@ to enable. Optional parameter.
 
 uint64\_t doublePrecision - perform calculations in double precision.
 Default 0, set to 1 to enable. In Vulkan/OpenCL/Level Zero your device
-must support double-precision functionality. Optional parameter.
+must support double precision functionality. Metal API does not support
+double precision. Optional parameter.
 
 uint64\_t doublePrecisionFloatMemory - perform calculations in double
 precision, but all intermediate and final storage in float. Input/Output/main
 buffers must have single-precision layout. doublePrecision must be
 set to 0. This option increases precision, but not that much to be
 recommended for actual use. Default 0, set to 1 to enable. In Vulkan/OpenCL/Level
-Zero your device must support double-precision functionality. Experimental
-feature. Optional parameter.
+Zero your device must support double precision functionality. Metal
+API does not support double precision. Experimental feature. Optional
+parameter.
 
 uint64\_t halfPrecision - half-precision in VkFFT is implemented only
 as memory optimization. All calculations are done in single precision
@@ -1378,20 +1435,47 @@ buffers used in kernels. Default 0, set to 1 to enable. Optional parameter.
 uint64\_t saveApplicationToString - will save all compiled binaries
 to VkFFTApplication.saveApplicationString (will be allocated by VkFFT,
 deallocated with deleteVkFFT call). VkFFTApplication.applicationStringSize
-will contain size of binary in bytes. Default 0, set to 1 to enable.
-Optional parameter.
+will contain size of binary in bytes. Currently disabled in Metal
+backend. Default 0, set to 1 to enable. Optional parameter.
 
 uint64\_t loadApplicationFromString - will load all binaries from
 loadApplicationString instead of recompiling them (loadApplicationString
 must be allocated by user, must contain what saveApplicationToString
 call generated previously in VkFFTApplication.saveApplicationString).
-Default 0, set to 1 to enable. Optional parameter. Mutually exclusive
-with saveApplicationToString 
+Currently disabled in Metal backend. Default 0, set to 1 to enable.
+Optional parameter. Mutually exclusive with saveApplicationToString.
+
+void{*} loadApplicationString - memory binary array through which
+user can load VkFFT binaries, must be provided by user if loadApplicationFromString
+= 1. Use rb/wb flags to load/save.
+
+uint64\_t disableSetLocale - disables all VkFFT attempts to set locale
+to C - user must ensure that VkFFT has C locale during the plan initialization.
+This option is needed for multithreading. Default 0.
+
+\subsubsection{Rader control parameters}
+
+uint64\_t fixMinRaderPrimeMult - start direct multiplication Rader's
+algorithm for radix primes from this number. This means that VkFFT
+will inline custom Rader kernels if sequence is divisible by these
+primes. Default is 17, as VkFFT has kernels for 2-13. If you make
+it less than 13, VkFFT will switch from these kernels to Rader.
 
-void{*} loadApplicationString - memory array (uint32\_t{*} for Vulkan,
-HIP and Level Zero, char{*} for CUDA/OpenCL) through which user can
-load VkFFT binaries, must be provided by user if loadApplicationFromString
-= 1. 
+uint64\_t fixMaxRaderPrimeMult - switch away from the direct multiplication
+version of Rader's algorithm for radix primes starting from this number. The
+current limit for Rader is maxThreadNum/2+1; realistically, you would want to
+switch somewhere in the 30-100 range. Default is vendor-specific (currently \textasciitilde 40).
+
+uint64\_t fixMinRaderPrimeFFT - start using the FFT convolution version of
+Rader's algorithm for radix primes from this number. It is better than the
+direct multiplication version for almost all primes (except small ones, like
+17-23 on some GPUs). Must be greater than or equal to fixMinRaderPrimeMult.
+Default is 29 on AMD and 17 on other GPUs.
+
+uint64\_t fixMaxRaderPrimeFFT - switch to Bluestein's algorithm for
+radix primes from this number. Switch may happen earlier if prime
+can't fit in shared memory. Default is 16384, which is bigger than
+the shared memory of most current GPUs.
 
 \subsubsection{Bluestein control parameters}
 
@@ -1497,8 +1581,8 @@ parameter for kernel generation.
 \subsubsection{Register overutilization}
 
 Only works in C2C mode, without convolution support. Enabled in Vulkan,
-OpenCL and Level Zero APIs only (it works in other APIs, but worse).
-Experimental feature.
+OpenCL and Level Zero APIs only (it works in other APIs, but worse,
+does not work in Metal). Experimental feature.
 
 uint64\_t registerBoost - specify if the register file size is bigger
 than shared memory and can be used to extend it X times (on Nvidia
@@ -1521,6 +1605,12 @@ as registerBoost. Default 1. Optional parameter.
 
 \subsubsection{Extra advanced parameters (filled automatically)}
 
+uint64\_t computeCapabilityMajor - major version of the CUDA/HIP compute
+capability of the device.
+
+uint64\_t computeCapabilityMinor - minor version of the CUDA/HIP compute
+capability of the device.
+
 uint64\_t maxComputeWorkGroupCount{[}3{]} - how many workgroups can
 be launched at one dispatch. Automatically derived from the driver,
 can be artificially lowered. Then VkFFT will perform a logical split
@@ -1566,10 +1656,11 @@ int64\_t maxTempLength - specify how big can the buffer used for intermediate
 string sprintf's be (in char). Default 5000 chars. If code segfaults
 for some reason - try increasing this number.
 
-uint64\_t autoCustomBluesteinPaddingPattern; // default value for
-useCustomBluesteinPaddingPattern
+uint64\_t autoCustomBluesteinPaddingPattern - default value for useCustomBluesteinPaddingPattern
 
-uint64\_t vendorID; // vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002
+uint64\_t useRaderUintLUT - allocate additional LUT to store g\_pow 
+
+uint64\_t vendorID - vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002
 - AMD, etc.
 
 \newpage{}
@@ -1683,6 +1774,9 @@ ze_device_handle_t device;
 ze_context_handle_t context;
 ze_command_queue_handle_t commandQueue;
 uint32_t commandQueueID;
+#elif(VKFFT_BACKEND==5) //Metal API
+MTL::Device* device;
+MTL::CommandQueue* queue;
 #endif
 uint64_t device_id; //an id of a device, reported by devices_list call
 } VkGPU;
@@ -1929,6 +2023,9 @@ launchParams.commandBuffer = &commandBuffer;
 launchParams.commandQueue = &commandQueue;
 #elif(VKFFT_BACKEND==4) //Level Zero API
 launchParams->commandList = &commandList;
+#elif(VKFFT_BACKEND==5) //Metal API
+launchParams.commandBuffer = commandBuffer;
+launchParams.commandEncoder = commandEncoder;
 #endif
 resFFT = VkFFTAppend(app, -1, &launchParams);
 
@@ -2162,7 +2259,8 @@ convolution_configuration.conjugateConvolution = 1;
 This example shows how to save/load binaries generated by VkFFT. This
 can reduce time taken by initializeVkFFT call by removing RTC components
 from it. Be sure that rest of the configuration stays the same to
-reuse the binary.
+reuse the binary. Use rb/wb flags to load/save. This does not currently
+work in Metal backend.
 
 \begin{mdframed}[backgroundcolor=bg]
 \begin{minted}[tabsize=4,obeytabs,breaklines]{C}
@@ -2176,11 +2274,7 @@ configuration.saveApplicationToString = 1;
 if (configuration.loadApplicationFromString) {
 	FILE* kernelCache;
 	uint64_t str_len;
-#if((VKFFT_BACKEND==0) || (VKFFT_BACKEND==2) || (VKFFT_BACKEND==4))
-	kernelCache = fopen("VkFFT_binary", "rb"); //Vulkan and HIP backends load data as a uint32_t sequence
-#else
-	kernelCache = fopen("VkFFT_binary", "r"); 
-#endif
+	kernelCache = fopen("VkFFT_binary", "rb");
 	fseek(kernelCache, 0, SEEK_END);
 	str_len = ftell(kernelCache);
 	fseek(kernelCache, 0, SEEK_SET);
@@ -2197,11 +2291,7 @@ if (configuration.loadApplicationFromString)
 
 if (configuration.saveApplicationToString) {
 	FILE* kernelCache;
-#if((VKFFT_BACKEND==0) || (VKFFT_BACKEND==2) || (VKFFT_BACKEND==4))
-	kernelCache = fopen("VkFFT_binary", "wb"); //Vulkan and HIP backends save data as a uint32_t sequence
-#else
-	kernelCache = fopen("VkFFT_binary", "w"); 
-#endif
+	kernelCache = fopen("VkFFT_binary", "wb");
 	fwrite(app.saveApplicationString, app.applicationStringSize, 1, kernelCache);
 	fclose(kernelCache);
 }
diff --git a/metal-cpp/Foundation/Foundation.hpp b/metal-cpp/Foundation/Foundation.hpp
new file mode 100644
index 0000000..408252e
--- /dev/null
+++ b/metal-cpp/Foundation/Foundation.hpp
@@ -0,0 +1,45 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/Foundation.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSArray.hpp"
+#include "NSAutoreleasePool.hpp"
+#include "NSBundle.hpp"
+#include "NSData.hpp"
+#include "NSDate.hpp"
+#include "NSDefines.hpp"
+#include "NSDictionary.hpp"
+#include "NSEnumerator.hpp"
+#include "NSError.hpp"
+#include "NSLock.hpp"
+#include "NSNotification.hpp"
+#include "NSNumber.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSProcessInfo.hpp"
+#include "NSRange.hpp"
+#include "NSString.hpp"
+#include "NSTypes.hpp"
+#include "NSURL.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSArray.hpp b/metal-cpp/Foundation/NSArray.hpp
new file mode 100644
index 0000000..8730df9
--- /dev/null
+++ b/metal-cpp/Foundation/NSArray.hpp
@@ -0,0 +1,115 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSArray.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSObject.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+class Array : public Copying<Array>
+{
+public:
+    static Array* array();
+    static Array* array(const Object* pObject);
+    static Array* array(const Object* const* pObjects, UInteger count);
+
+    static Array* alloc();
+
+    Array*        init();
+    Array*        init(const Object* const* pObjects, UInteger count);
+    Array*        init(const class Coder* pCoder);
+
+    template <class _Object = Object>
+    _Object* object(UInteger index) const;
+    UInteger count() const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Array::array()
+{
+    return Object::sendMessage<Array*>(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(array));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Array::array(const Object* pObject)
+{
+    return Object::sendMessage<Array*>(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(arrayWithObject_), pObject);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Array::array(const Object* const* pObjects, UInteger count)
+{
+    return Object::sendMessage<Array*>(_NS_PRIVATE_CLS(NSArray), _NS_PRIVATE_SEL(arrayWithObjects_count_), pObjects, count);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Array::alloc()
+{
+    return NS::Object::alloc<Array>(_NS_PRIVATE_CLS(NSArray));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Array::init()
+{
+    return NS::Object::init<Array>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Array::init(const Object* const* pObjects, UInteger count)
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(initWithObjects_count_), pObjects, count);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Array::init(const class Coder* pCoder)
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::Array::count() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(count));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Object>
+_NS_INLINE _Object* NS::Array::object(UInteger index) const
+{
+    return Object::sendMessage<_Object*>(this, _NS_PRIVATE_SEL(objectAtIndex_), index);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSAutoreleasePool.hpp b/metal-cpp/Foundation/NSAutoreleasePool.hpp
new file mode 100644
index 0000000..a2383a5
--- /dev/null
+++ b/metal-cpp/Foundation/NSAutoreleasePool.hpp
@@ -0,0 +1,83 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSAutoreleasePool.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+class AutoreleasePool : public Object
+{
+public:
+    static AutoreleasePool* alloc();
+    AutoreleasePool*        init();
+
+    void                    drain();
+
+    void                    addObject(Object* pObject);
+
+    static void             showPools();
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::AutoreleasePool* NS::AutoreleasePool::alloc()
+{
+    return NS::Object::alloc<AutoreleasePool>(_NS_PRIVATE_CLS(NSAutoreleasePool));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::AutoreleasePool* NS::AutoreleasePool::init()
+{
+    return NS::Object::init<AutoreleasePool>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::AutoreleasePool::drain()
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(drain));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::AutoreleasePool::addObject(Object* pObject)
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(addObject_), pObject);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::AutoreleasePool::showPools()
+{
+    Object::sendMessage<void>(_NS_PRIVATE_CLS(NSAutoreleasePool), _NS_PRIVATE_SEL(showPools));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSBundle.hpp b/metal-cpp/Foundation/NSBundle.hpp
new file mode 100644
index 0000000..2e8c539
--- /dev/null
+++ b/metal-cpp/Foundation/NSBundle.hpp
@@ -0,0 +1,374 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSBundle.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSNotification.hpp"
+#include "NSObject.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+_NS_CONST(NotificationName, BundleDidLoadNotification);
+_NS_CONST(NotificationName, BundleResourceRequestLowDiskSpaceNotification);
+
+class String* LocalizedString(const String* pKey, const String*);
+class String* LocalizedStringFromTable(const String* pKey, const String* pTbl, const String*);
+class String* LocalizedStringFromTableInBundle(const String* pKey, const String* pTbl, const class Bundle* pBdle, const String*);
+class String* LocalizedStringWithDefaultValue(const String* pKey, const String* pTbl, const class Bundle* pBdle, const String* pVal, const String*);
+
+class Bundle : public Referencing<Bundle>
+{
+public:
+    static Bundle*    mainBundle();
+
+    static Bundle*    bundle(const class String* pPath);
+    static Bundle*    bundle(const class URL* pURL);
+
+    static Bundle*    alloc();
+
+    Bundle*           init(const class String* pPath);
+    Bundle*           init(const class URL* pURL);
+
+    class Array*      allBundles() const;
+    class Array*      allFrameworks() const;
+
+    bool              load();
+    bool              unload();
+
+    bool              isLoaded() const;
+
+    bool              preflightAndReturnError(class Error** pError) const;
+    bool              loadAndReturnError(class Error** pError);
+
+    class URL*        bundleURL() const;
+    class URL*        resourceURL() const;
+    class URL*        executableURL() const;
+    class URL*        URLForAuxiliaryExecutable(const class String* pExecutableName) const;
+
+    class URL*        privateFrameworksURL() const;
+    class URL*        sharedFrameworksURL() const;
+    class URL*        sharedSupportURL() const;
+    class URL*        builtInPlugInsURL() const;
+    class URL*        appStoreReceiptURL() const;
+
+    class String*     bundlePath() const;
+    class String*     resourcePath() const;
+    class String*     executablePath() const;
+    class String*     pathForAuxiliaryExecutable(const class String* pExecutableName) const;
+
+    class String*     privateFrameworksPath() const;
+    class String*     sharedFrameworksPath() const;
+    class String*     sharedSupportPath() const;
+    class String*     builtInPlugInsPath() const;
+
+    class String*     bundleIdentifier() const;
+    class Dictionary* infoDictionary() const;
+    class Dictionary* localizedInfoDictionary() const;
+    class Object*     objectForInfoDictionaryKey(const class String* pKey);
+
+    class String*     localizedString(const class String* pKey, const class String* pValue = nullptr, const class String* pTableName = nullptr) const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_PRIVATE_DEF_CONST(NS::NotificationName, BundleDidLoadNotification);
+_NS_PRIVATE_DEF_CONST(NS::NotificationName, BundleResourceRequestLowDiskSpaceNotification);
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::LocalizedString(const String* pKey, const String*)
+{
+    return Bundle::mainBundle()->localizedString(pKey, nullptr, nullptr);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::LocalizedStringFromTable(const String* pKey, const String* pTbl, const String*)
+{
+    return Bundle::mainBundle()->localizedString(pKey, nullptr, pTbl);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::LocalizedStringFromTableInBundle(const String* pKey, const String* pTbl, const Bundle* pBdl, const String*)
+{
+    return pBdl->localizedString(pKey, nullptr, pTbl);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::LocalizedStringWithDefaultValue(const String* pKey, const String* pTbl, const Bundle* pBdl, const String* pVal, const String*)
+{
+    return pBdl->localizedString(pKey, pVal, pTbl);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Bundle* NS::Bundle::mainBundle()
+{
+    return Object::sendMessage<Bundle*>(_NS_PRIVATE_CLS(NSBundle), _NS_PRIVATE_SEL(mainBundle));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Bundle* NS::Bundle::bundle(const class String* pPath)
+{
+    return Object::sendMessage<Bundle*>(_NS_PRIVATE_CLS(NSBundle), _NS_PRIVATE_SEL(bundleWithPath_), pPath);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Bundle* NS::Bundle::bundle(const class URL* pURL)
+{
+    return Object::sendMessage<Bundle*>(_NS_PRIVATE_CLS(NSBundle), _NS_PRIVATE_SEL(bundleWithURL_), pURL);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Bundle* NS::Bundle::alloc()
+{
+    return Object::sendMessage<Bundle*>(_NS_PRIVATE_CLS(NSBundle), _NS_PRIVATE_SEL(alloc));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Bundle* NS::Bundle::init(const String* pPath)
+{
+    return Object::sendMessage<Bundle*>(this, _NS_PRIVATE_SEL(initWithPath_), pPath);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Bundle* NS::Bundle::init(const URL* pURL)
+{
+    return Object::sendMessage<Bundle*>(this, _NS_PRIVATE_SEL(initWithURL_), pURL);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Bundle::allBundles() const
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(allBundles));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Array* NS::Bundle::allFrameworks() const
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(allFrameworks));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Bundle::load()
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(load));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Bundle::unload()
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(unload));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Bundle::isLoaded() const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isLoaded));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Bundle::preflightAndReturnError(Error** pError) const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(preflightAndReturnError_), pError);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Bundle::loadAndReturnError(Error** pError)
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(loadAndReturnError_), pError);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::bundleURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(bundleURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::resourceURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(resourceURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::executableURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(executableURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::URLForAuxiliaryExecutable(const String* pExecutableName) const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(URLForAuxiliaryExecutable_), pExecutableName);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::privateFrameworksURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(privateFrameworksURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::sharedFrameworksURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(sharedFrameworksURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::sharedSupportURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(sharedSupportURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::builtInPlugInsURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(builtInPlugInsURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::Bundle::appStoreReceiptURL() const
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(appStoreReceiptURL));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::Bundle::bundlePath() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(bundlePath));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::Bundle::resourcePath() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(resourcePath));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the executablePath selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Bundle::executablePath() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(executablePath));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the pathForAuxiliaryExecutable_ selector with pExecutableName to this object; the reply is returned as String*.
+_NS_INLINE NS::String* NS::Bundle::pathForAuxiliaryExecutable(const String* pExecutableName) const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(pathForAuxiliaryExecutable_), pExecutableName);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the privateFrameworksPath selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Bundle::privateFrameworksPath() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(privateFrameworksPath));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the sharedFrameworksPath selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Bundle::sharedFrameworksPath() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(sharedFrameworksPath));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the sharedSupportPath selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Bundle::sharedSupportPath() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(sharedSupportPath));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the builtInPlugInsPath selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Bundle::builtInPlugInsPath() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(builtInPlugInsPath));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the bundleIdentifier selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Bundle::bundleIdentifier() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(bundleIdentifier));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the infoDictionary selector to this object and returns the reply as Dictionary*.
+_NS_INLINE NS::Dictionary* NS::Bundle::infoDictionary() const
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(infoDictionary));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the localizedInfoDictionary selector to this object and returns the reply as Dictionary*.
+_NS_INLINE NS::Dictionary* NS::Bundle::localizedInfoDictionary() const
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(localizedInfoDictionary));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends objectForInfoDictionaryKey_ with pKey; reply is returned as Object*. NOTE(review): not const-qualified, unlike the neighbouring accessors.
+_NS_INLINE NS::Object* NS::Bundle::objectForInfoDictionaryKey(const String* pKey)
+{
+    return Object::sendMessage<Object*>(this, _NS_PRIVATE_SEL(objectForInfoDictionaryKey_), pKey);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends localizedStringForKey_value_table_ with pKey, pValue, pTableName; the /* = nullptr */ markers presumably mirror the declaration's defaults.
+_NS_INLINE NS::String* NS::Bundle::localizedString(const String* pKey, const String* pValue /* = nullptr */, const String* pTableName /* = nullptr */) const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(localizedStringForKey_value_table_), pKey, pValue, pTableName);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSData.hpp b/metal-cpp/Foundation/NSData.hpp
new file mode 100644
index 0000000..1c699e4
--- /dev/null
+++ b/metal-cpp/Foundation/NSData.hpp
@@ -0,0 +1,54 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSData.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSObject.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declares NS::Data, derived from Copying<Data>, with two const accessors that are defined inline after the namespace.
+namespace NS
+{
+class Data : public Copying<Data>
+{
+public:
+    void*    mutableBytes() const; // const member that hands back a non-const void*
+    UInteger length() const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void* NS::Data::mutableBytes() const
+{
+    return Object::sendMessage<void*>(this, _NS_PRIVATE_SEL(mutableBytes));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::Data::length() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(length));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSDate.hpp b/metal-cpp/Foundation/NSDate.hpp
new file mode 100644
index 0000000..ee6c83a
--- /dev/null
+++ b/metal-cpp/Foundation/NSDate.hpp
@@ -0,0 +1,40 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSDate.hpp
+//
+// See LICENSE.txt for this project licensing information.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declares the TimeInterval alias and the NS::Date wrapper with its single static factory.
+namespace NS
+{
+// TimeInterval is a plain double; presumably seconds, judging by the `secs` parameter name below (confirm).
+using TimeInterval = double;
+// Date derives from Copying<Date>; the factory is defined inline after the namespace.
+class Date : public Copying<Date>
+{
+public:
+    static Date* dateWithTimeIntervalSinceNow(TimeInterval secs);
+};
+
+} // NS
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level send: dateWithTimeIntervalSinceNow_ goes to the NSDate class handle with `secs`; the reply is returned as Date*.
+_NS_INLINE NS::Date* NS::Date::dateWithTimeIntervalSinceNow(NS::TimeInterval secs)
+{
+    return NS::Object::sendMessage<NS::Date*>(_NS_PRIVATE_CLS(NSDate), _NS_PRIVATE_SEL(dateWithTimeIntervalSinceNow_), secs);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
\ No newline at end of file
diff --git a/metal-cpp/Foundation/NSDefines.hpp b/metal-cpp/Foundation/NSDefines.hpp
new file mode 100644
index 0000000..70e6708
--- /dev/null
+++ b/metal-cpp/Foundation/NSDefines.hpp
@@ -0,0 +1,41 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSDefines.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Attribute shorthands: weak import, default visibility, extern "C" export, forced inlining, packed layout.
+#define _NS_WEAK_IMPORT __attribute__((weak_import))
+#define _NS_EXPORT __attribute__((visibility("default")))
+#define _NS_EXTERN extern "C" _NS_EXPORT
+#define _NS_INLINE inline __attribute__((always_inline))
+#define _NS_PACKED __attribute__((packed))
+// Declaration helpers. NOTE(review): _NS_CONST already ends in ';', so a call site that adds its own ';' yields an empty declaration.
+#define _NS_CONST(type, name) _NS_EXTERN type const name;
+#define _NS_ENUM(type, name) enum name : type
+#define _NS_OPTIONS(type, name) \
+    using name = type;          \
+    enum : name
+// Cast helper plus static_assert checks comparing ns::name against the token-pasted ns##name (size and value respectively).
+#define _NS_CAST_TO_UINT(value) static_cast<NS::UInteger>(value)
+#define _NS_VALIDATE_SIZE(ns, name) static_assert(sizeof(ns::name) == sizeof(ns##name), "size mismatch " #ns "::" #name)
+#define _NS_VALIDATE_ENUM(ns, name) static_assert(_NS_CAST_TO_UINT(ns::name) == _NS_CAST_TO_UINT(ns##name), "value mismatch " #ns "::" #name)
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSDictionary.hpp b/metal-cpp/Foundation/NSDictionary.hpp
new file mode 100644
index 0000000..ccbe2bd
--- /dev/null
+++ b/metal-cpp/Foundation/NSDictionary.hpp
@@ -0,0 +1,128 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSDictionary.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSEnumerator.hpp"
+#include "NSObject.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declares NS::Dictionary (derived from NS::Copying<Dictionary>); every member is defined inline after the namespace.
+namespace NS
+{
+class Dictionary : public NS::Copying<Dictionary>
+{
+public:
+    static Dictionary* dictionary();
+    static Dictionary* dictionary(const Object* pObject, const Object* pKey);
+    static Dictionary* dictionary(const Object* const* pObjects, const Object* const* pKeys, UInteger count);
+    // Static allocation entry point, followed by the init() overloads.
+    static Dictionary* alloc();
+
+    Dictionary*        init();
+    Dictionary*        init(const Object* const* pObjects, const Object* const* pKeys, UInteger count);
+    Dictionary*        init(const class Coder* pCoder);
+    // Typed accessors: the template argument selects the pointer type the reply is returned as (Object by default).
+    template <class _KeyType = Object>
+    Enumerator<_KeyType>* keyEnumerator() const;
+
+    template <class _Object = Object>
+    _Object* object(const Object* pKey) const;
+    UInteger count() const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level send: the dictionary selector goes to the NSDictionary class handle; the reply is returned as Dictionary*.
+_NS_INLINE NS::Dictionary* NS::Dictionary::dictionary()
+{
+    return Object::sendMessage<Dictionary*>(_NS_PRIVATE_CLS(NSDictionary), _NS_PRIVATE_SEL(dictionary));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level send: dictionaryWithObject_forKey_ goes to the NSDictionary class handle with pObject and pKey, in that order.
+_NS_INLINE NS::Dictionary* NS::Dictionary::dictionary(const Object* pObject, const Object* pKey)
+{
+    return Object::sendMessage<Dictionary*>(_NS_PRIVATE_CLS(NSDictionary), _NS_PRIVATE_SEL(dictionaryWithObject_forKey_), pObject, pKey);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level send: dictionaryWithObjects_forKeys_count_ goes to the NSDictionary class handle with pObjects, pKeys and count.
+_NS_INLINE NS::Dictionary* NS::Dictionary::dictionary(const Object* const* pObjects, const Object* const* pKeys, UInteger count)
+{
+    return Object::sendMessage<Dictionary*>(_NS_PRIVATE_CLS(NSDictionary), _NS_PRIVATE_SEL(dictionaryWithObjects_forKeys_count_),
+        pObjects, pKeys, count);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Dictionary* NS::Dictionary::alloc()
+{
+    return NS::Object::alloc<Dictionary>(_NS_PRIVATE_CLS(NSDictionary));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Dictionary* NS::Dictionary::init()
+{
+    return NS::Object::init<Dictionary>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends initWithObjects_forKeys_count_ to this object with pObjects, pKeys and count; the reply is returned as Dictionary*.
+_NS_INLINE NS::Dictionary* NS::Dictionary::init(const Object* const* pObjects, const Object* const* pKeys, UInteger count)
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(initWithObjects_forKeys_count_), pObjects, pKeys, count);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends initWithCoder_ to this object with pCoder; the reply is returned as Dictionary*.
+_NS_INLINE NS::Dictionary* NS::Dictionary::init(const class Coder* pCoder)
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the keyEnumerator selector to this object; the reply is returned as Enumerator<_KeyType>*.
+template <class _KeyType>
+_NS_INLINE NS::Enumerator<_KeyType>* NS::Dictionary::keyEnumerator() const
+{
+    return Object::sendMessage<Enumerator<_KeyType>*>(this, _NS_PRIVATE_SEL(keyEnumerator));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends objectForKey_ to this object with pKey; the reply is returned as _Object*.
+template <class _Object>
+_NS_INLINE _Object* NS::Dictionary::object(const Object* pKey) const
+{
+    return Object::sendMessage<_Object*>(this, _NS_PRIVATE_SEL(objectForKey_), pKey);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::Dictionary::count() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(count));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSEnumerator.hpp b/metal-cpp/Foundation/NSEnumerator.hpp
new file mode 100644
index 0000000..69596c0
--- /dev/null
+++ b/metal-cpp/Foundation/NSEnumerator.hpp
@@ -0,0 +1,78 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSEnumerator.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSObject.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declares the enumeration state struct plus the FastEnumeration and Enumerator<_ObjectType> wrappers.
+namespace NS
+{
+struct FastEnumerationState
+{
+    unsigned long  state;
+    Object**       itemsPtr;
+    unsigned long* mutationsPtr;
+    unsigned long  extra[5];
+} _NS_PACKED;
+// Wrapper exposing countByEnumerating(); Enumerator below passes it as the second argument of its Referencing base.
+class FastEnumeration : public Referencing<FastEnumeration>
+{
+public:
+    NS::UInteger countByEnumerating(FastEnumerationState* pState, Object** pBuffer, NS::UInteger len);
+};
+// Typed enumerator: nextObject() is declared to return _ObjectType*, allObjects() an Array*.
+template <class _ObjectType>
+class Enumerator : public Referencing<Enumerator<_ObjectType>, FastEnumeration>
+{
+public:
+    _ObjectType* nextObject();
+    class Array* allObjects();
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends countByEnumeratingWithState_objects_count_ to this object with pState, pBuffer and len; the reply is returned as UInteger.
+_NS_INLINE NS::UInteger NS::FastEnumeration::countByEnumerating(FastEnumerationState* pState, Object** pBuffer, NS::UInteger len)
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(countByEnumeratingWithState_objects_count_), pState, pBuffer, len);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the nextObject selector to this object and returns the reply as _ObjectType*.
+template <class _ObjectType>
+_NS_INLINE _ObjectType* NS::Enumerator<_ObjectType>::nextObject()
+{
+    return Object::sendMessage<_ObjectType*>(this, _NS_PRIVATE_SEL(nextObject));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the allObjects selector to this object and returns the reply as Array*.
+template <class _ObjectType>
+_NS_INLINE NS::Array* NS::Enumerator<_ObjectType>::allObjects()
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(allObjects));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSError.hpp b/metal-cpp/Foundation/NSError.hpp
new file mode 100644
index 0000000..5ebf981
--- /dev/null
+++ b/metal-cpp/Foundation/NSError.hpp
@@ -0,0 +1,173 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSError.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declares the error-domain and user-info-key constants and the NS::Error wrapper class.
+namespace NS
+{
+using ErrorDomain = class String*;
+// Error-domain constants, each declared through _NS_CONST.
+_NS_CONST(ErrorDomain, CocoaErrorDomain);
+_NS_CONST(ErrorDomain, POSIXErrorDomain);
+_NS_CONST(ErrorDomain, OSStatusErrorDomain);
+_NS_CONST(ErrorDomain, MachErrorDomain);
+
+using ErrorUserInfoKey = class String*;
+// User-info key constants, declared the same way with the ErrorUserInfoKey alias.
+_NS_CONST(ErrorUserInfoKey, UnderlyingErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedDescriptionKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedFailureReasonErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedRecoverySuggestionErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedRecoveryOptionsErrorKey);
+_NS_CONST(ErrorUserInfoKey, RecoveryAttempterErrorKey);
+_NS_CONST(ErrorUserInfoKey, HelpAnchorErrorKey);
+_NS_CONST(ErrorUserInfoKey, DebugDescriptionErrorKey);
+_NS_CONST(ErrorUserInfoKey, LocalizedFailureErrorKey);
+_NS_CONST(ErrorUserInfoKey, StringEncodingErrorKey);
+_NS_CONST(ErrorUserInfoKey, URLErrorKey);
+_NS_CONST(ErrorUserInfoKey, FilePathErrorKey);
+// Error derives from Copying<Error>: a static factory, alloc/init, then const accessors; all are defined inline after the namespace.
+class Error : public Copying<Error>
+{
+public:
+    static Error*     error(ErrorDomain domain, Integer code, class Dictionary* pDictionary);
+
+    static Error*     alloc();
+    Error*            init();
+    Error*            init(ErrorDomain domain, Integer code, class Dictionary* pDictionary);
+
+    Integer           code() const;
+    ErrorDomain       domain() const;
+    class Dictionary* userInfo() const;
+
+    class String*     localizedDescription() const;
+    class Array*      localizedRecoveryOptions() const;
+    class String*     localizedRecoverySuggestion() const;
+    class String*     localizedFailureReason() const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// One _NS_PRIVATE_DEF_CONST entry per constant declared in namespace NS above; the macro's expansion lives in NSPrivate.hpp (not shown here).
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, CocoaErrorDomain);
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, POSIXErrorDomain);
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, OSStatusErrorDomain);
+_NS_PRIVATE_DEF_CONST(NS::ErrorDomain, MachErrorDomain);
+
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, UnderlyingErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedDescriptionKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedFailureReasonErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedRecoverySuggestionErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedRecoveryOptionsErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, RecoveryAttempterErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, HelpAnchorErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, DebugDescriptionErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, LocalizedFailureErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, StringEncodingErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, URLErrorKey);
+_NS_PRIVATE_DEF_CONST(NS::ErrorUserInfoKey, FilePathErrorKey);
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Class-level send: errorWithDomain_code_userInfo_ goes to the NSError class handle with domain, code and pDictionary.
+_NS_INLINE NS::Error* NS::Error::error(ErrorDomain domain, Integer code, class Dictionary* pDictionary)
+{
+    return Object::sendMessage<Error*>(_NS_PRIVATE_CLS(NSError), _NS_PRIVATE_SEL(errorWithDomain_code_userInfo_), domain, code, pDictionary);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Error* NS::Error::alloc()
+{
+    return Object::alloc<Error>(_NS_PRIVATE_CLS(NSError));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Error* NS::Error::init()
+{
+    return Object::init<Error>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends initWithDomain_code_userInfo_ to this object with domain, code and pDictionary; the reply is returned as Error*.
+_NS_INLINE NS::Error* NS::Error::init(ErrorDomain domain, Integer code, class Dictionary* pDictionary)
+{
+    return Object::sendMessage<Error*>(this, _NS_PRIVATE_SEL(initWithDomain_code_userInfo_), domain, code, pDictionary);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Integer NS::Error::code() const
+{
+    return Object::sendMessage<Integer>(this, _NS_PRIVATE_SEL(code));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the domain selector to this object and returns the reply as ErrorDomain (an alias for String*).
+_NS_INLINE NS::ErrorDomain NS::Error::domain() const
+{
+    return Object::sendMessage<ErrorDomain>(this, _NS_PRIVATE_SEL(domain));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Dictionary* NS::Error::userInfo() const
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(userInfo));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the localizedDescription selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Error::localizedDescription() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(localizedDescription));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the localizedRecoveryOptions selector to this object and returns the reply as Array*.
+_NS_INLINE NS::Array* NS::Error::localizedRecoveryOptions() const
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(localizedRecoveryOptions));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the localizedRecoverySuggestion selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Error::localizedRecoverySuggestion() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(localizedRecoverySuggestion));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the localizedFailureReason selector to this object and returns the reply as String*.
+_NS_INLINE NS::String* NS::Error::localizedFailureReason() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(localizedFailureReason));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSLock.hpp b/metal-cpp/Foundation/NSLock.hpp
new file mode 100644
index 0000000..f27de08
--- /dev/null
+++ b/metal-cpp/Foundation/NSLock.hpp
@@ -0,0 +1,105 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSLock.hpp
+//
+// See LICENSE.txt for this project licensing information.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+#include "NSDate.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Declares the Locking<> mixin template and the NS::Condition wrapper built on it.
+namespace NS
+{
+// Locking adds lock()/unlock() on top of _Base (Object by default); _Class is not referenced by the members visible here.
+template <class _Class, class _Base = class Object>
+class Locking : public _Base
+{
+public:
+    void lock();
+    void unlock();
+};
+// Condition derives from Locking<Condition>, so it also exposes lock()/unlock() next to the members declared below.
+class Condition : public Locking<Condition>
+{
+public:
+    static Condition* alloc();
+
+    Condition*        init();
+
+    void              wait();
+    bool              waitUntilDate(Date* pLimit);
+    void              signal();
+    void              broadcast();
+};
+
+} // NS
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the lock selector to this object; nothing is returned.
+template<class _Class, class _Base /* = NS::Object */>
+_NS_INLINE void NS::Locking<_Class, _Base>::lock()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(lock));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the unlock selector to this object; nothing is returned.
+template<class _Class, class _Base /* = NS::Object */>
+_NS_INLINE void NS::Locking<_Class, _Base>::unlock()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(unlock));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Condition* NS::Condition::alloc()
+{
+    return NS::Object::alloc<NS::Condition>(_NS_PRIVATE_CLS(NSCondition));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Condition* NS::Condition::init()
+{
+    return NS::Object::init<NS::Condition>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::Condition::wait()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(wait));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends waitUntilDate_ to this object with pLimit; the reply is returned as bool.
+_NS_INLINE bool NS::Condition::waitUntilDate(NS::Date* pLimit)
+{
+    return NS::Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(waitUntilDate_), pLimit);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::Condition::signal()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(signal));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::Condition::broadcast()
+{
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(broadcast));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
\ No newline at end of file
diff --git a/metal-cpp/Foundation/NSNotification.hpp b/metal-cpp/Foundation/NSNotification.hpp
new file mode 100644
index 0000000..0abbc9f
--- /dev/null
+++ b/metal-cpp/Foundation/NSNotification.hpp
@@ -0,0 +1,67 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSNotification.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSDictionary.hpp"
+#include "NSObject.hpp"
+#include "NSString.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+using NotificationName = class String*; // alias for a String pointer, spelled with an elaborated type specifier
+
+class Notification : public NS::Referencing<Notification> // three read-only accessors, defined inline after this namespace
+{
+public:
+    NS::String*     name() const; // forwards the `name` selector to this object
+    NS::Object*     object() const; // forwards the `object` selector to this object
+    NS::Dictionary* userInfo() const; // forwards the `userInfo` selector to this object
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::Notification::name() const
+{ // Sends the name selector to this object and returns the reply as a String pointer.
+    return NS::Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(name));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Object* NS::Notification::object() const
+{ // Sends the object selector to this object and returns the reply as an Object pointer.
+    return NS::Object::sendMessage<Object*>(this, _NS_PRIVATE_SEL(object));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Dictionary* NS::Notification::userInfo() const
+{ // Sends the userInfo selector to this object and returns the reply as a Dictionary pointer.
+    return NS::Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(userInfo));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSNumber.hpp b/metal-cpp/Foundation/NSNumber.hpp
new file mode 100644
index 0000000..8d0f66f
--- /dev/null
+++ b/metal-cpp/Foundation/NSNumber.hpp
@@ -0,0 +1,501 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSNumber.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSObjCRuntime.hpp"
+#include "NSObject.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+class Value : public Copying<Value> // declarations only; every member is defined inline after the namespace as one message send
+{
+public:
+    static Value* value(const void* pValue, const char* pType); // valueWithBytes_objCType_ sent to the NSValue class
+    static Value* value(const void* pPointer); // valueWithPointer_ sent to the NSValue class
+
+    static Value* alloc(); // Object::alloc with the NSValue class
+
+    Value*        init(const void* pValue, const char* pType); // initWithBytes_objCType_ sent to this object
+    Value*        init(const class Coder* pCoder); // initWithCoder_ sent to this object; Coder is only named here, not defined
+
+    void          getValue(void* pValue, UInteger size) const; // getValue_size_ sent with both arguments; nothing returned
+    const char*   objCType() const; // reply of the objCType selector
+
+    bool          isEqualToValue(Value* pValue) const; // note: takes a non-const Value pointer
+    void*         pointerValue() const; // reply of the pointerValue selector
+};
+
+class Number : public Copying<Number, Value> // Value is supplied as Copying's base-class argument
+{
+public:
+    static Number*     number(char value); // number(...) overloads: one class-level factory per arithmetic type listed here
+    static Number*     number(unsigned char value);
+    static Number*     number(short value);
+    static Number*     number(unsigned short value);
+    static Number*     number(int value);
+    static Number*     number(unsigned int value);
+    static Number*     number(long value);
+    static Number*     number(unsigned long value);
+    static Number*     number(long long value);
+    static Number*     number(unsigned long long value);
+    static Number*     number(float value);
+    static Number*     number(double value);
+    static Number*     number(bool value);
+
+    static Number*     alloc(); // defined below through Object::alloc with the NSNumber class
+
+    Number*            init(const class Coder* pCoder); // Coder is only named via an elaborated specifier; not defined in this header
+    Number*            init(char value); // init(...) overloads mirror the number(...) factory list above
+    Number*            init(unsigned char value);
+    Number*            init(short value);
+    Number*            init(unsigned short value);
+    Number*            init(int value);
+    Number*            init(unsigned int value);
+    Number*            init(long value);
+    Number*            init(unsigned long value);
+    Number*            init(long long value);
+    Number*            init(unsigned long long value);
+    Number*            init(float value);
+    Number*            init(double value);
+    Number*            init(bool value);
+
+    char               charValue() const; // <type>Value() accessors: each returns the message reply as the named type
+    unsigned char      unsignedCharValue() const;
+    short              shortValue() const;
+    unsigned short     unsignedShortValue() const;
+    int                intValue() const;
+    unsigned int       unsignedIntValue() const;
+    long               longValue() const;
+    unsigned long      unsignedLongValue() const;
+    long long          longLongValue() const;
+    unsigned long long unsignedLongLongValue() const;
+    float              floatValue() const;
+    double             doubleValue() const;
+    bool               boolValue() const;
+    Integer            integerValue() const; // Integer / UInteger are NS typedefs (presumably from NSTypes.hpp)
+    UInteger           unsignedIntegerValue() const;
+    class String*      stringValue() const; // String is referenced through an elaborated specifier only
+
+    ComparisonResult   compare(const Number* pOtherNumber) const; // ComparisonResult is declared in NSObjCRuntime.hpp
+    bool               isEqualToNumber(const Number* pNumber) const;
+
+    class String*      descriptionWithLocale(const Object* pLocale) const; // pLocale is typed as a plain Object pointer
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Value* NS::Value::value(const void* pValue, const char* pType)
+{ // Class-level send: valueWithBytes_objCType_ goes to the NSValue class with both arguments.
+    return NS::Object::sendMessage<NS::Value*>(_NS_PRIVATE_CLS(NSValue), _NS_PRIVATE_SEL(valueWithBytes_objCType_), pValue, pType);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Value* NS::Value::value(const void* pPointer)
+{ // Class-level send: valueWithPointer_ goes to the NSValue class with pPointer.
+    return NS::Object::sendMessage<NS::Value*>(_NS_PRIVATE_CLS(NSValue), _NS_PRIVATE_SEL(valueWithPointer_), pPointer);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Value* NS::Value::alloc()
+{ // Delegates to the Object::alloc template, handing it the NSValue class handle.
+    return Object::alloc<NS::Value>(_NS_PRIVATE_CLS(NSValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Value* NS::Value::init(const void* pValue, const char* pType)
+{ // Sends initWithBytes_objCType_ to this object with both arguments; the reply is returned.
+    return NS::Object::sendMessage<NS::Value*>(this, _NS_PRIVATE_SEL(initWithBytes_objCType_), pValue, pType);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Value* NS::Value::init(const class Coder* pCoder)
+{ // Sends initWithCoder_ to this object with pCoder; the reply is returned.
+    return NS::Object::sendMessage<NS::Value*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void NS::Value::getValue(void* pValue, UInteger size) const
+{ // Sends getValue_size_ to this object with the destination pointer and size; nothing is returned.
+    NS::Object::sendMessage<void>(this, _NS_PRIVATE_SEL(getValue_size_), pValue, size);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE const char* NS::Value::objCType() const
+{ // Sends objCType to this object and returns the reply as a C string pointer.
+    return NS::Object::sendMessage<const char*>(this, _NS_PRIVATE_SEL(objCType));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Value::isEqualToValue(Value* pValue) const
+{ // Sends isEqualToValue_ to this object with pValue and returns the bool reply.
+    return NS::Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isEqualToValue_), pValue);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE void* NS::Value::pointerValue() const
+{ // Sends pointerValue to this object and returns the reply as an untyped pointer.
+    return NS::Object::sendMessage<void*>(this, _NS_PRIVATE_SEL(pointerValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(char value)
+{ // Class-level send: numberWithChar_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(unsigned char value)
+{ // Class-level send: numberWithUnsignedChar_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(short value)
+{ // Class-level send: numberWithShort_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(unsigned short value)
+{ // Class-level send: numberWithUnsignedShort_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(int value)
+{ // Class-level send: numberWithInt_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(unsigned int value)
+{ // Class-level send: numberWithUnsignedInt_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(long value)
+{ // Class-level send: numberWithLong_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(unsigned long value)
+{ // Class-level send: numberWithUnsignedLong_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(long long value)
+{ // Class-level send: numberWithLongLong_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(unsigned long long value)
+{ // Class-level send: numberWithUnsignedLongLong_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithUnsignedLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(float value)
+{ // Class-level send: numberWithFloat_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithFloat_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(double value)
+{ // Class-level send: numberWithDouble_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithDouble_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::number(bool value)
+{ // Class-level send: numberWithBool_ goes to the NSNumber class with value.
+    return NS::Object::sendMessage<NS::Number*>(_NS_PRIVATE_CLS(NSNumber), _NS_PRIVATE_SEL(numberWithBool_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::alloc()
+{ // Delegates to the Object::alloc template, handing it the NSNumber class handle.
+    return Object::alloc<NS::Number>(_NS_PRIVATE_CLS(NSNumber));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(const Coder* pCoder)
+{ // Sends initWithCoder_ to this object with pCoder; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithCoder_), pCoder);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(char value)
+{ // Sends initWithChar_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(unsigned char value)
+{ // Sends initWithUnsignedChar_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedChar_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(short value)
+{ // Sends initWithShort_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(unsigned short value)
+{ // Sends initWithUnsignedShort_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedShort_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(int value)
+{ // Sends initWithInt_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(unsigned int value)
+{ // Sends initWithUnsignedInt_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedInt_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(long value)
+{ // Sends initWithLong_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(unsigned long value)
+{ // Sends initWithUnsignedLong_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(long long value)
+{ // Sends initWithLongLong_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(unsigned long long value)
+{ // Sends initWithUnsignedLongLong_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithUnsignedLongLong_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(float value)
+{ // Sends initWithFloat_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithFloat_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(double value)
+{ // Sends initWithDouble_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithDouble_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Number* NS::Number::init(bool value)
+{ // Sends initWithBool_ to this object with value; the reply is returned.
+    return NS::Object::sendMessage<NS::Number*>(this, _NS_PRIVATE_SEL(initWithBool_), value);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE char NS::Number::charValue() const
+{ // Sends charValue to this object and returns the reply as char.
+    return NS::Object::sendMessage<char>(this, _NS_PRIVATE_SEL(charValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE unsigned char NS::Number::unsignedCharValue() const
+{ // Sends unsignedCharValue to this object and returns the reply as unsigned char.
+    return NS::Object::sendMessage<unsigned char>(this, _NS_PRIVATE_SEL(unsignedCharValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE short NS::Number::shortValue() const
+{ // Sends shortValue to this object and returns the reply as short.
+    return NS::Object::sendMessage<short>(this, _NS_PRIVATE_SEL(shortValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE unsigned short NS::Number::unsignedShortValue() const
+{ // Sends unsignedShortValue to this object and returns the reply as unsigned short.
+    return NS::Object::sendMessage<unsigned short>(this, _NS_PRIVATE_SEL(unsignedShortValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE int NS::Number::intValue() const
+{ // Sends intValue to this object and returns the reply as int.
+    return NS::Object::sendMessage<int>(this, _NS_PRIVATE_SEL(intValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE unsigned int NS::Number::unsignedIntValue() const
+{ // Sends unsignedIntValue to this object and returns the reply as unsigned int.
+    return NS::Object::sendMessage<unsigned int>(this, _NS_PRIVATE_SEL(unsignedIntValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE long NS::Number::longValue() const
+{ // Sends longValue to this object and returns the reply as long.
+    return NS::Object::sendMessage<long>(this, _NS_PRIVATE_SEL(longValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE unsigned long NS::Number::unsignedLongValue() const
+{ // Sends unsignedLongValue to this object and returns the reply as unsigned long.
+    return NS::Object::sendMessage<unsigned long>(this, _NS_PRIVATE_SEL(unsignedLongValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE long long NS::Number::longLongValue() const
+{ // Sends longLongValue to this object and returns the reply as long long.
+    return NS::Object::sendMessage<long long>(this, _NS_PRIVATE_SEL(longLongValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE unsigned long long NS::Number::unsignedLongLongValue() const
+{ // Sends unsignedLongLongValue to this object and returns the reply as unsigned long long.
+    return NS::Object::sendMessage<unsigned long long>(this, _NS_PRIVATE_SEL(unsignedLongLongValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE float NS::Number::floatValue() const
+{ // Sends floatValue to this object and returns the reply as float.
+    return NS::Object::sendMessage<float>(this, _NS_PRIVATE_SEL(floatValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE double NS::Number::doubleValue() const
+{ // Sends doubleValue to this object and returns the reply as double.
+    return NS::Object::sendMessage<double>(this, _NS_PRIVATE_SEL(doubleValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Number::boolValue() const
+{ // Sends boolValue to this object and returns the reply as bool.
+    return NS::Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(boolValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::Integer NS::Number::integerValue() const
+{ // Sends integerValue to this object and returns the reply as NS::Integer.
+    return NS::Object::sendMessage<NS::Integer>(this, _NS_PRIVATE_SEL(integerValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::Number::unsignedIntegerValue() const
+{ // Sends unsignedIntegerValue to this object and returns the reply as NS::UInteger.
+    return NS::Object::sendMessage<NS::UInteger>(this, _NS_PRIVATE_SEL(unsignedIntegerValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::Number::stringValue() const
+{ // Sends stringValue to this object and returns the reply as a String pointer.
+    return NS::Object::sendMessage<NS::String*>(this, _NS_PRIVATE_SEL(stringValue));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::ComparisonResult NS::Number::compare(const Number* pOtherNumber) const
+{ // Sends compare_ to this object with pOtherNumber and returns the reply as a ComparisonResult.
+    return NS::Object::sendMessage<NS::ComparisonResult>(this, _NS_PRIVATE_SEL(compare_), pOtherNumber);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Number::isEqualToNumber(const Number* pNumber) const
+{ // Sends isEqualToNumber_ to this object with pNumber and returns the bool reply.
+    return NS::Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isEqualToNumber_), pNumber);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::Number::descriptionWithLocale(const Object* pLocale) const
+{ // Sends descriptionWithLocale_ to this object with pLocale and returns the reply as a String pointer.
+    return NS::Object::sendMessage<NS::String*>(this, _NS_PRIVATE_SEL(descriptionWithLocale_), pLocale);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSObjCRuntime.hpp b/metal-cpp/Foundation/NSObjCRuntime.hpp
new file mode 100644
index 0000000..48e6506
--- /dev/null
+++ b/metal-cpp/Foundation/NSObjCRuntime.hpp
@@ -0,0 +1,43 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSObjCRuntime.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+
+_NS_ENUM(Integer, ComparisonResult) { // enumeration declared through the _NS_ENUM macro, with Integer as its first argument
+    OrderedAscending = -1, // NOTE(review): values presumably mirror Foundation's NSComparisonResult - confirm
+    OrderedSame = 0,
+    OrderedDescending = 1,
+};
+
+const Integer NotFound = IntegerMax; // constant equal to IntegerMax; IntegerMax itself is declared outside this header
+
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSObject.hpp b/metal-cpp/Foundation/NSObject.hpp
new file mode 100644
index 0000000..f4e1c63
--- /dev/null
+++ b/metal-cpp/Foundation/NSObject.hpp
@@ -0,0 +1,297 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSObject.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+#include <objc/message.h>
+#include <objc/runtime.h>
+
+#include <type_traits>
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+template <class _Class, class _Base = class Object> // mixin over _Base: reference-counting calls with results typed as _Class
+class Referencing : public _Base
+{
+public:
+    _Class*  retain();  // sends the `retain` selector to this object
+    void     release(); // sends the `release` selector to this object
+
+    _Class*  autorelease(); // sends the `autorelease` selector to this object
+
+    UInteger retainCount() const; // sends the `retainCount` selector to this object
+};
+
+template <class _Class, class _Base = class Object> // Referencing<_Class, _Base> plus a typed copy()
+class Copying : public Referencing<_Class, _Base>
+{
+public:
+    _Class* copy() const; // sends the `copy` selector to this object; the reply is returned as _Class*
+};
+
+class Object : public Referencing<Object, objc_object> // root wrapper type, layered directly on the runtime's objc_object
+{
+public:
+    UInteger      hash() const;
+    bool          isEqual(const Object* pObject) const;
+    // debugDescription is sent through sendMessageSafe; description is sent unconditionally.
+    class String* description() const;
+    class String* debugDescription() const;
+
+protected:
+    friend class Referencing<Object, objc_object>; // this base is not derived from Object, yet calls the protected sendMessage
+    // Creation helpers: alloc sends `alloc` to a class (looked up by name, or passed in); init sends `init` to this object.
+    template <class _Class>
+    static _Class* alloc(const char* pClassName);
+    template <class _Class>
+    static _Class* alloc(const void* pClass);
+    template <class _Class>
+    _Class* init();
+    // Low-level casting and message-sending helpers used by the wrapper methods.
+    template <class _Dst>
+    static _Dst                   bridgingCast(const void* pObj);
+    static class MethodSignature* methodSignatureForSelector(const void* pObj, SEL selector);
+    static bool                   respondsToSelector(const void* pObj, SEL selector);
+    template <typename _Type>
+    static constexpr bool doesRequireMsgSendStret();
+    template <typename _Ret, typename... _Args>
+    static _Ret sendMessage(const void* pObj, SEL selector, _Args... args);
+    template <typename _Ret, typename... _Args>
+    static _Ret sendMessageSafe(const void* pObj, SEL selector, _Args... args);
+
+private: // construction, copying, assignment and destruction are all deleted
+    Object() = delete;
+    Object(const Object&) = delete;
+    ~Object() = delete;
+
+    Object& operator=(const Object&) = delete;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class, class _Base /* = Object */>
+_NS_INLINE _Class* NS::Referencing<_Class, _Base>::retain()
+{
+    return Object::sendMessage<_Class*>(this, _NS_PRIVATE_SEL(retain)); // forwards `retain`; the reply is returned typed as _Class*
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class, class _Base /* = Object */>
+_NS_INLINE void NS::Referencing<_Class, _Base>::release()
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(release)); // forwards `release`; nothing is returned
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class, class _Base /* = Object */>
+_NS_INLINE _Class* NS::Referencing<_Class, _Base>::autorelease()
+{
+    return Object::sendMessage<_Class*>(this, _NS_PRIVATE_SEL(autorelease)); // forwards `autorelease`; the reply is returned typed as _Class*
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class, class _Base /* = Object */>
+_NS_INLINE NS::UInteger NS::Referencing<_Class, _Base>::retainCount() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(retainCount)); // forwards `retainCount`; the reply is returned as UInteger
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class, class _Base /* = Object */>
+_NS_INLINE _Class* NS::Copying<_Class, _Base>::copy() const
+{
+    return Object::sendMessage<_Class*>(this, _NS_PRIVATE_SEL(copy)); // forwards `copy`; the reply is returned typed as _Class*
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Dst> // reinterprets an untyped object pointer as _Dst
+_NS_INLINE _Dst NS::Object::bridgingCast(const void* pObj)
+{
+#if __OBJC__
+    return (__bridge _Dst)pObj; // Objective-C++ translation units: use a __bridge cast
+#else
+    return (_Dst)pObj; // plain C++ translation units: ordinary C-style cast
+#endif // __OBJC__
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <typename _Type> // true when sendMessage must fetch a _Type result through objc_msgSend_stret
+_NS_INLINE constexpr bool NS::Object::doesRequireMsgSendStret()
+{
+#if (defined(__i386__) || defined(__x86_64__))
+    constexpr size_t kStructLimit = (sizeof(std::uintptr_t) << 1);
+    // x86: any result wider than two pointers takes the struct-return path.
+    return sizeof(_Type) > kStructLimit;
+#elif defined(__arm64__)
+    return false; // sendMessage compiles the stret path out entirely on arm64
+#elif defined(__arm__)
+    constexpr size_t kStructLimit = sizeof(std::uintptr_t);
+    // 32-bit ARM: only class/struct results wider than one pointer take the struct-return path.
+    return std::is_class<_Type>::value && (sizeof(_Type) > kStructLimit);
+#else
+#error "Unsupported architecture!"
+#endif
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <> // void has no size, so the generic sizeof-based test is replaced by this specialization
+_NS_INLINE constexpr bool NS::Object::doesRequireMsgSendStret<void>()
+{
+    return false; // a void result never takes the struct-return path
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <typename _Ret, typename... _Args> // sends `selector` to pObj with args, choosing the objc_msgSend variant from _Ret
+_NS_INLINE _Ret NS::Object::sendMessage(const void* pObj, SEL selector, _Args... args)
+{
+#if (defined(__i386__) || defined(__x86_64__))
+    if constexpr (std::is_floating_point<_Ret>())
+    {
+        using SendMessageProcFpret = _Ret (*)(const void*, SEL, _Args...);
+        // x86 only: floating-point results go through objc_msgSend_fpret, cast to this call's exact signature.
+        const SendMessageProcFpret pProc = reinterpret_cast<SendMessageProcFpret>(&objc_msgSend_fpret);
+
+        return (*pProc)(pObj, selector, args...);
+    }
+    else // chains into whichever branch the preprocessor keeps below
+#endif // ( defined( __i386__ )  || defined( __x86_64__ )  )
+#if !defined(__arm64__)
+        if constexpr (doesRequireMsgSendStret<_Ret>())
+    {
+        using SendMessageProcStret = void (*)(_Ret*, const void*, SEL, _Args...);
+        // Struct-return path: the result is written through a pointer passed as the first argument.
+        const SendMessageProcStret pProc = reinterpret_cast<SendMessageProcStret>(&objc_msgSend_stret);
+        _Ret                       ret;
+
+        (*pProc)(&ret, pObj, selector, args...);
+
+        return ret;
+    }
+    else // chains into the default branch below
+#endif // !defined( __arm64__ )
+    {
+        using SendMessageProc = _Ret (*)(const void*, SEL, _Args...);
+        // Default path: objc_msgSend is cast to this call's exact signature before it is invoked.
+        const SendMessageProc pProc = reinterpret_cast<SendMessageProc>(&objc_msgSend);
+
+        return (*pProc)(pObj, selector, args...);
+    }
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::MethodSignature* NS::Object::methodSignatureForSelector(const void* pObj, SEL selector)
+{
+    return sendMessage<MethodSignature*>(pObj, _NS_PRIVATE_SEL(methodSignatureForSelector_), selector); // sends `methodSignatureForSelector:` to pObj with `selector` as the argument
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Object::respondsToSelector(const void* pObj, SEL selector)
+{
+    return sendMessage<bool>(pObj, _NS_PRIVATE_SEL(respondsToSelector_), selector); // sends `respondsToSelector:` to pObj with `selector` as the argument
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <typename _Ret, typename... _Args> // like sendMessage, but only sends when pObj reports it can handle `selector`
+_NS_INLINE _Ret NS::Object::sendMessageSafe(const void* pObj, SEL selector, _Args... args)
+{
+    if ((respondsToSelector(pObj, selector)) || (nullptr != methodSignatureForSelector(pObj, selector)))
+    {
+        return sendMessage<_Ret>(pObj, selector, args...);
+    }
+    // Selector not handled: return a zero value. The explicit conversion also compiles for enum return types.
+    if constexpr (!std::is_void<_Ret>::value)
+    {
+        return _Ret(0);
+    }
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class>
+_NS_INLINE _Class* NS::Object::alloc(const char* pClassName)
+{
+    return sendMessage<_Class*>(objc_lookUpClass(pClassName), _NS_PRIVATE_SEL(alloc)); // looks the class up by name, then sends it `alloc`
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class>
+_NS_INLINE _Class* NS::Object::alloc(const void* pClass)
+{
+    return sendMessage<_Class*>(pClass, _NS_PRIVATE_SEL(alloc)); // sends `alloc` to the class handle supplied by the caller
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+template <class _Class>
+_NS_INLINE _Class* NS::Object::init()
+{
+    return sendMessage<_Class*>(this, _NS_PRIVATE_SEL(init)); // sends `init` to this object; the reply is returned typed as _Class*
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::UInteger NS::Object::hash() const
+{
+    return sendMessage<UInteger>(this, _NS_PRIVATE_SEL(hash)); // forwards `hash`; the reply is returned as UInteger
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE bool NS::Object::isEqual(const Object* pObject) const
+{
+    return sendMessage<bool>(this, _NS_PRIVATE_SEL(isEqual_), pObject); // forwards `isEqual:` with pObject as the argument
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::Object::description() const
+{
+    return sendMessage<String*>(this, _NS_PRIVATE_SEL(description)); // forwards `description`; the reply is returned as String*
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::String* NS::Object::debugDescription() const
+{
+    return sendMessageSafe<String*>(this, _NS_PRIVATE_SEL(debugDescription)); // sent via sendMessageSafe: yields a null String* when the selector is not handled
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSPrivate.hpp b/metal-cpp/Foundation/NSPrivate.hpp
new file mode 100644
index 0000000..9dfd2ab
--- /dev/null
+++ b/metal-cpp/Foundation/NSPrivate.hpp
@@ -0,0 +1,488 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSPrivate.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include <objc/runtime.h>
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#define _NS_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol) // refers to the s_k<symbol> class handle in Private::Class
+#define _NS_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) // refers to the s_k<accessor> SEL in Private::Selector
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#if defined(NS_PRIVATE_IMPLEMENTATION)
+// Implementation build: the _NS_PRIVATE_DEF_* macros below expand to definitions with initialisers.
+#define _NS_PRIVATE_VISIBILITY __attribute__((visibility("default")))
+#define _NS_PRIVATE_IMPORT __attribute__((weak_import))
+// Class lookup by stringised name; Objective-C++ builds additionally __bridge-cast the result to void*.
+#if __OBJC__
+#define _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol))
+#else
+#define _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol)
+#endif // __OBJC__
+// DEF_CLS / DEF_SEL define the s_k* globals; DEF_PRO expands to nothing; DEF_CONST yields the weak-imported NS<symbol>, or nullptr when its address is null.
+#define _NS_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _NS_PRIVATE_VISIBILITY = _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol);
+#define _NS_PRIVATE_DEF_PRO(symbol)
+#define _NS_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _NS_PRIVATE_VISIBILITY = sel_registerName(symbol);
+#define _NS_PRIVATE_DEF_CONST(type, symbol)              \
+    _NS_EXTERN type const NS##symbol _NS_PRIVATE_IMPORT; \
+    type const                       NS::symbol = (nullptr != &NS##symbol) ? NS##symbol : nullptr;
+
+#else
+// Any other translation unit: the same macros expand to extern declarations, or to nothing.
+#define _NS_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol;
+#define _NS_PRIVATE_DEF_PRO(symbol)
+#define _NS_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor;
+#define _NS_PRIVATE_DEF_CONST(type, symbol)
+
+#endif // NS_PRIVATE_IMPLEMENTATION
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+namespace Private
+{
+    namespace Class
+    {
+        // One s_k<Name> handle per listed class: defined under NS_PRIVATE_IMPLEMENTATION, declared extern otherwise.
+        _NS_PRIVATE_DEF_CLS(NSArray);
+        _NS_PRIVATE_DEF_CLS(NSAutoreleasePool);
+        _NS_PRIVATE_DEF_CLS(NSBundle);
+        _NS_PRIVATE_DEF_CLS(NSCondition);
+        _NS_PRIVATE_DEF_CLS(NSDate);
+        _NS_PRIVATE_DEF_CLS(NSDictionary);
+        _NS_PRIVATE_DEF_CLS(NSError);
+        _NS_PRIVATE_DEF_CLS(NSNumber);
+        _NS_PRIVATE_DEF_CLS(NSObject);
+        _NS_PRIVATE_DEF_CLS(NSProcessInfo);
+        _NS_PRIVATE_DEF_CLS(NSString);
+        _NS_PRIVATE_DEF_CLS(NSURL);
+        _NS_PRIVATE_DEF_CLS(NSValue);
+
+    } // Class
+} // Private
+} // NS
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+namespace Private
+{
+    namespace Protocol
+    {
+        // No entries: this header declares no protocols with _NS_PRIVATE_DEF_PRO.
+    } // Protocol
+} // Private
+} // NS
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+namespace Private
+{
+    namespace Selector
+    {
+
+        _NS_PRIVATE_DEF_SEL(addObject_,
+            "addObject:");
+        _NS_PRIVATE_DEF_SEL(activeProcessorCount,
+            "activeProcessorCount");
+        _NS_PRIVATE_DEF_SEL(allBundles,
+            "allBundles");
+        _NS_PRIVATE_DEF_SEL(allFrameworks,
+            "allFrameworks");
+        _NS_PRIVATE_DEF_SEL(allObjects,
+            "allObjects");
+        _NS_PRIVATE_DEF_SEL(alloc,
+            "alloc");
+        _NS_PRIVATE_DEF_SEL(appStoreReceiptURL,
+            "appStoreReceiptURL");
+        _NS_PRIVATE_DEF_SEL(arguments,
+            "arguments");
+        _NS_PRIVATE_DEF_SEL(array,
+            "array");
+        _NS_PRIVATE_DEF_SEL(arrayWithObject_,
+            "arrayWithObject:");
+        _NS_PRIVATE_DEF_SEL(arrayWithObjects_count_,
+            "arrayWithObjects:count:");
+        _NS_PRIVATE_DEF_SEL(automaticTerminationSupportEnabled,
+            "automaticTerminationSupportEnabled");
+        _NS_PRIVATE_DEF_SEL(autorelease,
+            "autorelease");
+        _NS_PRIVATE_DEF_SEL(beginActivityWithOptions_reason_,
+            "beginActivityWithOptions:reason:");
+        _NS_PRIVATE_DEF_SEL(boolValue,
+            "boolValue");
+        _NS_PRIVATE_DEF_SEL(broadcast,
+            "broadcast");
+        _NS_PRIVATE_DEF_SEL(builtInPlugInsPath,
+            "builtInPlugInsPath");
+        _NS_PRIVATE_DEF_SEL(builtInPlugInsURL,
+            "builtInPlugInsURL");
+        _NS_PRIVATE_DEF_SEL(bundleIdentifier,
+            "bundleIdentifier");
+        _NS_PRIVATE_DEF_SEL(bundlePath,
+            "bundlePath");
+        _NS_PRIVATE_DEF_SEL(bundleURL,
+            "bundleURL");
+        _NS_PRIVATE_DEF_SEL(bundleWithPath_,
+            "bundleWithPath:");
+        _NS_PRIVATE_DEF_SEL(bundleWithURL_,
+            "bundleWithURL:");
+        _NS_PRIVATE_DEF_SEL(characterAtIndex_,
+            "characterAtIndex:");
+        _NS_PRIVATE_DEF_SEL(charValue,
+            "charValue");
+        _NS_PRIVATE_DEF_SEL(countByEnumeratingWithState_objects_count_,
+            "countByEnumeratingWithState:objects:count:");
+        _NS_PRIVATE_DEF_SEL(cStringUsingEncoding_,
+            "cStringUsingEncoding:");
+        _NS_PRIVATE_DEF_SEL(code,
+            "code");
+        _NS_PRIVATE_DEF_SEL(compare_,
+            "compare:");
+        _NS_PRIVATE_DEF_SEL(copy,
+            "copy");
+        _NS_PRIVATE_DEF_SEL(count,
+            "count");
+        _NS_PRIVATE_DEF_SEL(dateWithTimeIntervalSinceNow_,
+            "dateWithTimeIntervalSinceNow:");
+        _NS_PRIVATE_DEF_SEL(descriptionWithLocale_,
+            "descriptionWithLocale:");
+        _NS_PRIVATE_DEF_SEL(disableAutomaticTermination_,
+            "disableAutomaticTermination:");
+        _NS_PRIVATE_DEF_SEL(disableSuddenTermination,
+            "disableSuddenTermination");
+        _NS_PRIVATE_DEF_SEL(debugDescription,
+            "debugDescription");
+        _NS_PRIVATE_DEF_SEL(description,
+            "description");
+        _NS_PRIVATE_DEF_SEL(dictionary,
+            "dictionary");
+        _NS_PRIVATE_DEF_SEL(dictionaryWithObject_forKey_,
+            "dictionaryWithObject:forKey:");
+        _NS_PRIVATE_DEF_SEL(dictionaryWithObjects_forKeys_count_,
+            "dictionaryWithObjects:forKeys:count:");
+        _NS_PRIVATE_DEF_SEL(domain,
+            "domain");
+        _NS_PRIVATE_DEF_SEL(doubleValue,
+            "doubleValue");
+        _NS_PRIVATE_DEF_SEL(drain,
+            "drain");
+        _NS_PRIVATE_DEF_SEL(enableAutomaticTermination_,
+            "enableAutomaticTermination:");
+        _NS_PRIVATE_DEF_SEL(enableSuddenTermination,
+            "enableSuddenTermination");
+        _NS_PRIVATE_DEF_SEL(endActivity_,
+            "endActivity:");
+        _NS_PRIVATE_DEF_SEL(environment,
+            "environment");
+        _NS_PRIVATE_DEF_SEL(errorWithDomain_code_userInfo_,
+            "errorWithDomain:code:userInfo:");
+        _NS_PRIVATE_DEF_SEL(executablePath,
+            "executablePath");
+        _NS_PRIVATE_DEF_SEL(executableURL,
+            "executableURL");
+        _NS_PRIVATE_DEF_SEL(fileSystemRepresentation,
+            "fileSystemRepresentation");
+        _NS_PRIVATE_DEF_SEL(fileURLWithPath_,
+            "fileURLWithPath:");
+        _NS_PRIVATE_DEF_SEL(floatValue,
+            "floatValue");
+        _NS_PRIVATE_DEF_SEL(fullUserName,
+            "fullUserName");
+        _NS_PRIVATE_DEF_SEL(getValue_size_,
+            "getValue:size:");
+        _NS_PRIVATE_DEF_SEL(globallyUniqueString,
+            "globallyUniqueString");
+        _NS_PRIVATE_DEF_SEL(hash,
+            "hash");
+        _NS_PRIVATE_DEF_SEL(hostName,
+            "hostName");
+        _NS_PRIVATE_DEF_SEL(infoDictionary,
+            "infoDictionary");
+        _NS_PRIVATE_DEF_SEL(init,
+            "init");
+        _NS_PRIVATE_DEF_SEL(initFileURLWithPath_,
+            "initFileURLWithPath:");
+        _NS_PRIVATE_DEF_SEL(initWithBool_,
+            "initWithBool:");
+        _NS_PRIVATE_DEF_SEL(initWithBytes_objCType_,
+            "initWithBytes:objCType:");
+        _NS_PRIVATE_DEF_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_,
+            "initWithBytesNoCopy:length:encoding:freeWhenDone:");
+        _NS_PRIVATE_DEF_SEL(initWithChar_,
+            "initWithChar:");
+        _NS_PRIVATE_DEF_SEL(initWithCoder_,
+            "initWithCoder:");
+        _NS_PRIVATE_DEF_SEL(initWithCString_encoding_,
+            "initWithCString:encoding:");
+        _NS_PRIVATE_DEF_SEL(initWithDomain_code_userInfo_,
+            "initWithDomain:code:userInfo:");
+        _NS_PRIVATE_DEF_SEL(initWithDouble_,
+            "initWithDouble:");
+        _NS_PRIVATE_DEF_SEL(initWithFloat_,
+            "initWithFloat:");
+        _NS_PRIVATE_DEF_SEL(initWithInt_,
+            "initWithInt:");
+        _NS_PRIVATE_DEF_SEL(initWithLong_,
+            "initWithLong:");
+        _NS_PRIVATE_DEF_SEL(initWithLongLong_,
+            "initWithLongLong:");
+        _NS_PRIVATE_DEF_SEL(initWithObjects_count_,
+            "initWithObjects:count:");
+        _NS_PRIVATE_DEF_SEL(initWithObjects_forKeys_count_,
+            "initWithObjects:forKeys:count:");
+        _NS_PRIVATE_DEF_SEL(initWithPath_,
+            "initWithPath:");
+        _NS_PRIVATE_DEF_SEL(initWithShort_,
+            "initWithShort:");
+        _NS_PRIVATE_DEF_SEL(initWithString_,
+            "initWithString:");
+        _NS_PRIVATE_DEF_SEL(initWithUnsignedChar_,
+            "initWithUnsignedChar:");
+        _NS_PRIVATE_DEF_SEL(initWithUnsignedInt_,
+            "initWithUnsignedInt:");
+        _NS_PRIVATE_DEF_SEL(initWithUnsignedLong_,
+            "initWithUnsignedLong:");
+        _NS_PRIVATE_DEF_SEL(initWithUnsignedLongLong_,
+            "initWithUnsignedLongLong:");
+        _NS_PRIVATE_DEF_SEL(initWithUnsignedShort_,
+            "initWithUnsignedShort:");
+        _NS_PRIVATE_DEF_SEL(initWithURL_,
+            "initWithURL:");
+        _NS_PRIVATE_DEF_SEL(integerValue,
+            "integerValue");
+        _NS_PRIVATE_DEF_SEL(intValue,
+            "intValue");
+        _NS_PRIVATE_DEF_SEL(isEqual_,
+            "isEqual:");
+        _NS_PRIVATE_DEF_SEL(isEqualToNumber_,
+            "isEqualToNumber:");
+        _NS_PRIVATE_DEF_SEL(isEqualToString_,
+            "isEqualToString:");
+        _NS_PRIVATE_DEF_SEL(isEqualToValue_,
+            "isEqualToValue:");
+        _NS_PRIVATE_DEF_SEL(isiOSAppOnMac,
+            "isiOSAppOnMac");
+        _NS_PRIVATE_DEF_SEL(isLoaded,
+            "isLoaded");
+        _NS_PRIVATE_DEF_SEL(isLowPowerModeEnabled,
+            "isLowPowerModeEnabled");
+        _NS_PRIVATE_DEF_SEL(isMacCatalystApp,
+            "isMacCatalystApp");
+        _NS_PRIVATE_DEF_SEL(isOperatingSystemAtLeastVersion_,
+            "isOperatingSystemAtLeastVersion:");
+        _NS_PRIVATE_DEF_SEL(keyEnumerator,
+            "keyEnumerator");
+        _NS_PRIVATE_DEF_SEL(length,
+            "length");
+        _NS_PRIVATE_DEF_SEL(lengthOfBytesUsingEncoding_,
+            "lengthOfBytesUsingEncoding:");
+        _NS_PRIVATE_DEF_SEL(load,
+            "load");
+        _NS_PRIVATE_DEF_SEL(loadAndReturnError_,
+            "loadAndReturnError:");
+        _NS_PRIVATE_DEF_SEL(localizedDescription,
+            "localizedDescription");
+        _NS_PRIVATE_DEF_SEL(localizedFailureReason,
+            "localizedFailureReason");
+        _NS_PRIVATE_DEF_SEL(localizedInfoDictionary,
+            "localizedInfoDictionary");
+        _NS_PRIVATE_DEF_SEL(localizedRecoveryOptions,
+            "localizedRecoveryOptions");
+        _NS_PRIVATE_DEF_SEL(localizedRecoverySuggestion,
+            "localizedRecoverySuggestion");
+        _NS_PRIVATE_DEF_SEL(localizedStringForKey_value_table_,
+            "localizedStringForKey:value:table:");
+        _NS_PRIVATE_DEF_SEL(lock,
+            "lock");
+        _NS_PRIVATE_DEF_SEL(longValue,
+            "longValue");
+        _NS_PRIVATE_DEF_SEL(longLongValue,
+            "longLongValue");
+        _NS_PRIVATE_DEF_SEL(mainBundle,
+            "mainBundle");
+        _NS_PRIVATE_DEF_SEL(maximumLengthOfBytesUsingEncoding_,
+            "maximumLengthOfBytesUsingEncoding:");
+        _NS_PRIVATE_DEF_SEL(methodSignatureForSelector_,
+            "methodSignatureForSelector:");
+        _NS_PRIVATE_DEF_SEL(mutableBytes,
+            "mutableBytes");
+        _NS_PRIVATE_DEF_SEL(name,
+            "name");
+        _NS_PRIVATE_DEF_SEL(nextObject,
+            "nextObject");
+        _NS_PRIVATE_DEF_SEL(numberWithBool_,
+            "numberWithBool:");
+        _NS_PRIVATE_DEF_SEL(numberWithChar_,
+            "numberWithChar:");
+        _NS_PRIVATE_DEF_SEL(numberWithDouble_,
+            "numberWithDouble:");
+        _NS_PRIVATE_DEF_SEL(numberWithFloat_,
+            "numberWithFloat:");
+        _NS_PRIVATE_DEF_SEL(numberWithInt_,
+            "numberWithInt:");
+        _NS_PRIVATE_DEF_SEL(numberWithLong_,
+            "numberWithLong:");
+        _NS_PRIVATE_DEF_SEL(numberWithLongLong_,
+            "numberWithLongLong:");
+        _NS_PRIVATE_DEF_SEL(numberWithShort_,
+            "numberWithShort:");
+        _NS_PRIVATE_DEF_SEL(numberWithUnsignedChar_,
+            "numberWithUnsignedChar:");
+        _NS_PRIVATE_DEF_SEL(numberWithUnsignedInt_,
+            "numberWithUnsignedInt:");
+        _NS_PRIVATE_DEF_SEL(numberWithUnsignedLong_,
+            "numberWithUnsignedLong:");
+        _NS_PRIVATE_DEF_SEL(numberWithUnsignedLongLong_,
+            "numberWithUnsignedLongLong:");
+        _NS_PRIVATE_DEF_SEL(numberWithUnsignedShort_,
+            "numberWithUnsignedShort:");
+        _NS_PRIVATE_DEF_SEL(objCType,
+            "objCType");
+        _NS_PRIVATE_DEF_SEL(object,
+            "object");
+        _NS_PRIVATE_DEF_SEL(objectAtIndex_,
+            "objectAtIndex:");
+        _NS_PRIVATE_DEF_SEL(objectForInfoDictionaryKey_,
+            "objectForInfoDictionaryKey:");
+        _NS_PRIVATE_DEF_SEL(objectForKey_,
+            "objectForKey:");
+        _NS_PRIVATE_DEF_SEL(operatingSystem,
+            "operatingSystem");
+        _NS_PRIVATE_DEF_SEL(operatingSystemVersion,
+            "operatingSystemVersion");
+        _NS_PRIVATE_DEF_SEL(operatingSystemVersionString,
+            "operatingSystemVersionString");
+        _NS_PRIVATE_DEF_SEL(pathForAuxiliaryExecutable_,
+            "pathForAuxiliaryExecutable:");
+        _NS_PRIVATE_DEF_SEL(performActivityWithOptions_reason_usingBlock_,
+            "performActivityWithOptions:reason:usingBlock:");
+        _NS_PRIVATE_DEF_SEL(performExpiringActivityWithReason_usingBlock_,
+            "performExpiringActivityWithReason:usingBlock:");
+        _NS_PRIVATE_DEF_SEL(physicalMemory,
+            "physicalMemory");
+        _NS_PRIVATE_DEF_SEL(pointerValue,
+            "pointerValue");
+        _NS_PRIVATE_DEF_SEL(preflightAndReturnError_,
+            "preflightAndReturnError:");
+        _NS_PRIVATE_DEF_SEL(privateFrameworksPath,
+            "privateFrameworksPath");
+        _NS_PRIVATE_DEF_SEL(privateFrameworksURL,
+            "privateFrameworksURL");
+        _NS_PRIVATE_DEF_SEL(processIdentifier,
+            "processIdentifier");
+        _NS_PRIVATE_DEF_SEL(processInfo,
+            "processInfo");
+        _NS_PRIVATE_DEF_SEL(processName,
+            "processName");
+        _NS_PRIVATE_DEF_SEL(processorCount,
+            "processorCount");
+        _NS_PRIVATE_DEF_SEL(rangeOfString_options_,
+            "rangeOfString:options:");
+        _NS_PRIVATE_DEF_SEL(release,
+            "release");
+        _NS_PRIVATE_DEF_SEL(resourcePath,
+            "resourcePath");
+        _NS_PRIVATE_DEF_SEL(resourceURL,
+            "resourceURL");
+        _NS_PRIVATE_DEF_SEL(respondsToSelector_,
+            "respondsToSelector:");
+        _NS_PRIVATE_DEF_SEL(retain,
+            "retain");
+        _NS_PRIVATE_DEF_SEL(retainCount,
+            "retainCount");
+        _NS_PRIVATE_DEF_SEL(setAutomaticTerminationSupportEnabled_,
+            "setAutomaticTerminationSupportEnabled:");
+        _NS_PRIVATE_DEF_SEL(setProcessName_,
+            "setProcessName:");
+        _NS_PRIVATE_DEF_SEL(sharedFrameworksPath,
+            "sharedFrameworksPath");
+        _NS_PRIVATE_DEF_SEL(sharedFrameworksURL,
+            "sharedFrameworksURL");
+        _NS_PRIVATE_DEF_SEL(sharedSupportPath,
+            "sharedSupportPath");
+        _NS_PRIVATE_DEF_SEL(sharedSupportURL,
+            "sharedSupportURL");
+        _NS_PRIVATE_DEF_SEL(shortValue,
+            "shortValue");
+        _NS_PRIVATE_DEF_SEL(showPools,
+            "showPools");
+        _NS_PRIVATE_DEF_SEL(signal,
+            "signal");
+        _NS_PRIVATE_DEF_SEL(string,
+            "string");
+        _NS_PRIVATE_DEF_SEL(stringValue,
+            "stringValue");
+        _NS_PRIVATE_DEF_SEL(stringWithString_,
+            "stringWithString:");
+        _NS_PRIVATE_DEF_SEL(stringWithCString_encoding_,
+            "stringWithCString:encoding:");
+        _NS_PRIVATE_DEF_SEL(stringByAppendingString_,
+            "stringByAppendingString:");
+        _NS_PRIVATE_DEF_SEL(systemUptime,
+            "systemUptime");
+        _NS_PRIVATE_DEF_SEL(thermalState,
+            "thermalState");
+        _NS_PRIVATE_DEF_SEL(unload,
+            "unload");
+        _NS_PRIVATE_DEF_SEL(unlock,
+            "unlock");
+        _NS_PRIVATE_DEF_SEL(unsignedCharValue,
+            "unsignedCharValue");
+        _NS_PRIVATE_DEF_SEL(unsignedIntegerValue,
+            "unsignedIntegerValue");
+        _NS_PRIVATE_DEF_SEL(unsignedIntValue,
+            "unsignedIntValue");
+        _NS_PRIVATE_DEF_SEL(unsignedLongValue,
+            "unsignedLongValue");
+        _NS_PRIVATE_DEF_SEL(unsignedLongLongValue,
+            "unsignedLongLongValue");
+        _NS_PRIVATE_DEF_SEL(unsignedShortValue,
+            "unsignedShortValue");
+        _NS_PRIVATE_DEF_SEL(URLForAuxiliaryExecutable_,
+            "URLForAuxiliaryExecutable:");
+        _NS_PRIVATE_DEF_SEL(userInfo,
+            "userInfo");
+        _NS_PRIVATE_DEF_SEL(userName,
+            "userName");
+        _NS_PRIVATE_DEF_SEL(UTF8String,
+            "UTF8String");
+        _NS_PRIVATE_DEF_SEL(valueWithBytes_objCType_,
+            "valueWithBytes:objCType:");
+        _NS_PRIVATE_DEF_SEL(valueWithPointer_,
+            "valueWithPointer:");
+        _NS_PRIVATE_DEF_SEL(wait,
+            "wait");
+        _NS_PRIVATE_DEF_SEL(waitUntilDate_,
+            "waitUntilDate:");
+    } // Selector
+} // Private
+} // NS
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSProcessInfo.hpp b/metal-cpp/Foundation/NSProcessInfo.hpp
new file mode 100644
index 0000000..98e3147
--- /dev/null
+++ b/metal-cpp/Foundation/NSProcessInfo.hpp
@@ -0,0 +1,354 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSProcessInfo.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSNotification.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+#include <functional>
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+_NS_CONST(NotificationName, ProcessInfoThermalStateDidChangeNotification); // paired with the _NS_PRIVATE_DEF_CONST line of the same name after namespace NS closes
+_NS_CONST(NotificationName, ProcessInfoPowerStateDidChangeNotification); // paired with the _NS_PRIVATE_DEF_CONST line of the same name after namespace NS closes
+// Thermal levels returned by ProcessInfo::thermalState(); values ascend from Nominal (0) to Critical (3).
+_NS_ENUM(NS::Integer, ProcessInfoThermalState) {
+    ProcessInfoThermalStateNominal = 0,
+    ProcessInfoThermalStateFair = 1,
+    ProcessInfoThermalStateSerious = 2,
+    ProcessInfoThermalStateCritical = 3
+};
+// 64-bit option flags accepted by ProcessInfo::beginActivity and ProcessInfo::performActivity.
+_NS_OPTIONS(std::uint64_t, ActivityOptions) {
+    ActivityIdleDisplaySleepDisabled = (1ULL << 40),
+    ActivityIdleSystemSleepDisabled = (1ULL << 20),
+    ActivitySuddenTerminationDisabled = (1ULL << 14),
+    ActivityAutomaticTerminationDisabled = (1ULL << 15),
+    ActivityUserInitiated = (0x00FFFFFFULL | ActivityIdleSystemSleepDisabled), // low 24 bits; bit 20 already lies inside that mask
+    ActivityUserInitiatedAllowingIdleSystemSleep = (ActivityUserInitiated & ~ActivityIdleSystemSleepDisabled), // same value with bit 20 cleared
+    ActivityBackground = 0x000000FFULL,
+    ActivityLatencyCritical = 0xFF00000000ULL,
+};
+// Declares the ProcessInfo wrapper; every member below is defined inline further down in this header.
+class ProcessInfo : public Referencing<ProcessInfo>
+{
+public:
+    static ProcessInfo*     processInfo();
+    // Launch arguments, environment, host/process names and identifiers.
+    class Array*            arguments() const;
+    class Dictionary*       environment() const;
+    class String*           hostName() const;
+    class String*           processName() const;
+    void                    setProcessName(const String* pString);
+    int                     processIdentifier() const;
+    class String*           globallyUniqueString() const;
+    // User account names.
+    class String*           userName() const;
+    class String*           fullUserName() const;
+    // Operating-system identification and version comparison.
+    UInteger                operatingSystem() const;
+    OperatingSystemVersion  operatingSystemVersion() const;
+    class String*           operatingSystemVersionString() const;
+    bool                    isOperatingSystemAtLeastVersion(OperatingSystemVersion version) const;
+    // Processor count, physical memory and uptime queries.
+    UInteger                processorCount() const;
+    UInteger                activeProcessorCount() const;
+    unsigned long long      physicalMemory() const;
+    TimeInterval            systemUptime() const;
+    // Sudden-termination toggles.
+    void                    disableSuddenTermination();
+    void                    enableSuddenTermination();
+    // Automatic-termination toggles (each takes a reason string) and the support flag accessors.
+    void                    disableAutomaticTermination(const class String* pReason);
+    void                    enableAutomaticTermination(const class String* pReason);
+    bool                    automaticTerminationSupportEnabled() const;
+    void                    setAutomaticTerminationSupportEnabled(bool enabled);
+    // Activity calls: begin/end pair, plus perform* overloads taking either a block or a std::function.
+    class Object*           beginActivity(ActivityOptions options, const class String* pReason);
+    void                    endActivity(class Object* pActivity);
+    void                    performActivity(ActivityOptions options, const class String* pReason, void (^block)(void));
+    void                    performActivity(ActivityOptions options, const class String* pReason, const std::function<void()>& func);
+    void                    performExpiringActivity(const class String* pReason, void (^block)(bool expired));
+    void                    performExpiringActivity(const class String* pReason, const std::function<void(bool expired)>& func);
+    // Thermal state and low-power-mode queries.
+    ProcessInfoThermalState thermalState() const;
+    bool                    isLowPowerModeEnabled() const;
+    // Platform-variant checks.
+    bool                    isiOSAppOnMac() const;
+    bool                    isMacCatalystApp() const;
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Counterparts of the two _NS_CONST notification names that appear inside namespace NS earlier in this header.
+_NS_PRIVATE_DEF_CONST(NS::NotificationName, ProcessInfoThermalStateDidChangeNotification);
+_NS_PRIVATE_DEF_CONST(NS::NotificationName, ProcessInfoPowerStateDidChangeNotification);
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Static accessor: sends the `processInfo` selector to the NSProcessInfo class and returns the result as a ProcessInfo*.
+_NS_INLINE NS::ProcessInfo* NS::ProcessInfo::processInfo()
+{
+    return Object::sendMessage<ProcessInfo*>(_NS_PRIVATE_CLS(NSProcessInfo), _NS_PRIVATE_SEL(processInfo));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `arguments` selector to this object and returns the result as an Array*.
+_NS_INLINE NS::Array* NS::ProcessInfo::arguments() const
+{
+    return Object::sendMessage<Array*>(this, _NS_PRIVATE_SEL(arguments));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `environment` selector to this object and returns the result as a Dictionary*.
+_NS_INLINE NS::Dictionary* NS::ProcessInfo::environment() const
+{
+    return Object::sendMessage<Dictionary*>(this, _NS_PRIVATE_SEL(environment));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `hostName` selector to this object and returns the result as a String*.
+_NS_INLINE NS::String* NS::ProcessInfo::hostName() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(hostName));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `processName` selector to this object and returns the result as a String*.
+_NS_INLINE NS::String* NS::ProcessInfo::processName() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(processName));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `setProcessName_` selector to this object with `pString` as its only argument; nothing is returned.
+_NS_INLINE void NS::ProcessInfo::setProcessName(const String* pString)
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(setProcessName_), pString);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `processIdentifier` selector to this object and returns the result as an int.
+_NS_INLINE int NS::ProcessInfo::processIdentifier() const
+{
+    return Object::sendMessage<int>(this, _NS_PRIVATE_SEL(processIdentifier));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `globallyUniqueString` selector to this object and returns the result as a String*.
+_NS_INLINE NS::String* NS::ProcessInfo::globallyUniqueString() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(globallyUniqueString));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `userName` through Object::sendMessageSafe (not plain sendMessage) and returns a String*; see that helper for how it differs.
+_NS_INLINE NS::String* NS::ProcessInfo::userName() const
+{
+    return Object::sendMessageSafe<String*>(this, _NS_PRIVATE_SEL(userName));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `fullUserName` through Object::sendMessageSafe (not plain sendMessage) and returns a String*; see that helper for how it differs.
+_NS_INLINE NS::String* NS::ProcessInfo::fullUserName() const
+{
+    return Object::sendMessageSafe<String*>(this, _NS_PRIVATE_SEL(fullUserName));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `operatingSystem` selector to this object and returns the result as a UInteger.
+_NS_INLINE NS::UInteger NS::ProcessInfo::operatingSystem() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(operatingSystem));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `operatingSystemVersion` selector to this object and returns the OperatingSystemVersion struct by value.
+_NS_INLINE NS::OperatingSystemVersion NS::ProcessInfo::operatingSystemVersion() const
+{
+    return Object::sendMessage<OperatingSystemVersion>(this, _NS_PRIVATE_SEL(operatingSystemVersion));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `operatingSystemVersionString` selector to this object and returns the result as a String*.
+_NS_INLINE NS::String* NS::ProcessInfo::operatingSystemVersionString() const
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(operatingSystemVersionString));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `isOperatingSystemAtLeastVersion_` with `version` passed by value and returns the bool result.
+_NS_INLINE bool NS::ProcessInfo::isOperatingSystemAtLeastVersion(OperatingSystemVersion version) const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isOperatingSystemAtLeastVersion_), version);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `processorCount` selector to this object and returns the result as a UInteger.
+_NS_INLINE NS::UInteger NS::ProcessInfo::processorCount() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(processorCount));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `activeProcessorCount` selector to this object and returns the result as a UInteger.
+_NS_INLINE NS::UInteger NS::ProcessInfo::activeProcessorCount() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(activeProcessorCount));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `physicalMemory` selector to this object and returns the result as an unsigned long long.
+_NS_INLINE unsigned long long NS::ProcessInfo::physicalMemory() const
+{
+    return Object::sendMessage<unsigned long long>(this, _NS_PRIVATE_SEL(physicalMemory));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `systemUptime` selector to this object and returns the result as a TimeInterval (a double per NSTypes.hpp).
+_NS_INLINE NS::TimeInterval NS::ProcessInfo::systemUptime() const
+{
+    return Object::sendMessage<TimeInterval>(this, _NS_PRIVATE_SEL(systemUptime));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `disableSuddenTermination` through Object::sendMessageSafe; takes no arguments and returns nothing.
+_NS_INLINE void NS::ProcessInfo::disableSuddenTermination()
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(disableSuddenTermination));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `enableSuddenTermination` through Object::sendMessageSafe; takes no arguments and returns nothing.
+_NS_INLINE void NS::ProcessInfo::enableSuddenTermination()
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(enableSuddenTermination));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `disableAutomaticTermination_` through Object::sendMessageSafe with `pReason` as its argument; returns nothing.
+_NS_INLINE void NS::ProcessInfo::disableAutomaticTermination(const String* pReason)
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(disableAutomaticTermination_), pReason);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `enableAutomaticTermination_` through Object::sendMessageSafe with `pReason` as its argument; returns nothing.
+_NS_INLINE void NS::ProcessInfo::enableAutomaticTermination(const String* pReason)
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(enableAutomaticTermination_), pReason);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `automaticTerminationSupportEnabled` through Object::sendMessageSafe and returns the bool result.
+_NS_INLINE bool NS::ProcessInfo::automaticTerminationSupportEnabled() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(automaticTerminationSupportEnabled));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `setAutomaticTerminationSupportEnabled_` through Object::sendMessageSafe with `enabled` as its argument; returns nothing.
+_NS_INLINE void NS::ProcessInfo::setAutomaticTerminationSupportEnabled(bool enabled)
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(setAutomaticTerminationSupportEnabled_), enabled);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `beginActivityWithOptions_reason_` with (options, pReason) and returns the resulting Object*, the type endActivity() accepts.
+_NS_INLINE NS::Object* NS::ProcessInfo::beginActivity(ActivityOptions options, const String* pReason)
+{
+    return Object::sendMessage<Object*>(this, _NS_PRIVATE_SEL(beginActivityWithOptions_reason_), options, pReason);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `endActivity_` selector to this object with `pActivity` as its argument; returns nothing.
+_NS_INLINE void NS::ProcessInfo::endActivity(Object* pActivity)
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(endActivity_), pActivity);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Block overload: sends `performActivityWithOptions_reason_usingBlock_` with (options, pReason, block); returns nothing.
+_NS_INLINE void NS::ProcessInfo::performActivity(ActivityOptions options, const String* pReason, void (^block)(void))
+{
+    Object::sendMessage<void>(this, _NS_PRIVATE_SEL(performActivityWithOptions_reason_usingBlock_), options, pReason, block);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// std::function overload: copies `function` into a __block variable and forwards to the block-taking performActivity.
+_NS_INLINE void NS::ProcessInfo::performActivity(ActivityOptions options, const String* pReason, const std::function<void()>& function)
+{
+    __block std::function<void()> callback = function;
+    // The block simply invokes the copied callable.
+    this->performActivity(options, pReason, ^() { callback(); });
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Block overload: sends `performExpiringActivityWithReason_usingBlock_` through Object::sendMessageSafe with (pReason, block).
+_NS_INLINE void NS::ProcessInfo::performExpiringActivity(const String* pReason, void (^block)(bool expired))
+{
+    Object::sendMessageSafe<void>(this, _NS_PRIVATE_SEL(performExpiringActivityWithReason_usingBlock_), pReason, block);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// std::function overload: copies `function` into a __block variable and forwards to the block-taking performExpiringActivity.
+_NS_INLINE void NS::ProcessInfo::performExpiringActivity(const String* pReason, const std::function<void(bool expired)>& function)
+{
+    __block std::function<void(bool expired)> callback = function;
+    // The block passes its `expired` flag straight through to the copied callable.
+    this->performExpiringActivity(pReason, ^(bool expired) { callback(expired); });
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `thermalState` selector to this object and returns the result as a ProcessInfoThermalState.
+_NS_INLINE NS::ProcessInfoThermalState NS::ProcessInfo::thermalState() const
+{
+    return Object::sendMessage<ProcessInfoThermalState>(this, _NS_PRIVATE_SEL(thermalState));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `isLowPowerModeEnabled` through Object::sendMessageSafe and returns the bool result.
+_NS_INLINE bool NS::ProcessInfo::isLowPowerModeEnabled() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(isLowPowerModeEnabled));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `isiOSAppOnMac` through Object::sendMessageSafe and returns the bool result.
+_NS_INLINE bool NS::ProcessInfo::isiOSAppOnMac() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(isiOSAppOnMac));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `isMacCatalystApp` through Object::sendMessageSafe and returns the bool result.
+_NS_INLINE bool NS::ProcessInfo::isMacCatalystApp() const
+{
+    return Object::sendMessageSafe<bool>(this, _NS_PRIVATE_SEL(isMacCatalystApp));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSRange.hpp b/metal-cpp/Foundation/NSRange.hpp
new file mode 100644
index 0000000..8b467d6
--- /dev/null
+++ b/metal-cpp/Foundation/NSRange.hpp
@@ -0,0 +1,83 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSRange.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+struct Range // location/length pair of UInteger values
+{
+    static Range Make(UInteger loc, UInteger len);
+    // Builds a range from a start location and a length.
+    Range(UInteger loc, UInteger len);
+    // Equality, containment test, and end position (location + length).
+    bool     Equal(const Range& range) const;
+    bool     LocationInRange(UInteger loc) const;
+    UInteger Max() const;
+    // Data members, in declaration order.
+    UInteger location;
+    UInteger length;
+} _NS_PACKED;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Member-wise constructor: stores `loc` in location and `len` in length.
+_NS_INLINE NS::Range::Range(UInteger loc, UInteger len)
+    : location(loc)
+    , length(len)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Factory equivalent of the two-argument constructor.
+_NS_INLINE NS::Range NS::Range::Make(UInteger loc, UInteger len)
+{
+    return Range { loc, len };
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// True only when both the location and the length match those of `range`.
+_NS_INLINE bool NS::Range::Equal(const Range& range) const
+{
+    return !((location != range.location) || (length != range.length));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// True when `loc` is at or past location and its offset from location is below length (unsigned arithmetic).
+_NS_INLINE bool NS::Range::LocationInRange(UInteger loc) const
+{
+    return (loc >= location) && (length > (loc - location));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// One-past-the-end position: the unsigned sum of location and length.
+_NS_INLINE NS::UInteger NS::Range::Max() const
+{
+    return length + location;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSString.hpp b/metal-cpp/Foundation/NSString.hpp
new file mode 100644
index 0000000..829decb
--- /dev/null
+++ b/metal-cpp/Foundation/NSString.hpp
@@ -0,0 +1,245 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSString.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSRange.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+_NS_ENUM(NS::UInteger, StringEncoding) { // encoding identifiers taken by the String methods in this header that accept a StringEncoding
+    ASCIIStringEncoding = 1,
+    NEXTSTEPStringEncoding = 2,
+    JapaneseEUCStringEncoding = 3,
+    UTF8StringEncoding = 4,
+    ISOLatin1StringEncoding = 5,
+    SymbolStringEncoding = 6,
+    NonLossyASCIIStringEncoding = 7,
+    ShiftJISStringEncoding = 8,
+    ISOLatin2StringEncoding = 9,
+    UnicodeStringEncoding = 10,
+    WindowsCP1251StringEncoding = 11,
+    WindowsCP1252StringEncoding = 12,
+    WindowsCP1253StringEncoding = 13,
+    WindowsCP1254StringEncoding = 14,
+    WindowsCP1250StringEncoding = 15,
+    ISO2022JPStringEncoding = 21,
+    MacOSRomanStringEncoding = 30,
+    // UTF16StringEncoding is an alias for UnicodeStringEncoding (10).
+    UTF16StringEncoding = UnicodeStringEncoding,
+    // Byte-order-specific UTF-16 variants.
+    UTF16BigEndianStringEncoding = 0x90000100,
+    UTF16LittleEndianStringEncoding = 0x94000100,
+    // UTF-32 and its byte-order-specific variants.
+    UTF32StringEncoding = 0x8c000100,
+    UTF32BigEndianStringEncoding = 0x98000100,
+    UTF32LittleEndianStringEncoding = 0x9c000100
+};
+// Search option flags taken by String::rangeOfString; every value is a distinct power of two.
+_NS_OPTIONS(NS::UInteger, StringCompareOptions) {
+    CaseInsensitiveSearch = 1,
+    LiteralSearch = 2,
+    BackwardsSearch = 4,
+    AnchoredSearch = 8,
+    NumericSearch = 64,
+    DiacriticInsensitiveSearch = 128,
+    WidthInsensitiveSearch = 256,
+    ForcedOrderingSearch = 512,
+    RegularExpressionSearch = 1024
+};
+// Character unit type returned by String::character().
+using unichar = unsigned short;
+// Declares the String wrapper: static factories, alloc/init overloads and query methods, all defined inline further down.
+class String : public Copying<String>
+{
+public:
+    static String* string();
+    static String* string(const String* pString);
+    static String* string(const char* pString, StringEncoding encoding);
+    // Allocation and the init overloads.
+    static String* alloc();
+    String*        init();
+    String*        init(const String* pString);
+    String*        init(const char* pString, StringEncoding encoding);
+    String*        init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer);
+    // Per-index character access and length.
+    unichar        character(UInteger index) const;
+    UInteger       length() const;
+    // C-string accessors and byte-length queries for a given encoding.
+    const char*    cString(StringEncoding encoding) const;
+    const char*    utf8String() const;
+    UInteger       maximumLengthOfBytes(StringEncoding encoding) const;
+    UInteger       lengthOfBytes(StringEncoding encoding) const;
+    // Comparison and substring search.
+    bool           isEqualToString(const String* pString) const;
+    Range          rangeOfString(const String* pString, StringCompareOptions options) const;
+    // File-system representation as a C string.
+    const char*    fileSystemRepresentation() const;
+    // Concatenation.
+    String*        stringByAppendingString(const String* pString) const;
+
+};
+// Passes a char-array reference (e.g. a string literal) to __CFStringMakeConstantString and reinterprets the result as a const String*.
+template< std::size_t _StringLen >
+constexpr const String* MakeConstantString( const char ( &str )[_StringLen] )
+{
+    return reinterpret_cast< const String* >( __CFStringMakeConstantString( str ) );
+}
+
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Static factory: sends the `string` selector to the NSString class and returns the result as a String*.
+_NS_INLINE NS::String* NS::String::string()
+{
+    return Object::sendMessage<String*>(_NS_PRIVATE_CLS(NSString), _NS_PRIVATE_SEL(string));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Static factory: sends `stringWithString_` ("stringWithString:") to the NSString class with `pString`.
+_NS_INLINE NS::String* NS::String::string(const String* pString)
+{
+    return Object::sendMessage<String*>(_NS_PRIVATE_CLS(NSString), _NS_PRIVATE_SEL(stringWithString_), pString);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Static factory: sends `stringWithCString_encoding_` ("stringWithCString:encoding:") to the NSString class with (pString, encoding).
+_NS_INLINE NS::String* NS::String::string(const char* pString, StringEncoding encoding)
+{
+    return Object::sendMessage<String*>(_NS_PRIVATE_CLS(NSString), _NS_PRIVATE_SEL(stringWithCString_encoding_), pString, encoding);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Delegates to Object::alloc<String> with the NSString class and returns the resulting String*.
+_NS_INLINE NS::String* NS::String::alloc()
+{
+    return Object::alloc<String>(_NS_PRIVATE_CLS(NSString));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Argument-less initializer: delegates to Object::init<String>() and returns its result.
+_NS_INLINE NS::String* NS::String::init()
+{
+    return Object::init<String>();
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `initWithString_` selector to this object with `pString` and returns the result as a String*.
+_NS_INLINE NS::String* NS::String::init(const String* pString)
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(initWithString_), pString);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `initWithCString_encoding_` selector to this object with (pString, encoding) and returns the result as a String*.
+_NS_INLINE NS::String* NS::String::init(const char* pString, StringEncoding encoding)
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(initWithCString_encoding_), pString, encoding);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `initWithBytesNoCopy_length_encoding_freeWhenDone_` with (pBytes, len, encoding, freeBuffer) and returns the result as a String*.
+_NS_INLINE NS::String* NS::String::init(void* pBytes, UInteger len, StringEncoding encoding, bool freeBuffer)
+{
+    return Object::sendMessage<String*>(this, _NS_PRIVATE_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_), pBytes, len, encoding, freeBuffer);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `characterAtIndex_` selector to this object with `index` and returns the result as a unichar.
+_NS_INLINE NS::unichar NS::String::character(UInteger index) const
+{
+    return Object::sendMessage<unichar>(this, _NS_PRIVATE_SEL(characterAtIndex_), index);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `length` selector to this object and returns the result as a UInteger.
+_NS_INLINE NS::UInteger NS::String::length() const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(length));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `cStringUsingEncoding_` selector to this object with `encoding` and returns the result as a const char*.
+_NS_INLINE const char* NS::String::cString(StringEncoding encoding) const
+{
+    return Object::sendMessage<const char*>(this, _NS_PRIVATE_SEL(cStringUsingEncoding_), encoding);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `UTF8String` selector to this object and returns the result as a const char*.
+_NS_INLINE const char* NS::String::utf8String() const
+{
+    return Object::sendMessage<const char*>(this, _NS_PRIVATE_SEL(UTF8String));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `maximumLengthOfBytesUsingEncoding_` selector to this object with `encoding` and returns the result as a UInteger.
+_NS_INLINE NS::UInteger NS::String::maximumLengthOfBytes(StringEncoding encoding) const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(maximumLengthOfBytesUsingEncoding_), encoding);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `lengthOfBytesUsingEncoding_` selector to this object with `encoding` and returns the result as a UInteger.
+_NS_INLINE NS::UInteger NS::String::lengthOfBytes(StringEncoding encoding) const
+{
+    return Object::sendMessage<UInteger>(this, _NS_PRIVATE_SEL(lengthOfBytesUsingEncoding_), encoding);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `isEqualToString_` selector to this object with `pString` and returns the bool result.
+_NS_INLINE bool NS::String::isEqualToString(const NS::String* pString) const
+{
+    return Object::sendMessage<bool>(this, _NS_PRIVATE_SEL(isEqualToString_), pString);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `rangeOfString_options_` selector to this object with (pString, options) and returns the Range struct by value.
+_NS_INLINE NS::Range NS::String::rangeOfString(const NS::String* pString, NS::StringCompareOptions options) const
+{
+    return Object::sendMessage<Range>(this, _NS_PRIVATE_SEL(rangeOfString_options_), pString, options);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends the `fileSystemRepresentation` selector to this object and returns the result as a const char*.
+_NS_INLINE const char* NS::String::fileSystemRepresentation() const
+{
+    return Object::sendMessage<const char*>(this, _NS_PRIVATE_SEL(fileSystemRepresentation));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Sends `stringByAppendingString_` ("stringByAppendingString:") to this object with `pString` and returns the result as a String*.
+_NS_INLINE NS::String* NS::String::stringByAppendingString(const String* pString) const
+{
+    return Object::sendMessage<NS::String*>(this, _NS_PRIVATE_SEL(stringByAppendingString_), pString);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSTypes.hpp b/metal-cpp/Foundation/NSTypes.hpp
new file mode 100644
index 0000000..5f8ada1
--- /dev/null
+++ b/metal-cpp/Foundation/NSTypes.hpp
@@ -0,0 +1,51 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSTypes.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+
+#include <CoreFoundation/CoreFoundation.h>
+#include <cstdint>
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+using TimeInterval = double; // plain alias for double
+
+using Integer = std::intptr_t; // signed, pointer-sized integer
+using UInteger = std::uintptr_t; // unsigned, pointer-sized integer
+
+const Integer  IntegerMax = INTPTR_MAX; // limits below are taken from the <cstdint> macros matching the aliased types
+const Integer  IntegerMin = INTPTR_MIN;
+const UInteger UIntegerMax = UINTPTR_MAX; // no UIntegerMin is declared
+
+struct OperatingSystemVersion // three-part version number; every part is an NS::Integer
+{
+    Integer majorVersion;
+    Integer minorVersion;
+    Integer patchVersion;
+} _NS_PACKED; // NOTE(review): _NS_PACKED is not defined in this header - presumably a packing attribute from NSDefines.hpp; confirm
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Foundation/NSURL.hpp b/metal-cpp/Foundation/NSURL.hpp
new file mode 100644
index 0000000..110e7a2
--- /dev/null
+++ b/metal-cpp/Foundation/NSURL.hpp
@@ -0,0 +1,90 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Foundation/NSURL.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "NSDefines.hpp"
+#include "NSObject.hpp"
+#include "NSPrivate.hpp"
+#include "NSTypes.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace NS
+{
+class URL : public Copying<URL> // declaration only; every member is defined inline further down in this header and forwards to Object helpers
+{
+public:
+    static URL* fileURLWithPath(const class String* pPath); // static: its definition messages _NS_PRIVATE_CLS(NSURL), not an instance
+
+    static URL* alloc(); // defined via Object::alloc<URL> on _NS_PRIVATE_CLS(NSURL)
+    URL*        init(); // defined via Object::init<URL>()
+    URL*        init(const class String* pString); // defined via the initWithString_ selector
+    URL*        initFileURLWithPath(const class String* pPath); // defined via the initFileURLWithPath_ selector
+
+    const char* fileSystemRepresentation() const; // same selector name as NS::String::fileSystemRepresentation
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::URL::fileURLWithPath(const String* pPath) // Static factory: forwards pPath to the fileURLWithPath_ selector.
+{
+    return Object::sendMessage<URL*>(_NS_PRIVATE_CLS(NSURL), _NS_PRIVATE_SEL(fileURLWithPath_), pPath); // receiver is _NS_PRIVATE_CLS(NSURL) rather than `this`
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::URL::alloc() // Static; delegates allocation to Object::alloc<URL>.
+{
+    return Object::alloc<URL>(_NS_PRIVATE_CLS(NSURL)); // the class passed is _NS_PRIVATE_CLS(NSURL)
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::URL::init() // Argument-less initialiser; see the overload taking a String for initWithString_.
+{
+    return Object::init<URL>(); // delegates entirely to Object::init<URL>
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::URL::init(const String* pString) // Forwards pString to the initWithString_ selector on this object.
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(initWithString_), pString); // returns the reply, which need not be `this` as far as this code shows
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE NS::URL* NS::URL::initFileURLWithPath(const String* pPath) // Instance-side counterpart of the static fileURLWithPath: same argument, sent to `this`.
+{
+    return Object::sendMessage<URL*>(this, _NS_PRIVATE_SEL(initFileURLWithPath_), pPath); // uses the initFileURLWithPath_ selector
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_NS_INLINE const char* NS::URL::fileSystemRepresentation() const // Sends the argument-less fileSystemRepresentation selector to this object.
+{
+    return Object::sendMessage<const char*>(this, _NS_PRIVATE_SEL(fileSystemRepresentation)); // NOTE(review): lifetime of the returned buffer is not established here - confirm before storing the pointer
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/LICENSE.txt b/metal-cpp/LICENSE.txt
new file mode 100644
index 0000000..5e743c6
--- /dev/null
+++ b/metal-cpp/LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright © 2021 Apple Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/metal-cpp/Metal/MTLAccelerationStructure.hpp b/metal-cpp/Metal/MTLAccelerationStructure.hpp
new file mode 100644
index 0000000..3d456c8
--- /dev/null
+++ b/metal-cpp/Metal/MTLAccelerationStructure.hpp
@@ -0,0 +1,932 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLAccelerationStructure.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLAccelerationStructure.hpp"
+#include "MTLAccelerationStructureTypes.hpp"
+#include "MTLResource.hpp"
+#include "MTLStageInputOutputDescriptor.hpp"
+
+namespace MTL
+{
+_MTL_OPTIONS(NS::UInteger, AccelerationStructureUsage) { // NS::UInteger-backed; the non-zero values are distinct single bits (1, 2, 4)
+    AccelerationStructureUsageNone = 0,
+    AccelerationStructureUsageRefit = 1,
+    AccelerationStructureUsagePreferFastBuild = 2,
+    AccelerationStructureUsageExtendedLimits = 4,
+}; // NOTE(review): presumably meant to be OR-combined, given _MTL_OPTIONS and the bit values - confirm in MTLDefines.hpp
+
+_MTL_OPTIONS(uint32_t, AccelerationStructureInstanceOptions) { // uint32_t-backed (not NS::UInteger); stored directly in the instance descriptor structs of this header
+    AccelerationStructureInstanceOptionNone = 0,
+    AccelerationStructureInstanceOptionDisableTriangleCulling = 1,
+    AccelerationStructureInstanceOptionTriangleFrontFacingWindingCounterClockwise = 2,
+    AccelerationStructureInstanceOptionOpaque = 4,
+    AccelerationStructureInstanceOptionNonOpaque = 8, // Opaque and NonOpaque are separate bits, not a single on/off flag
+};
+
+class AccelerationStructureDescriptor : public NS::Copying<AccelerationStructureDescriptor> // declaration only (members defined out of line); used as the second NS::Copying argument by the Primitive/Instance descriptors
+{
+public:
+    static class AccelerationStructureDescriptor* alloc();
+
+    class AccelerationStructureDescriptor*        init();
+
+    MTL::AccelerationStructureUsage               usage() const; // the only property declared at this level
+    void                                          setUsage(MTL::AccelerationStructureUsage usage);
+};
+
+class AccelerationStructureGeometryDescriptor : public NS::Copying<AccelerationStructureGeometryDescriptor> // declaration only; named as second NS::Copying argument by the triangle / bounding-box descriptors (static and motion)
+{
+public:
+    static class AccelerationStructureGeometryDescriptor* alloc();
+
+    class AccelerationStructureGeometryDescriptor*        init();
+
+    NS::UInteger                                          intersectionFunctionTableOffset() const; // NS::UInteger here, whereas the instance descriptor structs store a uint32_t field of the same name
+    void                                                  setIntersectionFunctionTableOffset(NS::UInteger intersectionFunctionTableOffset);
+
+    bool                                                  opaque() const;
+    void                                                  setOpaque(bool opaque);
+
+    bool                                                  allowDuplicateIntersectionFunctionInvocation() const;
+    void                                                  setAllowDuplicateIntersectionFunctionInvocation(bool allowDuplicateIntersectionFunctionInvocation);
+
+    NS::String*                                           label() const; // getter returns a non-const NS::String*, setter takes a const one
+    void                                                  setLabel(const NS::String* label);
+};
+
+_MTL_ENUM(uint32_t, MotionBorderMode) { // uint32_t-backed; used for the start/end border mode of both the primitive descriptor class and the motion instance struct
+    MotionBorderModeClamp = 0,
+    MotionBorderModeVanish = 1,
+};
+
+class PrimitiveAccelerationStructureDescriptor : public NS::Copying<PrimitiveAccelerationStructureDescriptor, MTL::AccelerationStructureDescriptor> // declaration only; second template argument names AccelerationStructureDescriptor
+{
+public:
+    static class PrimitiveAccelerationStructureDescriptor* alloc();
+
+    class PrimitiveAccelerationStructureDescriptor*        init();
+
+    NS::Array*                                             geometryDescriptors() const; // NOTE(review): element type is not stated - presumably AccelerationStructureGeometryDescriptor objects; confirm
+    void                                                   setGeometryDescriptors(const NS::Array* geometryDescriptors);
+
+    MTL::MotionBorderMode                                  motionStartBorderMode() const; // the motion* accessors below come in start/end pairs plus a keyframe count
+    void                                                   setMotionStartBorderMode(MTL::MotionBorderMode motionStartBorderMode);
+
+    MTL::MotionBorderMode                                  motionEndBorderMode() const;
+    void                                                   setMotionEndBorderMode(MTL::MotionBorderMode motionEndBorderMode);
+
+    float                                                  motionStartTime() const; // NOTE(review): time unit/range is not visible here - confirm
+    void                                                   setMotionStartTime(float motionStartTime);
+
+    float                                                  motionEndTime() const;
+    void                                                   setMotionEndTime(float motionEndTime);
+
+    NS::UInteger                                           motionKeyframeCount() const;
+    void                                                   setMotionKeyframeCount(NS::UInteger motionKeyframeCount);
+
+    static MTL::PrimitiveAccelerationStructureDescriptor*  descriptor(); // static factory alongside alloc()/init(); NOTE(review): ownership of the result is not visible here - confirm
+};
+
+class AccelerationStructureTriangleGeometryDescriptor : public NS::Copying<AccelerationStructureTriangleGeometryDescriptor, MTL::AccelerationStructureGeometryDescriptor> // declaration only; single vertex buffer + optional-looking index buffer accessors
+{
+public:
+    static class AccelerationStructureTriangleGeometryDescriptor* alloc();
+
+    class AccelerationStructureTriangleGeometryDescriptor*        init();
+
+    class Buffer*                                                 vertexBuffer() const; // one Buffer with an explicit offset and stride (compare the motion variant, which takes an NS::Array)
+    void                                                          setVertexBuffer(const class Buffer* vertexBuffer);
+
+    NS::UInteger                                                  vertexBufferOffset() const;
+    void                                                          setVertexBufferOffset(NS::UInteger vertexBufferOffset);
+
+    NS::UInteger                                                  vertexStride() const; // NOTE(review): presumably in bytes - confirm
+    void                                                          setVertexStride(NS::UInteger vertexStride);
+
+    class Buffer*                                                 indexBuffer() const; // NOTE(review): whether a null index buffer is permitted is not visible here - confirm
+    void                                                          setIndexBuffer(const class Buffer* indexBuffer);
+
+    NS::UInteger                                                  indexBufferOffset() const;
+    void                                                          setIndexBufferOffset(NS::UInteger indexBufferOffset);
+
+    MTL::IndexType                                                indexType() const; // MTL::IndexType is declared outside this header
+    void                                                          setIndexType(MTL::IndexType indexType);
+
+    NS::UInteger                                                  triangleCount() const;
+    void                                                          setTriangleCount(NS::UInteger triangleCount);
+
+    static MTL::AccelerationStructureTriangleGeometryDescriptor*  descriptor(); // static factory alongside alloc()/init(); NOTE(review): ownership of the result is not visible here - confirm
+};
+
+class AccelerationStructureBoundingBoxGeometryDescriptor : public NS::Copying<AccelerationStructureBoundingBoxGeometryDescriptor, MTL::AccelerationStructureGeometryDescriptor> // declaration only; buffer/offset/stride/count accessors for bounding boxes
+{
+public:
+    static class AccelerationStructureBoundingBoxGeometryDescriptor* alloc();
+
+    class AccelerationStructureBoundingBoxGeometryDescriptor*        init();
+
+    class Buffer*                                                    boundingBoxBuffer() const; // one Buffer with an explicit offset (the motion variant takes an NS::Array and has no offset accessor)
+    void                                                             setBoundingBoxBuffer(const class Buffer* boundingBoxBuffer);
+
+    NS::UInteger                                                     boundingBoxBufferOffset() const;
+    void                                                             setBoundingBoxBufferOffset(NS::UInteger boundingBoxBufferOffset);
+
+    NS::UInteger                                                     boundingBoxStride() const; // NOTE(review): presumably in bytes - confirm
+    void                                                             setBoundingBoxStride(NS::UInteger boundingBoxStride);
+
+    NS::UInteger                                                     boundingBoxCount() const;
+    void                                                             setBoundingBoxCount(NS::UInteger boundingBoxCount);
+
+    static MTL::AccelerationStructureBoundingBoxGeometryDescriptor*  descriptor(); // static factory alongside alloc()/init(); NOTE(review): ownership of the result is not visible here - confirm
+};
+
+class MotionKeyframeData : public NS::Referencing<MotionKeyframeData> // declaration only; derives from NS::Referencing, unlike the NS::Copying-based descriptor classes in this header
+{
+public:
+    static class MotionKeyframeData* alloc();
+
+    class MotionKeyframeData*        init();
+
+    class Buffer*                    buffer() const; // a Buffer/offset pair is the whole payload of this class
+    void                             setBuffer(const class Buffer* buffer);
+
+    NS::UInteger                     offset() const;
+    void                             setOffset(NS::UInteger offset);
+
+    static MTL::MotionKeyframeData*  data(); // static factory; named data() where the descriptor classes use descriptor()
+};
+
+class AccelerationStructureMotionTriangleGeometryDescriptor : public NS::Copying<AccelerationStructureMotionTriangleGeometryDescriptor, MTL::AccelerationStructureGeometryDescriptor> // declaration only; motion counterpart of the triangle descriptor
+{
+public:
+    static class AccelerationStructureMotionTriangleGeometryDescriptor* alloc();
+
+    class AccelerationStructureMotionTriangleGeometryDescriptor*        init();
+
+    NS::Array*                                                          vertexBuffers() const; // NS::Array instead of one Buffer, and no vertexBufferOffset accessor; NOTE(review): elements are presumably MotionKeyframeData - confirm
+    void                                                                setVertexBuffers(const NS::Array* vertexBuffers);
+
+    NS::UInteger                                                        vertexStride() const;
+    void                                                                setVertexStride(NS::UInteger vertexStride);
+
+    class Buffer*                                                       indexBuffer() const; // index buffer accessors match the non-motion triangle descriptor
+    void                                                                setIndexBuffer(const class Buffer* indexBuffer);
+
+    NS::UInteger                                                        indexBufferOffset() const;
+    void                                                                setIndexBufferOffset(NS::UInteger indexBufferOffset);
+
+    MTL::IndexType                                                      indexType() const;
+    void                                                                setIndexType(MTL::IndexType indexType);
+
+    NS::UInteger                                                        triangleCount() const;
+    void                                                                setTriangleCount(NS::UInteger triangleCount);
+
+    static MTL::AccelerationStructureMotionTriangleGeometryDescriptor*  descriptor(); // static factory alongside alloc()/init(); NOTE(review): ownership of the result is not visible here - confirm
+};
+
+class AccelerationStructureMotionBoundingBoxGeometryDescriptor : public NS::Copying<AccelerationStructureMotionBoundingBoxGeometryDescriptor, MTL::AccelerationStructureGeometryDescriptor> // declaration only; motion counterpart of the bounding-box descriptor
+{
+public:
+    static class AccelerationStructureMotionBoundingBoxGeometryDescriptor* alloc();
+
+    class AccelerationStructureMotionBoundingBoxGeometryDescriptor*        init();
+
+    NS::Array*                                                             boundingBoxBuffers() const; // NS::Array instead of one Buffer, and no offset accessor; NOTE(review): elements are presumably MotionKeyframeData - confirm
+    void                                                                   setBoundingBoxBuffers(const NS::Array* boundingBoxBuffers);
+
+    NS::UInteger                                                           boundingBoxStride() const;
+    void                                                                   setBoundingBoxStride(NS::UInteger boundingBoxStride);
+
+    NS::UInteger                                                           boundingBoxCount() const;
+    void                                                                   setBoundingBoxCount(NS::UInteger boundingBoxCount);
+
+    static MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor*  descriptor(); // static factory alongside alloc()/init(); NOTE(review): ownership of the result is not visible here - confirm
+};
+
+struct AccelerationStructureInstanceDescriptor // plain data record: a transform followed by four 32-bit fields
+{
+    MTL::PackedFloat4x3                       transformationMatrix; // PackedFloat4x3 is declared outside this header
+    MTL::AccelerationStructureInstanceOptions options; // uint32_t-backed option set
+    uint32_t                                  mask;
+    uint32_t                                  intersectionFunctionTableOffset;
+    uint32_t                                  accelerationStructureIndex; // NOTE(review): presumably indexes the descriptor's instancedAccelerationStructures array - confirm
+} _MTL_PACKED; // NOTE(review): _MTL_PACKED is not defined in this header - presumably a packing attribute; field order/size should be treated as fixed layout
+
+struct AccelerationStructureUserIDInstanceDescriptor // same fields, in the same order, as AccelerationStructureInstanceDescriptor, plus a trailing userID
+{
+    MTL::PackedFloat4x3                       transformationMatrix;
+    MTL::AccelerationStructureInstanceOptions options;
+    uint32_t                                  mask;
+    uint32_t                                  intersectionFunctionTableOffset;
+    uint32_t                                  accelerationStructureIndex;
+    uint32_t                                  userID; // the only field not present in AccelerationStructureInstanceDescriptor
+} _MTL_PACKED; // NOTE(review): _MTL_PACKED is not defined in this header - presumably a packing attribute; confirm
+
+_MTL_ENUM(NS::UInteger, AccelerationStructureInstanceDescriptorType) { // NS::UInteger-backed; NOTE(review): the three names line up with the three *InstanceDescriptor structs in this header - presumably selects the layout; confirm
+    AccelerationStructureInstanceDescriptorTypeDefault = 0,
+    AccelerationStructureInstanceDescriptorTypeUserID = 1,
+    AccelerationStructureInstanceDescriptorTypeMotion = 2,
+};
+
+struct AccelerationStructureMotionInstanceDescriptor // unlike the other two instance structs, has no transformationMatrix field and starts with `options`
+{
+    MTL::AccelerationStructureInstanceOptions options;
+    uint32_t                                  mask;
+    uint32_t                                  intersectionFunctionTableOffset;
+    uint32_t                                  accelerationStructureIndex;
+    uint32_t                                  userID;
+    uint32_t                                  motionTransformsStartIndex; // NOTE(review): start/count presumably address a separate motion transform buffer - confirm
+    uint32_t                                  motionTransformsCount;
+    MTL::MotionBorderMode                     motionStartBorderMode; // uint32_t-backed enum
+    MTL::MotionBorderMode                     motionEndBorderMode;
+    float                                     motionStartTime;
+    float                                     motionEndTime;
+} _MTL_PACKED; // NOTE(review): _MTL_PACKED is not defined in this header - presumably a packing attribute; confirm
+
+class InstanceAccelerationStructureDescriptor : public NS::Copying<InstanceAccelerationStructureDescriptor, MTL::AccelerationStructureDescriptor>
+{
+public:
+    static class InstanceAccelerationStructureDescriptor* alloc(); // static method: alloc
+    // method: init
+    class InstanceAccelerationStructureDescriptor*        init();
+    // property: instanceDescriptorBuffer
+    class Buffer*                                         instanceDescriptorBuffer() const;
+    void                                                  setInstanceDescriptorBuffer(const class Buffer* instanceDescriptorBuffer);
+    // property: instanceDescriptorBufferOffset
+    NS::UInteger                                          instanceDescriptorBufferOffset() const;
+    void                                                  setInstanceDescriptorBufferOffset(NS::UInteger instanceDescriptorBufferOffset);
+    // property: instanceDescriptorStride
+    NS::UInteger                                          instanceDescriptorStride() const;
+    void                                                  setInstanceDescriptorStride(NS::UInteger instanceDescriptorStride);
+    // property: instanceCount
+    NS::UInteger                                          instanceCount() const;
+    void                                                  setInstanceCount(NS::UInteger instanceCount);
+    // property: instancedAccelerationStructures
+    NS::Array*                                            instancedAccelerationStructures() const;
+    void                                                  setInstancedAccelerationStructures(const NS::Array* instancedAccelerationStructures);
+    // property: instanceDescriptorType
+    MTL::AccelerationStructureInstanceDescriptorType      instanceDescriptorType() const;
+    void                                                  setInstanceDescriptorType(MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType);
+    // property: motionTransformBuffer
+    class Buffer*                                         motionTransformBuffer() const;
+    void                                                  setMotionTransformBuffer(const class Buffer* motionTransformBuffer);
+    // property: motionTransformBufferOffset
+    NS::UInteger                                          motionTransformBufferOffset() const;
+    void                                                  setMotionTransformBufferOffset(NS::UInteger motionTransformBufferOffset);
+    // property: motionTransformCount
+    NS::UInteger                                          motionTransformCount() const;
+    void                                                  setMotionTransformCount(NS::UInteger motionTransformCount);
+    // static method: descriptor
+    static MTL::InstanceAccelerationStructureDescriptor*  descriptor();
+};
+
+class AccelerationStructure : public NS::Referencing<AccelerationStructure, Resource>
+{
+public:
+    NS::UInteger size() const; // property: size (read-only in this binding: no setter is declared)
+};
+
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLAccelerationStructureDescriptor).
+_MTL_INLINE MTL::AccelerationStructureDescriptor* MTL::AccelerationStructureDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::AccelerationStructureDescriptor>(_MTL_PRIVATE_CLS(MTLAccelerationStructureDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::AccelerationStructureDescriptor.
+_MTL_INLINE MTL::AccelerationStructureDescriptor* MTL::AccelerationStructureDescriptor::init()
+{
+    return NS::Object::init<MTL::AccelerationStructureDescriptor>();
+}
+
+// property: usage (getter) -- sends the `usage` selector to this object.
+_MTL_INLINE MTL::AccelerationStructureUsage MTL::AccelerationStructureDescriptor::usage() const
+{
+    return NS::Object::sendMessage<MTL::AccelerationStructureUsage>(this, _MTL_PRIVATE_SEL(usage));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureDescriptor::setUsage(MTL::AccelerationStructureUsage usage)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setUsage_), usage);
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLAccelerationStructureGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureGeometryDescriptor* MTL::AccelerationStructureGeometryDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::AccelerationStructureGeometryDescriptor>(_MTL_PRIVATE_CLS(MTLAccelerationStructureGeometryDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::AccelerationStructureGeometryDescriptor.
+_MTL_INLINE MTL::AccelerationStructureGeometryDescriptor* MTL::AccelerationStructureGeometryDescriptor::init()
+{
+    return NS::Object::init<MTL::AccelerationStructureGeometryDescriptor>();
+}
+
+// property: intersectionFunctionTableOffset (getter) -- sends the `intersectionFunctionTableOffset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureGeometryDescriptor::intersectionFunctionTableOffset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(intersectionFunctionTableOffset));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setIntersectionFunctionTableOffset(NS::UInteger intersectionFunctionTableOffset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTableOffset_), intersectionFunctionTableOffset);
+}
+
+// property: opaque (getter) -- sends the `opaque` selector to this object.
+_MTL_INLINE bool MTL::AccelerationStructureGeometryDescriptor::opaque() const
+{
+    return NS::Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(opaque));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setOpaque(bool opaque)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOpaque_), opaque);
+}
+
+// property: allowDuplicateIntersectionFunctionInvocation (getter) -- sends the same-named selector to this object.
+_MTL_INLINE bool MTL::AccelerationStructureGeometryDescriptor::allowDuplicateIntersectionFunctionInvocation() const
+{
+    return NS::Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(allowDuplicateIntersectionFunctionInvocation));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setAllowDuplicateIntersectionFunctionInvocation(bool allowDuplicateIntersectionFunctionInvocation)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAllowDuplicateIntersectionFunctionInvocation_), allowDuplicateIntersectionFunctionInvocation);
+}
+
+// property: label (getter) -- sends the `label` selector to this object.
+_MTL_INLINE NS::String* MTL::AccelerationStructureGeometryDescriptor::label() const
+{
+    return NS::Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureGeometryDescriptor::setLabel(const NS::String* label)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLPrimitiveAccelerationStructureDescriptor).
+_MTL_INLINE MTL::PrimitiveAccelerationStructureDescriptor* MTL::PrimitiveAccelerationStructureDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::PrimitiveAccelerationStructureDescriptor>(_MTL_PRIVATE_CLS(MTLPrimitiveAccelerationStructureDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::PrimitiveAccelerationStructureDescriptor.
+_MTL_INLINE MTL::PrimitiveAccelerationStructureDescriptor* MTL::PrimitiveAccelerationStructureDescriptor::init()
+{
+    return NS::Object::init<MTL::PrimitiveAccelerationStructureDescriptor>();
+}
+
+// property: geometryDescriptors (getter) -- sends the `geometryDescriptors` selector to this object.
+_MTL_INLINE NS::Array* MTL::PrimitiveAccelerationStructureDescriptor::geometryDescriptors() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(geometryDescriptors));
+}
+
+_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setGeometryDescriptors(const NS::Array* geometryDescriptors)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setGeometryDescriptors_), geometryDescriptors);
+}
+
+// property: motionStartBorderMode (getter) -- sends the `motionStartBorderMode` selector to this object.
+_MTL_INLINE MTL::MotionBorderMode MTL::PrimitiveAccelerationStructureDescriptor::motionStartBorderMode() const
+{
+    return NS::Object::sendMessage<MTL::MotionBorderMode>(this, _MTL_PRIVATE_SEL(motionStartBorderMode));
+}
+
+_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionStartBorderMode(MTL::MotionBorderMode motionStartBorderMode)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionStartBorderMode_), motionStartBorderMode);
+}
+
+// property: motionEndBorderMode (getter) -- sends the `motionEndBorderMode` selector to this object.
+_MTL_INLINE MTL::MotionBorderMode MTL::PrimitiveAccelerationStructureDescriptor::motionEndBorderMode() const
+{
+    return NS::Object::sendMessage<MTL::MotionBorderMode>(this, _MTL_PRIVATE_SEL(motionEndBorderMode));
+}
+
+_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionEndBorderMode(MTL::MotionBorderMode motionEndBorderMode)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionEndBorderMode_), motionEndBorderMode);
+}
+
+// property: motionStartTime (getter) -- sends the `motionStartTime` selector to this object.
+_MTL_INLINE float MTL::PrimitiveAccelerationStructureDescriptor::motionStartTime() const
+{
+    return NS::Object::sendMessage<float>(this, _MTL_PRIVATE_SEL(motionStartTime));
+}
+
+_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionStartTime(float motionStartTime)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionStartTime_), motionStartTime);
+}
+
+// property: motionEndTime (getter) -- sends the `motionEndTime` selector to this object.
+_MTL_INLINE float MTL::PrimitiveAccelerationStructureDescriptor::motionEndTime() const
+{
+    return NS::Object::sendMessage<float>(this, _MTL_PRIVATE_SEL(motionEndTime));
+}
+
+_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionEndTime(float motionEndTime)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionEndTime_), motionEndTime);
+}
+
+// property: motionKeyframeCount (getter) -- sends the `motionKeyframeCount` selector to this object.
+_MTL_INLINE NS::UInteger MTL::PrimitiveAccelerationStructureDescriptor::motionKeyframeCount() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(motionKeyframeCount));
+}
+
+_MTL_INLINE void MTL::PrimitiveAccelerationStructureDescriptor::setMotionKeyframeCount(NS::UInteger motionKeyframeCount)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionKeyframeCount_), motionKeyframeCount);
+}
+
+// static method: descriptor -- sends the `descriptor` selector to _MTL_PRIVATE_CLS(MTLPrimitiveAccelerationStructureDescriptor).
+_MTL_INLINE MTL::PrimitiveAccelerationStructureDescriptor* MTL::PrimitiveAccelerationStructureDescriptor::descriptor()
+{
+    return NS::Object::sendMessage<MTL::PrimitiveAccelerationStructureDescriptor*>(_MTL_PRIVATE_CLS(MTLPrimitiveAccelerationStructureDescriptor), _MTL_PRIVATE_SEL(descriptor));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLAccelerationStructureTriangleGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureTriangleGeometryDescriptor* MTL::AccelerationStructureTriangleGeometryDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::AccelerationStructureTriangleGeometryDescriptor>(_MTL_PRIVATE_CLS(MTLAccelerationStructureTriangleGeometryDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::AccelerationStructureTriangleGeometryDescriptor.
+_MTL_INLINE MTL::AccelerationStructureTriangleGeometryDescriptor* MTL::AccelerationStructureTriangleGeometryDescriptor::init()
+{
+    return NS::Object::init<MTL::AccelerationStructureTriangleGeometryDescriptor>();
+}
+
+// property: vertexBuffer (getter) -- sends the `vertexBuffer` selector to this object.
+_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureTriangleGeometryDescriptor::vertexBuffer() const
+{
+    return NS::Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(vertexBuffer));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setVertexBuffer(const MTL::Buffer* vertexBuffer)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBuffer_), vertexBuffer);
+}
+
+// property: vertexBufferOffset (getter) -- sends the `vertexBufferOffset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::vertexBufferOffset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(vertexBufferOffset));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setVertexBufferOffset(NS::UInteger vertexBufferOffset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBufferOffset_), vertexBufferOffset);
+}
+
+// property: vertexStride (getter) -- sends the `vertexStride` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::vertexStride() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(vertexStride));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setVertexStride(NS::UInteger vertexStride)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexStride_), vertexStride);
+}
+
+// property: indexBuffer (getter) -- sends the `indexBuffer` selector to this object.
+_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureTriangleGeometryDescriptor::indexBuffer() const
+{
+    return NS::Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(indexBuffer));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setIndexBuffer(const MTL::Buffer* indexBuffer)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexBuffer_), indexBuffer);
+}
+
+// property: indexBufferOffset (getter) -- sends the `indexBufferOffset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::indexBufferOffset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(indexBufferOffset));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setIndexBufferOffset(NS::UInteger indexBufferOffset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexBufferOffset_), indexBufferOffset);
+}
+
+// property: indexType (getter) -- sends the `indexType` selector to this object.
+_MTL_INLINE MTL::IndexType MTL::AccelerationStructureTriangleGeometryDescriptor::indexType() const
+{
+    return NS::Object::sendMessage<MTL::IndexType>(this, _MTL_PRIVATE_SEL(indexType));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setIndexType(MTL::IndexType indexType)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexType_), indexType);
+}
+
+// property: triangleCount (getter) -- sends the `triangleCount` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureTriangleGeometryDescriptor::triangleCount() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(triangleCount));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureTriangleGeometryDescriptor::setTriangleCount(NS::UInteger triangleCount)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTriangleCount_), triangleCount);
+}
+
+// static method: descriptor -- sends the `descriptor` selector to _MTL_PRIVATE_CLS(MTLAccelerationStructureTriangleGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureTriangleGeometryDescriptor* MTL::AccelerationStructureTriangleGeometryDescriptor::descriptor()
+{
+    return NS::Object::sendMessage<MTL::AccelerationStructureTriangleGeometryDescriptor*>(_MTL_PRIVATE_CLS(MTLAccelerationStructureTriangleGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureBoundingBoxGeometryDescriptor* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::AccelerationStructureBoundingBoxGeometryDescriptor>(_MTL_PRIVATE_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::AccelerationStructureBoundingBoxGeometryDescriptor.
+_MTL_INLINE MTL::AccelerationStructureBoundingBoxGeometryDescriptor* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::init()
+{
+    return NS::Object::init<MTL::AccelerationStructureBoundingBoxGeometryDescriptor>();
+}
+
+// property: boundingBoxBuffer (getter) -- sends the `boundingBoxBuffer` selector to this object.
+_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxBuffer() const
+{
+    return NS::Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(boundingBoxBuffer));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxBuffer(const MTL::Buffer* boundingBoxBuffer)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBoundingBoxBuffer_), boundingBoxBuffer);
+}
+
+// property: boundingBoxBufferOffset (getter) -- sends the `boundingBoxBufferOffset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxBufferOffset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(boundingBoxBufferOffset));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxBufferOffset(NS::UInteger boundingBoxBufferOffset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBoundingBoxBufferOffset_), boundingBoxBufferOffset);
+}
+
+// property: boundingBoxStride (getter) -- sends the `boundingBoxStride` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxStride() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(boundingBoxStride));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxStride(NS::UInteger boundingBoxStride)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBoundingBoxStride_), boundingBoxStride);
+}
+
+// property: boundingBoxCount (getter) -- sends the `boundingBoxCount` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureBoundingBoxGeometryDescriptor::boundingBoxCount() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(boundingBoxCount));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureBoundingBoxGeometryDescriptor::setBoundingBoxCount(NS::UInteger boundingBoxCount)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBoundingBoxCount_), boundingBoxCount);
+}
+
+// static method: descriptor -- sends the `descriptor` selector to _MTL_PRIVATE_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureBoundingBoxGeometryDescriptor* MTL::AccelerationStructureBoundingBoxGeometryDescriptor::descriptor()
+{
+    return NS::Object::sendMessage<MTL::AccelerationStructureBoundingBoxGeometryDescriptor*>(_MTL_PRIVATE_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLMotionKeyframeData).
+_MTL_INLINE MTL::MotionKeyframeData* MTL::MotionKeyframeData::alloc()
+{
+    return NS::Object::alloc<MTL::MotionKeyframeData>(_MTL_PRIVATE_CLS(MTLMotionKeyframeData));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::MotionKeyframeData.
+_MTL_INLINE MTL::MotionKeyframeData* MTL::MotionKeyframeData::init()
+{
+    return NS::Object::init<MTL::MotionKeyframeData>();
+}
+
+// property: buffer (getter) -- sends the `buffer` selector to this object.
+_MTL_INLINE MTL::Buffer* MTL::MotionKeyframeData::buffer() const
+{
+    return NS::Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(buffer));
+}
+
+_MTL_INLINE void MTL::MotionKeyframeData::setBuffer(const MTL::Buffer* buffer)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBuffer_), buffer);
+}
+
+// property: offset (getter) -- sends the `offset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::MotionKeyframeData::offset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(offset));
+}
+
+_MTL_INLINE void MTL::MotionKeyframeData::setOffset(NS::UInteger offset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOffset_), offset);
+}
+
+// static method: data -- sends the `data` selector to _MTL_PRIVATE_CLS(MTLMotionKeyframeData).
+_MTL_INLINE MTL::MotionKeyframeData* MTL::MotionKeyframeData::data()
+{
+    return NS::Object::sendMessage<MTL::MotionKeyframeData*>(_MTL_PRIVATE_CLS(MTLMotionKeyframeData), _MTL_PRIVATE_SEL(data));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureMotionTriangleGeometryDescriptor* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::AccelerationStructureMotionTriangleGeometryDescriptor>(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::AccelerationStructureMotionTriangleGeometryDescriptor.
+_MTL_INLINE MTL::AccelerationStructureMotionTriangleGeometryDescriptor* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::init()
+{
+    return NS::Object::init<MTL::AccelerationStructureMotionTriangleGeometryDescriptor>();
+}
+
+// property: vertexBuffers (getter) -- sends the `vertexBuffers` selector to this object.
+_MTL_INLINE NS::Array* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::vertexBuffers() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(vertexBuffers));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setVertexBuffers(const NS::Array* vertexBuffers)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBuffers_), vertexBuffers);
+}
+
+// property: vertexStride (getter) -- sends the `vertexStride` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionTriangleGeometryDescriptor::vertexStride() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(vertexStride));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setVertexStride(NS::UInteger vertexStride)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexStride_), vertexStride);
+}
+
+// property: indexBuffer (getter) -- sends the `indexBuffer` selector to this object.
+_MTL_INLINE MTL::Buffer* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::indexBuffer() const
+{
+    return NS::Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(indexBuffer));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setIndexBuffer(const MTL::Buffer* indexBuffer)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexBuffer_), indexBuffer);
+}
+
+// property: indexBufferOffset (getter) -- sends the `indexBufferOffset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionTriangleGeometryDescriptor::indexBufferOffset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(indexBufferOffset));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setIndexBufferOffset(NS::UInteger indexBufferOffset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexBufferOffset_), indexBufferOffset);
+}
+
+// property: indexType (getter) -- sends the `indexType` selector to this object.
+_MTL_INLINE MTL::IndexType MTL::AccelerationStructureMotionTriangleGeometryDescriptor::indexType() const
+{
+    return NS::Object::sendMessage<MTL::IndexType>(this, _MTL_PRIVATE_SEL(indexType));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setIndexType(MTL::IndexType indexType)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexType_), indexType);
+}
+
+// property: triangleCount (getter) -- sends the `triangleCount` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionTriangleGeometryDescriptor::triangleCount() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(triangleCount));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionTriangleGeometryDescriptor::setTriangleCount(NS::UInteger triangleCount)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTriangleCount_), triangleCount);
+}
+
+// static method: descriptor -- sends the `descriptor` selector to _MTL_PRIVATE_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureMotionTriangleGeometryDescriptor* MTL::AccelerationStructureMotionTriangleGeometryDescriptor::descriptor()
+{
+    return NS::Object::sendMessage<MTL::AccelerationStructureMotionTriangleGeometryDescriptor*>(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor>(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor.
+_MTL_INLINE MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::init()
+{
+    return NS::Object::init<MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor>();
+}
+
+// property: boundingBoxBuffers (getter) -- sends the `boundingBoxBuffers` selector to this object.
+_MTL_INLINE NS::Array* MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::boundingBoxBuffers() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(boundingBoxBuffers));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::setBoundingBoxBuffers(const NS::Array* boundingBoxBuffers)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBoundingBoxBuffers_), boundingBoxBuffers);
+}
+
+// property: boundingBoxStride (getter) -- sends the `boundingBoxStride` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::boundingBoxStride() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(boundingBoxStride));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::setBoundingBoxStride(NS::UInteger boundingBoxStride)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBoundingBoxStride_), boundingBoxStride);
+}
+
+// property: boundingBoxCount (getter) -- sends the `boundingBoxCount` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::boundingBoxCount() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(boundingBoxCount));
+}
+
+_MTL_INLINE void MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::setBoundingBoxCount(NS::UInteger boundingBoxCount)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBoundingBoxCount_), boundingBoxCount);
+}
+
+// static method: descriptor -- sends the `descriptor` selector to _MTL_PRIVATE_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor).
+_MTL_INLINE MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor* MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor::descriptor()
+{
+    return NS::Object::sendMessage<MTL::AccelerationStructureMotionBoundingBoxGeometryDescriptor*>(_MTL_PRIVATE_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor), _MTL_PRIVATE_SEL(descriptor));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with _MTL_PRIVATE_CLS(MTLInstanceAccelerationStructureDescriptor).
+_MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerationStructureDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::InstanceAccelerationStructureDescriptor>(_MTL_PRIVATE_CLS(MTLInstanceAccelerationStructureDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<>() typed as MTL::InstanceAccelerationStructureDescriptor.
+_MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerationStructureDescriptor::init()
+{
+    return NS::Object::init<MTL::InstanceAccelerationStructureDescriptor>();
+}
+
+// property: instanceDescriptorBuffer (getter) -- sends the `instanceDescriptorBuffer` selector to this object.
+_MTL_INLINE MTL::Buffer* MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorBuffer() const
+{
+    return NS::Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(instanceDescriptorBuffer));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorBuffer(const MTL::Buffer* instanceDescriptorBuffer)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInstanceDescriptorBuffer_), instanceDescriptorBuffer);
+}
+
+// property: instanceDescriptorBufferOffset (getter) -- sends the `instanceDescriptorBufferOffset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorBufferOffset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(instanceDescriptorBufferOffset));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorBufferOffset(NS::UInteger instanceDescriptorBufferOffset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInstanceDescriptorBufferOffset_), instanceDescriptorBufferOffset);
+}
+
+// property: instanceDescriptorStride (getter) -- sends the `instanceDescriptorStride` selector to this object.
+_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorStride() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(instanceDescriptorStride));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorStride(NS::UInteger instanceDescriptorStride)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInstanceDescriptorStride_), instanceDescriptorStride);
+}
+
+// property: instanceCount (getter) -- sends the `instanceCount` selector to this object.
+_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::instanceCount() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(instanceCount));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceCount(NS::UInteger instanceCount)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInstanceCount_), instanceCount);
+}
+
+// property: instancedAccelerationStructures (getter) -- sends the `instancedAccelerationStructures` selector to this object.
+_MTL_INLINE NS::Array* MTL::InstanceAccelerationStructureDescriptor::instancedAccelerationStructures() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(instancedAccelerationStructures));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstancedAccelerationStructures(const NS::Array* instancedAccelerationStructures)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInstancedAccelerationStructures_), instancedAccelerationStructures);
+}
+
+// property: instanceDescriptorType (getter) -- sends the `instanceDescriptorType` selector to this object.
+_MTL_INLINE MTL::AccelerationStructureInstanceDescriptorType MTL::InstanceAccelerationStructureDescriptor::instanceDescriptorType() const
+{
+    return NS::Object::sendMessage<MTL::AccelerationStructureInstanceDescriptorType>(this, _MTL_PRIVATE_SEL(instanceDescriptorType));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setInstanceDescriptorType(MTL::AccelerationStructureInstanceDescriptorType instanceDescriptorType)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInstanceDescriptorType_), instanceDescriptorType);
+}
+
+// property: motionTransformBuffer (getter) -- sends the `motionTransformBuffer` selector to this object.
+_MTL_INLINE MTL::Buffer* MTL::InstanceAccelerationStructureDescriptor::motionTransformBuffer() const
+{
+    return NS::Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(motionTransformBuffer));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setMotionTransformBuffer(const MTL::Buffer* motionTransformBuffer)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionTransformBuffer_), motionTransformBuffer);
+}
+
+// property: motionTransformBufferOffset (getter) -- sends the `motionTransformBufferOffset` selector to this object.
+_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::motionTransformBufferOffset() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(motionTransformBufferOffset));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setMotionTransformBufferOffset(NS::UInteger motionTransformBufferOffset)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionTransformBufferOffset_), motionTransformBufferOffset);
+}
+
+// property: motionTransformCount (getter) -- sends the `motionTransformCount` selector to this object.
+_MTL_INLINE NS::UInteger MTL::InstanceAccelerationStructureDescriptor::motionTransformCount() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(motionTransformCount));
+}
+
+_MTL_INLINE void MTL::InstanceAccelerationStructureDescriptor::setMotionTransformCount(NS::UInteger motionTransformCount)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMotionTransformCount_), motionTransformCount);
+}
+
+// static method: descriptor -- sends the `descriptor` selector to _MTL_PRIVATE_CLS(MTLInstanceAccelerationStructureDescriptor).
+_MTL_INLINE MTL::InstanceAccelerationStructureDescriptor* MTL::InstanceAccelerationStructureDescriptor::descriptor()
+{
+    return NS::Object::sendMessage<MTL::InstanceAccelerationStructureDescriptor*>(_MTL_PRIVATE_CLS(MTLInstanceAccelerationStructureDescriptor), _MTL_PRIVATE_SEL(descriptor));
+}
+
+// property: size (getter) -- sends the `size` selector to this object.
+_MTL_INLINE NS::UInteger MTL::AccelerationStructure::size() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(size));
+}
diff --git a/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp b/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp
new file mode 100644
index 0000000..0ff4c9c
--- /dev/null
+++ b/metal-cpp/Metal/MTLAccelerationStructureCommandEncoder.hpp
@@ -0,0 +1,144 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLAccelerationStructureCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLArgument.hpp"
+#include "MTLCommandEncoder.hpp"
+#include "MTLHeap.hpp"
+#include "MTLResource.hpp"
+
+namespace MTL
+{
+class AccelerationStructureCommandEncoder : public NS::Referencing<AccelerationStructureCommandEncoder, CommandEncoder> // Declarations only; every method is defined inline after this namespace as a single Object::sendMessage call.
+{
+public:
+    void buildAccelerationStructure(const class AccelerationStructure* accelerationStructure, const class AccelerationStructureDescriptor* descriptor, const class Buffer* scratchBuffer, NS::UInteger scratchBufferOffset);
+
+    void refitAccelerationStructure(const class AccelerationStructure* sourceAccelerationStructure, const class AccelerationStructureDescriptor* descriptor, const class AccelerationStructure* destinationAccelerationStructure, const class Buffer* scratchBuffer, NS::UInteger scratchBufferOffset);
+
+    void copyAccelerationStructure(const class AccelerationStructure* sourceAccelerationStructure, const class AccelerationStructure* destinationAccelerationStructure);
+
+    void writeCompactedAccelerationStructureSize(const class AccelerationStructure* accelerationStructure, const class Buffer* buffer, NS::UInteger offset); // Three-argument overload.
+
+    void writeCompactedAccelerationStructureSize(const class AccelerationStructure* accelerationStructure, const class Buffer* buffer, NS::UInteger offset, MTL::DataType sizeDataType); // Overload with an extra sizeDataType argument; maps to a different selector.
+
+    void copyAndCompactAccelerationStructure(const class AccelerationStructure* sourceAccelerationStructure, const class AccelerationStructure* destinationAccelerationStructure);
+
+    void updateFence(const class Fence* fence);
+
+    void waitForFence(const class Fence* fence);
+
+    void useResource(const class Resource* resource, MTL::ResourceUsage usage);
+
+    void useResources(MTL::Resource* resources[], NS::UInteger count, MTL::ResourceUsage usage); // Array form: takes non-const element pointers plus an explicit element count.
+
+    void useHeap(const class Heap* heap);
+
+    void useHeaps(MTL::Heap* heaps[], NS::UInteger count); // Array form: takes non-const element pointers plus an explicit element count.
+
+    void sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier);
+};
+
+}
+
+// method: buildAccelerationStructure:descriptor:scratchBuffer:scratchBufferOffset: - forwards all four arguments unchanged, in declaration order; no return value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::buildAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, const MTL::AccelerationStructureDescriptor* descriptor, const MTL::Buffer* scratchBuffer, NS::UInteger scratchBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(buildAccelerationStructure_descriptor_scratchBuffer_scratchBufferOffset_), accelerationStructure, descriptor, scratchBuffer, scratchBufferOffset);
+}
+
+// method: refitAccelerationStructure:descriptor:destination:scratchBuffer:scratchBufferOffset: - forwards source, descriptor, destination, scratch buffer and offset in that order; no return value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::refitAccelerationStructure(const MTL::AccelerationStructure* sourceAccelerationStructure, const MTL::AccelerationStructureDescriptor* descriptor, const MTL::AccelerationStructure* destinationAccelerationStructure, const MTL::Buffer* scratchBuffer, NS::UInteger scratchBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(refitAccelerationStructure_descriptor_destination_scratchBuffer_scratchBufferOffset_), sourceAccelerationStructure, descriptor, destinationAccelerationStructure, scratchBuffer, scratchBufferOffset);
+}
+
+// method: copyAccelerationStructure:toAccelerationStructure: - forwards the source pointer first, then the destination pointer; no return value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::copyAccelerationStructure(const MTL::AccelerationStructure* sourceAccelerationStructure, const MTL::AccelerationStructure* destinationAccelerationStructure)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyAccelerationStructure_toAccelerationStructure_), sourceAccelerationStructure, destinationAccelerationStructure);
+}
+
+// method: writeCompactedAccelerationStructureSize:toBuffer:offset: - three-argument overload; forwards structure, buffer and offset unchanged.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::writeCompactedAccelerationStructureSize(const MTL::AccelerationStructure* accelerationStructure, const MTL::Buffer* buffer, NS::UInteger offset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_), accelerationStructure, buffer, offset);
+}
+
+// method: writeCompactedAccelerationStructureSize:toBuffer:offset:sizeDataType: - four-argument overload; uses its own selector token and additionally forwards sizeDataType.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::writeCompactedAccelerationStructureSize(const MTL::AccelerationStructure* accelerationStructure, const MTL::Buffer* buffer, NS::UInteger offset, MTL::DataType sizeDataType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_sizeDataType_), accelerationStructure, buffer, offset, sizeDataType);
+}
+
+// method: copyAndCompactAccelerationStructure:toAccelerationStructure: - forwards the source pointer first, then the destination pointer; no return value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::copyAndCompactAccelerationStructure(const MTL::AccelerationStructure* sourceAccelerationStructure, const MTL::AccelerationStructure* destinationAccelerationStructure)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyAndCompactAccelerationStructure_toAccelerationStructure_), sourceAccelerationStructure, destinationAccelerationStructure);
+}
+
+// method: updateFence: - forwards the fence pointer with selector token updateFence_; no return value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::updateFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateFence_), fence);
+}
+
+// method: waitForFence: - forwards the fence pointer with selector token waitForFence_; no return value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::waitForFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitForFence_), fence);
+}
+
+// method: useResource:usage: - single-resource form; forwards the resource pointer and the usage value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResource_usage_), resource, usage);
+}
+
+// method: useResources:count:usage: - array form; passes the array pointer, the element count and the usage value through unchanged (count is not checked here).
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useResources(MTL::Resource* resources[], NS::UInteger count, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResources_count_usage_), resources, count, usage);
+}
+
+// method: useHeap: - single-heap form; forwards the heap pointer with selector token useHeap_.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useHeap(const MTL::Heap* heap)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeap_), heap);
+}
+
+// method: useHeaps:count: - array form; passes the array pointer and the element count through unchanged (count is not checked here).
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::useHeaps(MTL::Heap* heaps[], NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeaps_count_), heaps, count);
+}
+
+// method: sampleCountersInBuffer:atSampleIndex:withBarrier: - forwards the sample buffer, the sample index and the barrier flag in that order; no return value.
+_MTL_INLINE void MTL::AccelerationStructureCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, barrier);
+}
diff --git a/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp b/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp
new file mode 100644
index 0000000..be767dc
--- /dev/null
+++ b/metal-cpp/Metal/MTLAccelerationStructureTypes.hpp
@@ -0,0 +1,169 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLAccelerationStructureTypes.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "MTLDefines.hpp"
+#include "MTLPrivate.hpp"
+#include "MTLResource.hpp"
+#include "MTLStageInputOutputDescriptor.hpp"
+
+#include "../Foundation/NSRange.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace MTL
+{
+struct PackedFloat3 // Three floats reachable by name (x, y, z) or by index via the union below.
+{
+    PackedFloat3();
+    PackedFloat3(float x, float y, float z);
+
+    float& operator[](int idx);
+    float  operator[](int idx) const;
+
+    union
+    {
+        struct
+        {
+            float x;
+            float y;
+            float z;
+        };
+
+        float elements[3]; // Shares storage with x/y/z; operator[] indexes this array with no range check.
+    };
+} _MTL_PACKED; // NOTE(review): _MTL_PACKED is defined outside this hunk - presumably a packing attribute; confirm in MTLDefines.hpp.
+
+struct PackedFloat4x3 // Four PackedFloat3 columns stored in one array.
+{
+    PackedFloat4x3();
+    PackedFloat4x3(const PackedFloat3& col0, const PackedFloat3& col1, const PackedFloat3& col2, const PackedFloat3& col3);
+
+    PackedFloat3&       operator[](int idx);
+    const PackedFloat3& operator[](int idx) const;
+
+    PackedFloat3        columns[4]; // operator[] returns a reference into this array with no range check.
+} _MTL_PACKED;
+
+struct AxisAlignedBoundingBox // A pair of PackedFloat3 corners named min and max.
+{
+    AxisAlignedBoundingBox();
+    AxisAlignedBoundingBox(PackedFloat3 p);
+    AxisAlignedBoundingBox(PackedFloat3 min, PackedFloat3 max);
+
+    PackedFloat3 min;
+    PackedFloat3 max;
+} _MTL_PACKED;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat3::PackedFloat3() // Default constructor: initializes x, y and z to 0.0f.
+    : x(0.0f)
+    , y(0.0f)
+    , z(0.0f)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat3::PackedFloat3(float _x, float _y, float _z) // Component-wise constructor: copies _x, _y, _z into x, y, z.
+    : x(_x)
+    , y(_y)
+    , z(_z)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE float& MTL::PackedFloat3::operator[](int idx) // Mutable access: returns a reference to elements[idx]; idx is not range-checked.
+{
+    return elements[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE float MTL::PackedFloat3::operator[](int idx) const // Read-only access: returns elements[idx] by value; idx is not range-checked.
+{
+    return elements[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat4x3::PackedFloat4x3() // Default constructor: assigns an all-zero PackedFloat3 to each of the four columns.
+{
+    columns[0] = PackedFloat3(0.0f, 0.0f, 0.0f);
+    columns[1] = PackedFloat3(0.0f, 0.0f, 0.0f);
+    columns[2] = PackedFloat3(0.0f, 0.0f, 0.0f);
+    columns[3] = PackedFloat3(0.0f, 0.0f, 0.0f);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat4x3::PackedFloat4x3(const PackedFloat3& col0, const PackedFloat3& col1, const PackedFloat3& col2, const PackedFloat3& col3) // Copies col0..col3 into columns[0]..columns[3].
+{
+    columns[0] = col0;
+    columns[1] = col1;
+    columns[2] = col2;
+    columns[3] = col3;
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::PackedFloat3& MTL::PackedFloat4x3::operator[](int idx) // Mutable access: returns a reference to columns[idx]; idx is not range-checked.
+{
+    return columns[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE const MTL::PackedFloat3& MTL::PackedFloat4x3::operator[](int idx) const // Read-only access: returns a const reference to columns[idx]; idx is not range-checked.
+{
+    return columns[idx];
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::AxisAlignedBoundingBox::AxisAlignedBoundingBox() // Default constructor: min is +INFINITY and max is -INFINITY on every axis (an inverted box).
+    : min(INFINITY, INFINITY, INFINITY) // NOTE(review): INFINITY comes from <cmath>/<math.h>, which this header does not include directly - presumably pulled in transitively; confirm.
+    , max(-INFINITY, -INFINITY, -INFINITY)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::AxisAlignedBoundingBox::AxisAlignedBoundingBox(PackedFloat3 p) // Single-point constructor: both min and max are set to p.
+    : min(p)
+    , max(p)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::AxisAlignedBoundingBox::AxisAlignedBoundingBox(PackedFloat3 _min, PackedFloat3 _max) // Two-corner constructor: stores _min and _max as given, without checking their ordering.
+    : min(_min)
+    , max(_max)
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Metal/MTLArgument.hpp b/metal-cpp/Metal/MTLArgument.hpp
new file mode 100644
index 0000000..8057fe0
--- /dev/null
+++ b/metal-cpp/Metal/MTLArgument.hpp
@@ -0,0 +1,655 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLArgument.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLArgument.hpp"
+#include "MTLTexture.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, DataType) { // Data type identifiers with explicit values; the numbering is not contiguous (57, 61 and 89-114 are not assigned here).
+    DataTypeNone = 0,
+    DataTypeStruct = 1,
+    DataTypeArray = 2,
+    DataTypeFloat = 3,
+    DataTypeFloat2 = 4,
+    DataTypeFloat3 = 5,
+    DataTypeFloat4 = 6,
+    DataTypeFloat2x2 = 7,
+    DataTypeFloat2x3 = 8,
+    DataTypeFloat2x4 = 9,
+    DataTypeFloat3x2 = 10,
+    DataTypeFloat3x3 = 11,
+    DataTypeFloat3x4 = 12,
+    DataTypeFloat4x2 = 13,
+    DataTypeFloat4x3 = 14,
+    DataTypeFloat4x4 = 15,
+    DataTypeHalf = 16,
+    DataTypeHalf2 = 17,
+    DataTypeHalf3 = 18,
+    DataTypeHalf4 = 19,
+    DataTypeHalf2x2 = 20,
+    DataTypeHalf2x3 = 21,
+    DataTypeHalf2x4 = 22,
+    DataTypeHalf3x2 = 23,
+    DataTypeHalf3x3 = 24,
+    DataTypeHalf3x4 = 25,
+    DataTypeHalf4x2 = 26,
+    DataTypeHalf4x3 = 27,
+    DataTypeHalf4x4 = 28,
+    DataTypeInt = 29,
+    DataTypeInt2 = 30,
+    DataTypeInt3 = 31,
+    DataTypeInt4 = 32,
+    DataTypeUInt = 33,
+    DataTypeUInt2 = 34,
+    DataTypeUInt3 = 35,
+    DataTypeUInt4 = 36,
+    DataTypeShort = 37,
+    DataTypeShort2 = 38,
+    DataTypeShort3 = 39,
+    DataTypeShort4 = 40,
+    DataTypeUShort = 41,
+    DataTypeUShort2 = 42,
+    DataTypeUShort3 = 43,
+    DataTypeUShort4 = 44,
+    DataTypeChar = 45,
+    DataTypeChar2 = 46,
+    DataTypeChar3 = 47,
+    DataTypeChar4 = 48,
+    DataTypeUChar = 49,
+    DataTypeUChar2 = 50,
+    DataTypeUChar3 = 51,
+    DataTypeUChar4 = 52,
+    DataTypeBool = 53,
+    DataTypeBool2 = 54,
+    DataTypeBool3 = 55,
+    DataTypeBool4 = 56,
+    DataTypeTexture = 58, // 57 is skipped.
+    DataTypeSampler = 59,
+    DataTypePointer = 60,
+    DataTypeR8Unorm = 62, // 61 is skipped.
+    DataTypeR8Snorm = 63,
+    DataTypeR16Unorm = 64,
+    DataTypeR16Snorm = 65,
+    DataTypeRG8Unorm = 66,
+    DataTypeRG8Snorm = 67,
+    DataTypeRG16Unorm = 68,
+    DataTypeRG16Snorm = 69,
+    DataTypeRGBA8Unorm = 70,
+    DataTypeRGBA8Unorm_sRGB = 71,
+    DataTypeRGBA8Snorm = 72,
+    DataTypeRGBA16Unorm = 73,
+    DataTypeRGBA16Snorm = 74,
+    DataTypeRGB10A2Unorm = 75,
+    DataTypeRG11B10Float = 76,
+    DataTypeRGB9E5Float = 77,
+    DataTypeRenderPipeline = 78,
+    DataTypeComputePipeline = 79,
+    DataTypeIndirectCommandBuffer = 80,
+    DataTypeLong = 81,
+    DataTypeLong2 = 82,
+    DataTypeLong3 = 83,
+    DataTypeLong4 = 84,
+    DataTypeULong = 85,
+    DataTypeULong2 = 86,
+    DataTypeULong3 = 87,
+    DataTypeULong4 = 88,
+    DataTypeVisibleFunctionTable = 115, // 89-114 are skipped.
+    DataTypeIntersectionFunctionTable = 116,
+    DataTypePrimitiveAccelerationStructure = 117,
+    DataTypeInstanceAccelerationStructure = 118,
+};
+
+_MTL_ENUM(NS::UInteger, ArgumentType) { // Argument kinds with explicit values; the numbering jumps from 3 to 16 and from 17 to 24.
+    ArgumentTypeBuffer = 0,
+    ArgumentTypeThreadgroupMemory = 1,
+    ArgumentTypeTexture = 2,
+    ArgumentTypeSampler = 3,
+    ArgumentTypeImageblockData = 16,
+    ArgumentTypeImageblock = 17,
+    ArgumentTypeVisibleFunctionTable = 24,
+    ArgumentTypePrimitiveAccelerationStructure = 25,
+    ArgumentTypeInstanceAccelerationStructure = 26,
+    ArgumentTypeIntersectionFunctionTable = 27,
+};
+
+_MTL_ENUM(NS::UInteger, ArgumentAccess) { // Access modes with explicit values 0 through 2.
+    ArgumentAccessReadOnly = 0,
+    ArgumentAccessReadWrite = 1,
+    ArgumentAccessWriteOnly = 2,
+};
+
+class Type : public NS::Referencing<Type> // Base of StructType, ArrayType, PointerType and TextureReferenceType in this header; exposes only dataType().
+{
+public:
+    static class Type* alloc();
+
+    class Type*        init();
+
+    MTL::DataType      dataType() const;
+};
+
+class StructMember : public NS::Referencing<StructMember> // Accessors for one struct member: name, offset, data type, argument index and four typed sub-descriptions.
+{
+public:
+    static class StructMember*  alloc();
+
+    class StructMember*         init();
+
+    NS::String*                 name() const;
+
+    NS::UInteger                offset() const;
+
+    MTL::DataType               dataType() const;
+
+    class StructType*           structType(); // The four type accessors from here on are declared non-const, unlike the value accessors around them.
+
+    class ArrayType*            arrayType();
+
+    class TextureReferenceType* textureReferenceType();
+
+    class PointerType*          pointerType();
+
+    NS::UInteger                argumentIndex() const;
+};
+
+class StructType : public NS::Referencing<StructType, Type> // Type subclass exposing a member array and a lookup by name.
+{
+public:
+    static class StructType* alloc();
+
+    class StructType*        init();
+
+    NS::Array*               members() const;
+
+    class StructMember*      memberByName(const NS::String* name);
+};
+
+class ArrayType : public NS::Referencing<ArrayType, Type> // Type subclass exposing element type, length, two stride values and four typed element descriptions.
+{
+public:
+    static class ArrayType*     alloc();
+
+    class ArrayType*            init();
+
+    MTL::DataType               elementType() const;
+
+    NS::UInteger                arrayLength() const;
+
+    NS::UInteger                stride() const;
+
+    NS::UInteger                argumentIndexStride() const;
+
+    class StructType*           elementStructType(); // The four element*Type accessors from here on are declared non-const.
+
+    class ArrayType*            elementArrayType();
+
+    class TextureReferenceType* elementTextureReferenceType();
+
+    class PointerType*          elementPointerType();
+};
+
+class PointerType : public NS::Referencing<PointerType, Type> // Type subclass exposing element type, access mode, alignment, data size and two typed element descriptions.
+{
+public:
+    static class PointerType* alloc();
+
+    class PointerType*        init();
+
+    MTL::DataType             elementType() const;
+
+    MTL::ArgumentAccess       access() const;
+
+    NS::UInteger              alignment() const;
+
+    NS::UInteger              dataSize() const;
+
+    bool                      elementIsArgumentBuffer() const;
+
+    class StructType*         elementStructType(); // Declared non-const, like elementArrayType() below.
+
+    class ArrayType*          elementArrayType();
+};
+
+class TextureReferenceType : public NS::Referencing<TextureReferenceType, Type> // Type subclass exposing texture data type, texture type, access mode and a depth-texture flag.
+{
+public:
+    static class TextureReferenceType* alloc();
+
+    class TextureReferenceType*        init();
+
+    MTL::DataType                      textureDataType() const;
+
+    MTL::TextureType                   textureType() const;
+
+    MTL::ArgumentAccess                access() const;
+
+    bool                               isDepthTexture() const;
+};
+
+class Argument : public NS::Referencing<Argument> // Read-only accessors only: general (name/type/access/index/active), then buffer*, threadgroupMemory* and texture* groups, then arrayLength.
+{
+public:
+    static class Argument* alloc();
+
+    class Argument*        init();
+
+    NS::String*            name() const;
+
+    MTL::ArgumentType      type() const;
+
+    MTL::ArgumentAccess    access() const;
+
+    NS::UInteger           index() const;
+
+    bool                   active() const; // Defined further down using selector token isActive rather than active.
+
+    NS::UInteger           bufferAlignment() const;
+
+    NS::UInteger           bufferDataSize() const;
+
+    MTL::DataType          bufferDataType() const;
+
+    class StructType*      bufferStructType() const;
+
+    class PointerType*     bufferPointerType() const;
+
+    NS::UInteger           threadgroupMemoryAlignment() const;
+
+    NS::UInteger           threadgroupMemoryDataSize() const;
+
+    MTL::TextureType       textureType() const;
+
+    MTL::DataType          textureDataType() const;
+
+    bool                   isDepthTexture() const;
+
+    NS::UInteger           arrayLength() const;
+};
+
+}
+
+// static method: alloc - delegates to NS::Object::alloc<MTL::Type> with class token MTLType.
+_MTL_INLINE MTL::Type* MTL::Type::alloc()
+{
+    return NS::Object::alloc<MTL::Type>(_MTL_PRIVATE_CLS(MTLType));
+}
+
+// method: init - delegates to NS::Object::init<MTL::Type>() and returns its result.
+_MTL_INLINE MTL::Type* MTL::Type::init()
+{
+    return NS::Object::init<MTL::Type>();
+}
+
+// property: dataType - returns the MTL::DataType reply to selector token dataType.
+_MTL_INLINE MTL::DataType MTL::Type::dataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(dataType));
+}
+
+// static method: alloc - delegates to NS::Object::alloc<MTL::StructMember> with class token MTLStructMember.
+_MTL_INLINE MTL::StructMember* MTL::StructMember::alloc()
+{
+    return NS::Object::alloc<MTL::StructMember>(_MTL_PRIVATE_CLS(MTLStructMember));
+}
+
+// method: init - delegates to NS::Object::init<MTL::StructMember>() and returns its result.
+_MTL_INLINE MTL::StructMember* MTL::StructMember::init()
+{
+    return NS::Object::init<MTL::StructMember>();
+}
+
+// property: name - returns the NS::String* reply to selector token name.
+_MTL_INLINE NS::String* MTL::StructMember::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: offset - returns the NS::UInteger reply to selector token offset.
+_MTL_INLINE NS::UInteger MTL::StructMember::offset() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(offset));
+}
+
+// property: dataType - returns the MTL::DataType reply to selector token dataType.
+_MTL_INLINE MTL::DataType MTL::StructMember::dataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(dataType));
+}
+
+// method: structType - returns the MTL::StructType* reply to selector token structType (non-const member).
+_MTL_INLINE MTL::StructType* MTL::StructMember::structType()
+{
+    return Object::sendMessage<MTL::StructType*>(this, _MTL_PRIVATE_SEL(structType));
+}
+
+// method: arrayType - returns the MTL::ArrayType* reply to selector token arrayType (non-const member).
+_MTL_INLINE MTL::ArrayType* MTL::StructMember::arrayType()
+{
+    return Object::sendMessage<MTL::ArrayType*>(this, _MTL_PRIVATE_SEL(arrayType));
+}
+
+// method: textureReferenceType - returns the MTL::TextureReferenceType* reply to selector token textureReferenceType (non-const member).
+_MTL_INLINE MTL::TextureReferenceType* MTL::StructMember::textureReferenceType()
+{
+    return Object::sendMessage<MTL::TextureReferenceType*>(this, _MTL_PRIVATE_SEL(textureReferenceType));
+}
+
+// method: pointerType - returns the MTL::PointerType* reply to selector token pointerType (non-const member).
+_MTL_INLINE MTL::PointerType* MTL::StructMember::pointerType()
+{
+    return Object::sendMessage<MTL::PointerType*>(this, _MTL_PRIVATE_SEL(pointerType));
+}
+
+// property: argumentIndex - returns the NS::UInteger reply to selector token argumentIndex.
+_MTL_INLINE NS::UInteger MTL::StructMember::argumentIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(argumentIndex));
+}
+
+// static method: alloc - delegates to NS::Object::alloc<MTL::StructType> with class token MTLStructType.
+_MTL_INLINE MTL::StructType* MTL::StructType::alloc()
+{
+    return NS::Object::alloc<MTL::StructType>(_MTL_PRIVATE_CLS(MTLStructType));
+}
+
+// method: init - delegates to NS::Object::init<MTL::StructType>() and returns its result.
+_MTL_INLINE MTL::StructType* MTL::StructType::init()
+{
+    return NS::Object::init<MTL::StructType>();
+}
+
+// property: members - returns the NS::Array* reply to selector token members.
+_MTL_INLINE NS::Array* MTL::StructType::members() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(members));
+}
+
+// method: memberByName: - forwards the name string and returns the MTL::StructMember* reply unchanged.
+_MTL_INLINE MTL::StructMember* MTL::StructType::memberByName(const NS::String* name)
+{
+    return Object::sendMessage<MTL::StructMember*>(this, _MTL_PRIVATE_SEL(memberByName_), name);
+}
+
+// static method: alloc - delegates to NS::Object::alloc<MTL::ArrayType> with class token MTLArrayType.
+_MTL_INLINE MTL::ArrayType* MTL::ArrayType::alloc()
+{
+    return NS::Object::alloc<MTL::ArrayType>(_MTL_PRIVATE_CLS(MTLArrayType));
+}
+
+// method: init - delegates to NS::Object::init<MTL::ArrayType>() and returns its result.
+_MTL_INLINE MTL::ArrayType* MTL::ArrayType::init()
+{
+    return NS::Object::init<MTL::ArrayType>();
+}
+
+// property: elementType - returns the MTL::DataType reply to selector token elementType.
+_MTL_INLINE MTL::DataType MTL::ArrayType::elementType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(elementType));
+}
+
+// property: arrayLength - returns the NS::UInteger reply to selector token arrayLength.
+_MTL_INLINE NS::UInteger MTL::ArrayType::arrayLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(arrayLength));
+}
+
+// property: stride - returns the NS::UInteger reply to selector token stride.
+_MTL_INLINE NS::UInteger MTL::ArrayType::stride() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(stride));
+}
+
+// property: argumentIndexStride - returns the NS::UInteger reply to selector token argumentIndexStride.
+_MTL_INLINE NS::UInteger MTL::ArrayType::argumentIndexStride() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(argumentIndexStride));
+}
+
+// method: elementStructType - returns the MTL::StructType* reply to selector token elementStructType (non-const member).
+_MTL_INLINE MTL::StructType* MTL::ArrayType::elementStructType()
+{
+    return Object::sendMessage<MTL::StructType*>(this, _MTL_PRIVATE_SEL(elementStructType));
+}
+
+// method: elementArrayType - returns the MTL::ArrayType* reply to selector token elementArrayType (non-const member).
+_MTL_INLINE MTL::ArrayType* MTL::ArrayType::elementArrayType()
+{
+    return Object::sendMessage<MTL::ArrayType*>(this, _MTL_PRIVATE_SEL(elementArrayType));
+}
+
+// method: elementTextureReferenceType - returns the MTL::TextureReferenceType* reply to selector token elementTextureReferenceType (non-const member).
+_MTL_INLINE MTL::TextureReferenceType* MTL::ArrayType::elementTextureReferenceType()
+{
+    return Object::sendMessage<MTL::TextureReferenceType*>(this, _MTL_PRIVATE_SEL(elementTextureReferenceType));
+}
+
+// method: elementPointerType - returns the MTL::PointerType* reply to selector token elementPointerType (non-const member).
+_MTL_INLINE MTL::PointerType* MTL::ArrayType::elementPointerType()
+{
+    return Object::sendMessage<MTL::PointerType*>(this, _MTL_PRIVATE_SEL(elementPointerType));
+}
+
+// static method: alloc - delegates to NS::Object::alloc<MTL::PointerType> with class token MTLPointerType.
+_MTL_INLINE MTL::PointerType* MTL::PointerType::alloc()
+{
+    return NS::Object::alloc<MTL::PointerType>(_MTL_PRIVATE_CLS(MTLPointerType));
+}
+
+// method: init - delegates to NS::Object::init<MTL::PointerType>() and returns its result.
+_MTL_INLINE MTL::PointerType* MTL::PointerType::init()
+{
+    return NS::Object::init<MTL::PointerType>();
+}
+
+// property: elementType - returns the MTL::DataType reply to selector token elementType.
+_MTL_INLINE MTL::DataType MTL::PointerType::elementType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(elementType));
+}
+
+// property: access - returns the MTL::ArgumentAccess reply to selector token access.
+_MTL_INLINE MTL::ArgumentAccess MTL::PointerType::access() const
+{
+    return Object::sendMessage<MTL::ArgumentAccess>(this, _MTL_PRIVATE_SEL(access));
+}
+
+// property: alignment - returns the NS::UInteger reply to selector token alignment.
+_MTL_INLINE NS::UInteger MTL::PointerType::alignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(alignment));
+}
+
+// property: dataSize - returns the NS::UInteger reply to selector token dataSize.
+_MTL_INLINE NS::UInteger MTL::PointerType::dataSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(dataSize));
+}
+
+// property: elementIsArgumentBuffer - returns the bool reply to selector token elementIsArgumentBuffer.
+_MTL_INLINE bool MTL::PointerType::elementIsArgumentBuffer() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(elementIsArgumentBuffer));
+}
+
+// method: elementStructType - returns the MTL::StructType* reply to selector token elementStructType (non-const member).
+_MTL_INLINE MTL::StructType* MTL::PointerType::elementStructType()
+{
+    return Object::sendMessage<MTL::StructType*>(this, _MTL_PRIVATE_SEL(elementStructType));
+}
+
+// method: elementArrayType - returns the MTL::ArrayType* reply to selector token elementArrayType (non-const member).
+_MTL_INLINE MTL::ArrayType* MTL::PointerType::elementArrayType()
+{
+    return Object::sendMessage<MTL::ArrayType*>(this, _MTL_PRIVATE_SEL(elementArrayType));
+}
+
+// static method: alloc - delegates to NS::Object::alloc<MTL::TextureReferenceType> with class token MTLTextureReferenceType.
+_MTL_INLINE MTL::TextureReferenceType* MTL::TextureReferenceType::alloc()
+{
+    return NS::Object::alloc<MTL::TextureReferenceType>(_MTL_PRIVATE_CLS(MTLTextureReferenceType));
+}
+
+// method: init - delegates to NS::Object::init<MTL::TextureReferenceType>() and returns its result.
+_MTL_INLINE MTL::TextureReferenceType* MTL::TextureReferenceType::init()
+{
+    return NS::Object::init<MTL::TextureReferenceType>();
+}
+
+// property: textureDataType - returns the MTL::DataType reply to selector token textureDataType.
+_MTL_INLINE MTL::DataType MTL::TextureReferenceType::textureDataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(textureDataType));
+}
+
+// property: textureType - returns the MTL::TextureType reply to selector token textureType.
+_MTL_INLINE MTL::TextureType MTL::TextureReferenceType::textureType() const
+{
+    return Object::sendMessage<MTL::TextureType>(this, _MTL_PRIVATE_SEL(textureType));
+}
+
+// property: access - returns the MTL::ArgumentAccess reply to selector token access.
+_MTL_INLINE MTL::ArgumentAccess MTL::TextureReferenceType::access() const
+{
+    return Object::sendMessage<MTL::ArgumentAccess>(this, _MTL_PRIVATE_SEL(access));
+}
+
+// property: isDepthTexture - returns the bool reply to selector token isDepthTexture.
+_MTL_INLINE bool MTL::TextureReferenceType::isDepthTexture() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isDepthTexture));
+}
+
+// static method: alloc - delegates to NS::Object::alloc<MTL::Argument> with class token MTLArgument.
+_MTL_INLINE MTL::Argument* MTL::Argument::alloc()
+{
+    return NS::Object::alloc<MTL::Argument>(_MTL_PRIVATE_CLS(MTLArgument));
+}
+
+// method: init - delegates to NS::Object::init<MTL::Argument>() and returns its result.
+_MTL_INLINE MTL::Argument* MTL::Argument::init()
+{
+    return NS::Object::init<MTL::Argument>();
+}
+
+// property: name - returns the NS::String* reply to selector token name.
+_MTL_INLINE NS::String* MTL::Argument::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: type - returns the MTL::ArgumentType reply to selector token type.
+_MTL_INLINE MTL::ArgumentType MTL::Argument::type() const
+{
+    return Object::sendMessage<MTL::ArgumentType>(this, _MTL_PRIVATE_SEL(type));
+}
+
+// property: access - returns the MTL::ArgumentAccess reply to selector token access.
+_MTL_INLINE MTL::ArgumentAccess MTL::Argument::access() const
+{
+    return Object::sendMessage<MTL::ArgumentAccess>(this, _MTL_PRIVATE_SEL(access));
+}
+
+// property: index - returns the NS::UInteger reply to selector token index.
+_MTL_INLINE NS::UInteger MTL::Argument::index() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(index));
+}
+
+// property: active - the C++ accessor is named active(), but the selector token it sends is isActive.
+_MTL_INLINE bool MTL::Argument::active() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isActive));
+}
+
+// property: bufferAlignment - returns the NS::UInteger reply to selector token bufferAlignment.
+_MTL_INLINE NS::UInteger MTL::Argument::bufferAlignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferAlignment));
+}
+
+// property: bufferDataSize - returns the NS::UInteger reply to selector token bufferDataSize.
+_MTL_INLINE NS::UInteger MTL::Argument::bufferDataSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferDataSize));
+}
+
+// property: bufferDataType - returns the MTL::DataType reply to selector token bufferDataType.
+_MTL_INLINE MTL::DataType MTL::Argument::bufferDataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(bufferDataType));
+}
+
+// property: bufferStructType - returns the MTL::StructType* reply to selector token bufferStructType.
+_MTL_INLINE MTL::StructType* MTL::Argument::bufferStructType() const
+{
+    return Object::sendMessage<MTL::StructType*>(this, _MTL_PRIVATE_SEL(bufferStructType));
+}
+
+// property: bufferPointerType - returns the MTL::PointerType* reply to selector token bufferPointerType.
+_MTL_INLINE MTL::PointerType* MTL::Argument::bufferPointerType() const
+{
+    return Object::sendMessage<MTL::PointerType*>(this, _MTL_PRIVATE_SEL(bufferPointerType));
+}
+
+// property: threadgroupMemoryAlignment - returns the NS::UInteger reply to selector token threadgroupMemoryAlignment.
+_MTL_INLINE NS::UInteger MTL::Argument::threadgroupMemoryAlignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadgroupMemoryAlignment));
+}
+
+// property: threadgroupMemoryDataSize - returns the NS::UInteger reply to selector token threadgroupMemoryDataSize.
+_MTL_INLINE NS::UInteger MTL::Argument::threadgroupMemoryDataSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadgroupMemoryDataSize));
+}
+
+// property: textureType - returns the MTL::TextureType reply to selector token textureType.
+_MTL_INLINE MTL::TextureType MTL::Argument::textureType() const
+{
+    return Object::sendMessage<MTL::TextureType>(this, _MTL_PRIVATE_SEL(textureType));
+}
+
+// property: textureDataType - returns the MTL::DataType reply to selector token textureDataType.
+_MTL_INLINE MTL::DataType MTL::Argument::textureDataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(textureDataType));
+}
+
+// property: isDepthTexture - returns the bool reply to selector token isDepthTexture.
+_MTL_INLINE bool MTL::Argument::isDepthTexture() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isDepthTexture));
+}
+
+// property: arrayLength - returns the NS::UInteger reply to selector token arrayLength.
+_MTL_INLINE NS::UInteger MTL::Argument::arrayLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(arrayLength));
+}
diff --git a/metal-cpp/Metal/MTLArgumentEncoder.hpp b/metal-cpp/Metal/MTLArgumentEncoder.hpp
new file mode 100644
index 0000000..0cecc7d
--- /dev/null
+++ b/metal-cpp/Metal/MTLArgumentEncoder.hpp
@@ -0,0 +1,250 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLArgumentEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLBuffer.hpp"
+#include "MTLComputePipeline.hpp"
+#include "MTLIndirectCommandBuffer.hpp"
+#include "MTLIntersectionFunctionTable.hpp"
+#include "MTLRenderPipeline.hpp"
+#include "MTLSampler.hpp"
+#include "MTLTexture.hpp"
+#include "MTLVisibleFunctionTable.hpp"
+
+namespace MTL
+{
+class ArgumentEncoder : public NS::Referencing<ArgumentEncoder> // Declarations only: each member is defined inline after the namespace and forwards to Object::sendMessage.
+{
+public:
+    class Device*          device() const;
+
+    NS::String*            label() const;
+    void                   setLabel(const NS::String* label);
+
+    NS::UInteger           encodedLength() const;
+
+    NS::UInteger           alignment() const;
+
+    void                   setArgumentBuffer(const class Buffer* argumentBuffer, NS::UInteger offset);
+
+    void                   setArgumentBuffer(const class Buffer* argumentBuffer, NS::UInteger startOffset, NS::UInteger arrayElement);
+
+    void                   setBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void                   setBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range);
+
+    void                   setTexture(const class Texture* texture, NS::UInteger index);
+
+    void                   setTextures(MTL::Texture* textures[], NS::Range range);
+
+    void                   setSamplerState(const class SamplerState* sampler, NS::UInteger index);
+
+    void                   setSamplerStates(MTL::SamplerState* samplers[], NS::Range range);
+
+    void*                  constantData(NS::UInteger index);
+
+    void                   setRenderPipelineState(const class RenderPipelineState* pipeline, NS::UInteger index);
+    // NOTE(review): setRenderPipelineStates, setComputePipelineStates and setIndirectCommandBuffers take one pointer, while the other ranged setters take a `[]` array of pointers; confirm callers pass the intended type.
+    void                   setRenderPipelineStates(MTL::RenderPipelineState* pipelines, NS::Range range);
+
+    void                   setComputePipelineState(const class ComputePipelineState* pipeline, NS::UInteger index);
+
+    void                   setComputePipelineStates(MTL::ComputePipelineState* pipelines, NS::Range range);
+
+    void                   setIndirectCommandBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::UInteger index);
+
+    void                   setIndirectCommandBuffers(MTL::IndirectCommandBuffer* buffers, NS::Range range);
+
+    void                   setAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger index);
+
+    class ArgumentEncoder* newArgumentEncoder(NS::UInteger index);
+
+    void                   setVisibleFunctionTable(const class VisibleFunctionTable* visibleFunctionTable, NS::UInteger index);
+
+    void                   setVisibleFunctionTables(const VisibleFunctionTable* visibleFunctionTables[], NS::Range range);
+
+    void                   setIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger index);
+
+    void                   setIntersectionFunctionTables(const IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range);
+};
+
+}
+
+// property: device; const getter that forwards _MTL_PRIVATE_SEL(device) through Object::sendMessage and returns an MTL::Device pointer.
+_MTL_INLINE MTL::Device* MTL::ArgumentEncoder::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label; label() reads it with _MTL_PRIVATE_SEL(label) and setLabel() writes it with _MTL_PRIVATE_SEL(setLabel_), both through Object::sendMessage.
+_MTL_INLINE NS::String* MTL::ArgumentEncoder::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::ArgumentEncoder::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: encodedLength; const getter that forwards _MTL_PRIVATE_SEL(encodedLength) through Object::sendMessage and returns an NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::ArgumentEncoder::encodedLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(encodedLength));
+}
+
+// property: alignment; const getter that forwards _MTL_PRIVATE_SEL(alignment) through Object::sendMessage and returns an NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::ArgumentEncoder::alignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(alignment));
+}
+
+// method: setArgumentBuffer:offset: (two-argument overload; passes argumentBuffer and offset unchanged to Object::sendMessage, returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setArgumentBuffer(const MTL::Buffer* argumentBuffer, NS::UInteger offset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setArgumentBuffer_offset_), argumentBuffer, offset);
+}
+
+// method: setArgumentBuffer:startOffset:arrayElement: (three-argument overload; passes argumentBuffer, startOffset and arrayElement unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::ArgumentEncoder::setArgumentBuffer(const MTL::Buffer* argumentBuffer, NS::UInteger startOffset, NS::UInteger arrayElement)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setArgumentBuffer_startOffset_arrayElement_), argumentBuffer, startOffset, arrayElement);
+}
+
+// method: setBuffer:offset:atIndex: (passes buffer, offset and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setBuffers:offsets:withRange: (passes the buffers array, the offsets array and range unchanged to Object::sendMessage; no length check is done here)
+_MTL_INLINE void MTL::ArgumentEncoder::setBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setTexture:atIndex: (passes texture and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTexture_atIndex_), texture, index);
+}
+
+// method: setTextures:withRange: (passes the textures array and range unchanged to Object::sendMessage; no length check is done here)
+_MTL_INLINE void MTL::ArgumentEncoder::setTextures(MTL::Texture* textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTextures_withRange_), textures, range);
+}
+
+// method: setSamplerState:atIndex: (passes sampler and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSamplerState_atIndex_), sampler, index);
+}
+
+// method: setSamplerStates:withRange: (passes the samplers array and range unchanged to Object::sendMessage; no length check is done here)
+_MTL_INLINE void MTL::ArgumentEncoder::setSamplerStates(MTL::SamplerState* samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSamplerStates_withRange_), samplers, range);
+}
+
+// method: constantDataAtIndex: (passes index to Object::sendMessage and returns the resulting untyped void* as-is)
+_MTL_INLINE void* MTL::ArgumentEncoder::constantData(NS::UInteger index)
+{
+    return Object::sendMessage<void*>(this, _MTL_PRIVATE_SEL(constantDataAtIndex_), index);
+}
+
+// method: setRenderPipelineState:atIndex: (passes pipeline and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setRenderPipelineState(const MTL::RenderPipelineState* pipeline, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRenderPipelineState_atIndex_), pipeline, index);
+}
+
+// method: setRenderPipelineStates:withRange: (forwards pipelines and range; NOTE(review): pipelines is one pointer, unlike the `[]` arrays of setBuffers/setTextures - confirm intended type)
+_MTL_INLINE void MTL::ArgumentEncoder::setRenderPipelineStates(MTL::RenderPipelineState* pipelines, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRenderPipelineStates_withRange_), pipelines, range);
+}
+
+// method: setComputePipelineState:atIndex: (passes pipeline and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setComputePipelineState(const MTL::ComputePipelineState* pipeline, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setComputePipelineState_atIndex_), pipeline, index);
+}
+
+// method: setComputePipelineStates:withRange: (forwards pipelines and range; NOTE(review): pipelines is one pointer, unlike the `[]` arrays of setBuffers/setTextures - confirm intended type)
+_MTL_INLINE void MTL::ArgumentEncoder::setComputePipelineStates(MTL::ComputePipelineState* pipelines, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setComputePipelineStates_withRange_), pipelines, range);
+}
+
+// method: setIndirectCommandBuffer:atIndex: (passes indirectCommandBuffer and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setIndirectCommandBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndirectCommandBuffer_atIndex_), indirectCommandBuffer, index);
+}
+
+// method: setIndirectCommandBuffers:withRange: (forwards buffers and range; NOTE(review): buffers is one pointer, unlike the `[]` arrays of setBuffers/setTextures - confirm intended type)
+_MTL_INLINE void MTL::ArgumentEncoder::setIndirectCommandBuffers(MTL::IndirectCommandBuffer* buffers, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndirectCommandBuffers_withRange_), buffers, range);
+}
+
+// method: setAccelerationStructure:atIndex: (passes accelerationStructure and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAccelerationStructure_atIndex_), accelerationStructure, index);
+}
+
+// method: newArgumentEncoderForBufferAtIndex: (passes index to Object::sendMessage and returns the resulting MTL::ArgumentEncoder pointer; presumably caller-owned given the `new` prefix - confirm)
+_MTL_INLINE MTL::ArgumentEncoder* MTL::ArgumentEncoder::newArgumentEncoder(NS::UInteger index)
+{
+    return Object::sendMessage<MTL::ArgumentEncoder*>(this, _MTL_PRIVATE_SEL(newArgumentEncoderForBufferAtIndex_), index);
+}
+
+// method: setVisibleFunctionTable:atIndex: (passes visibleFunctionTable and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setVisibleFunctionTable(const MTL::VisibleFunctionTable* visibleFunctionTable, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibleFunctionTable_atIndex_), visibleFunctionTable, index);
+}
+
+// method: setVisibleFunctionTables:withRange: (passes the visibleFunctionTables array and range unchanged to Object::sendMessage; no length check is done here)
+_MTL_INLINE void MTL::ArgumentEncoder::setVisibleFunctionTables(const MTL::VisibleFunctionTable* visibleFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibleFunctionTables_withRange_), visibleFunctionTables, range);
+}
+
+// method: setIntersectionFunctionTable:atIndex: (passes intersectionFunctionTable and index unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::ArgumentEncoder::setIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTable_atIndex_), intersectionFunctionTable, index);
+}
+
+// method: setIntersectionFunctionTables:withRange: (passes the intersectionFunctionTables array and range unchanged to Object::sendMessage; no length check is done here)
+_MTL_INLINE void MTL::ArgumentEncoder::setIntersectionFunctionTables(const MTL::IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTables_withRange_), intersectionFunctionTables, range);
+}
diff --git a/metal-cpp/Metal/MTLBinaryArchive.hpp b/metal-cpp/Metal/MTLBinaryArchive.hpp
new file mode 100644
index 0000000..8142d37
--- /dev/null
+++ b/metal-cpp/Metal/MTLBinaryArchive.hpp
@@ -0,0 +1,138 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLBinaryArchive.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, BinaryArchiveError) { // Declared via _MTL_ENUM with NS::UInteger as its type argument; None is 0 and the three failure codes are 1 to 3.
+    BinaryArchiveErrorNone = 0,
+    BinaryArchiveErrorInvalidFile = 1,
+    BinaryArchiveErrorUnexpectedElement = 2,
+    BinaryArchiveErrorCompilationFailure = 3,
+};
+
+class BinaryArchiveDescriptor : public NS::Copying<BinaryArchiveDescriptor> // Declarations only: alloc/init plus a single url property, all defined inline after the namespace.
+{
+public:
+    static class BinaryArchiveDescriptor* alloc();
+
+    class BinaryArchiveDescriptor*        init();
+
+    NS::URL*                              url() const;
+    void                                  setUrl(const NS::URL* url);
+};
+
+class BinaryArchive : public NS::Referencing<BinaryArchive> // Declarations only: each member is defined inline after the namespace and forwards to Object::sendMessage.
+{
+public:
+    NS::String*   label() const;
+    void          setLabel(const NS::String* label);
+
+    class Device* device() const;
+
+    bool          addComputePipelineFunctions(const class ComputePipelineDescriptor* descriptor, NS::Error** error);
+
+    bool          addRenderPipelineFunctions(const class RenderPipelineDescriptor* descriptor, NS::Error** error);
+
+    bool          addTileRenderPipelineFunctions(const class TileRenderPipelineDescriptor* descriptor, NS::Error** error);
+
+    bool          serializeToURL(const NS::URL* url, NS::Error** error);
+
+    bool          addFunction(const class FunctionDescriptor* descriptor, const class Library* library, NS::Error** error);
+};
+
+}
+
+// static method: alloc; returns the result of NS::Object::alloc<MTL::BinaryArchiveDescriptor> called with _MTL_PRIVATE_CLS(MTLBinaryArchiveDescriptor).
+_MTL_INLINE MTL::BinaryArchiveDescriptor* MTL::BinaryArchiveDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::BinaryArchiveDescriptor>(_MTL_PRIVATE_CLS(MTLBinaryArchiveDescriptor));
+}
+
+// method: init; delegates to NS::Object::init<MTL::BinaryArchiveDescriptor>() and returns the pointer it yields.
+_MTL_INLINE MTL::BinaryArchiveDescriptor* MTL::BinaryArchiveDescriptor::init()
+{
+    return NS::Object::init<MTL::BinaryArchiveDescriptor>();
+}
+
+// property: url; url() reads it with _MTL_PRIVATE_SEL(url) and setUrl() writes it with _MTL_PRIVATE_SEL(setUrl_), both through Object::sendMessage.
+_MTL_INLINE NS::URL* MTL::BinaryArchiveDescriptor::url() const
+{
+    return Object::sendMessage<NS::URL*>(this, _MTL_PRIVATE_SEL(url));
+}
+
+_MTL_INLINE void MTL::BinaryArchiveDescriptor::setUrl(const NS::URL* url)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setUrl_), url);
+}
+
+// property: label; label() reads it with _MTL_PRIVATE_SEL(label) and setLabel() writes it with _MTL_PRIVATE_SEL(setLabel_), both through Object::sendMessage.
+_MTL_INLINE NS::String* MTL::BinaryArchive::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::BinaryArchive::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: device; const getter that forwards _MTL_PRIVATE_SEL(device) through Object::sendMessage and returns an MTL::Device pointer.
+_MTL_INLINE MTL::Device* MTL::BinaryArchive::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// method: addComputePipelineFunctionsWithDescriptor:error: (forwards descriptor and the NS::Error** out-pointer unchanged; returns the bool from Object::sendMessage)
+_MTL_INLINE bool MTL::BinaryArchive::addComputePipelineFunctions(const MTL::ComputePipelineDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(addComputePipelineFunctionsWithDescriptor_error_), descriptor, error);
+}
+
+// method: addRenderPipelineFunctionsWithDescriptor:error: (forwards descriptor and the NS::Error** out-pointer unchanged; returns the bool from Object::sendMessage)
+_MTL_INLINE bool MTL::BinaryArchive::addRenderPipelineFunctions(const MTL::RenderPipelineDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(addRenderPipelineFunctionsWithDescriptor_error_), descriptor, error);
+}
+
+// method: addTileRenderPipelineFunctionsWithDescriptor:error: (forwards descriptor and the NS::Error** out-pointer unchanged; returns the bool from Object::sendMessage)
+_MTL_INLINE bool MTL::BinaryArchive::addTileRenderPipelineFunctions(const MTL::TileRenderPipelineDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(addTileRenderPipelineFunctionsWithDescriptor_error_), descriptor, error);
+}
+
+// method: serializeToURL:error: (forwards url and the NS::Error** out-pointer unchanged; returns the bool from Object::sendMessage)
+_MTL_INLINE bool MTL::BinaryArchive::serializeToURL(const NS::URL* url, NS::Error** error)
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(serializeToURL_error_), url, error);
+}
+
+// method: addFunctionWithDescriptor:library:error: (forwards descriptor, library and the NS::Error** out-pointer unchanged; returns the bool from Object::sendMessage)
+_MTL_INLINE bool MTL::BinaryArchive::addFunction(const MTL::FunctionDescriptor* descriptor, const MTL::Library* library, NS::Error** error)
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(addFunctionWithDescriptor_library_error_), descriptor, library, error);
+}
diff --git a/metal-cpp/Metal/MTLBlitCommandEncoder.hpp b/metal-cpp/Metal/MTLBlitCommandEncoder.hpp
new file mode 100644
index 0000000..0a86b6a
--- /dev/null
+++ b/metal-cpp/Metal/MTLBlitCommandEncoder.hpp
@@ -0,0 +1,246 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLBlitCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLBlitCommandEncoder.hpp"
+#include "MTLCommandEncoder.hpp"
+#include "MTLTypes.hpp"
+
+namespace MTL
+{
+_MTL_OPTIONS(NS::UInteger, BlitOption) { // Values are 0, 1, 2 and 4 (distinct bits); presumably meant to be OR-ed together - confirm against the _MTL_OPTIONS definition.
+    BlitOptionNone = 0,
+    BlitOptionDepthFromDepthStencil = 1,
+    BlitOptionStencilFromDepthStencil = 2,
+    BlitOptionRowLinearPVRTC = 4,
+};
+
+class BlitCommandEncoder : public NS::Referencing<BlitCommandEncoder, CommandEncoder> // Declarations only: each member is defined inline after the namespace and forwards to Object::sendMessage.
+{
+public:
+    void synchronizeResource(const class Resource* resource);
+
+    void synchronizeTexture(const class Texture* texture, NS::UInteger slice, NS::UInteger level);
+
+    void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin);
+
+    void copyFromBuffer(const class Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin);
+
+    void copyFromBuffer(const class Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin, MTL::BlitOption options);
+
+    void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const class Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger destinationBytesPerRow, NS::UInteger destinationBytesPerImage);
+
+    void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const class Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger destinationBytesPerRow, NS::UInteger destinationBytesPerImage, MTL::BlitOption options);
+
+    void generateMipmaps(const class Texture* texture);
+
+    void fillBuffer(const class Buffer* buffer, NS::Range range, uint8_t value);
+
+    void copyFromTexture(const class Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, const class Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, NS::UInteger sliceCount, NS::UInteger levelCount);
+
+    void copyFromTexture(const class Texture* sourceTexture, const class Texture* destinationTexture);
+
+    void copyFromBuffer(const class Buffer* sourceBuffer, NS::UInteger sourceOffset, const class Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger size);
+
+    void updateFence(const class Fence* fence);
+
+    void waitForFence(const class Fence* fence);
+
+    void getTextureAccessCounters(const class Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice, bool resetCounters, const class Buffer* countersBuffer, NS::UInteger countersBufferOffset);
+
+    void resetTextureAccessCounters(const class Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice);
+
+    void optimizeContentsForGPUAccess(const class Texture* texture);
+
+    void optimizeContentsForGPUAccess(const class Texture* texture, NS::UInteger slice, NS::UInteger level);
+
+    void optimizeContentsForCPUAccess(const class Texture* texture);
+
+    void optimizeContentsForCPUAccess(const class Texture* texture, NS::UInteger slice, NS::UInteger level);
+
+    void resetCommandsInBuffer(const class IndirectCommandBuffer* buffer, NS::Range range);
+
+    void copyIndirectCommandBuffer(const class IndirectCommandBuffer* source, NS::Range sourceRange, const class IndirectCommandBuffer* destination, NS::UInteger destinationIndex);
+
+    void optimizeIndirectCommandBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::Range range);
+
+    void sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier);
+
+    void resolveCounters(const class CounterSampleBuffer* sampleBuffer, NS::Range range, const class Buffer* destinationBuffer, NS::UInteger destinationOffset);
+};
+
+}
+
+// method: synchronizeResource: (passes resource unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::synchronizeResource(const MTL::Resource* resource)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(synchronizeResource_), resource);
+}
+
+// method: synchronizeTexture:slice:level: (passes texture, slice and level unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::synchronizeTexture(const MTL::Texture* texture, NS::UInteger slice, NS::UInteger level)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(synchronizeTexture_slice_level_), texture, slice, level);
+}
+
+// method: copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin: (texture-to-texture overload with origin and size; forwards all nine arguments in declaration order)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_), sourceTexture, sourceSlice, sourceLevel, sourceOrigin, sourceSize, destinationTexture, destinationSlice, destinationLevel, destinationOrigin);
+}
+
+// method: copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin: (buffer-to-texture overload without options; forwards all nine arguments in declaration order)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromBuffer(const MTL::Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_), sourceBuffer, sourceOffset, sourceBytesPerRow, sourceBytesPerImage, sourceSize, destinationTexture, destinationSlice, destinationLevel, destinationOrigin);
+}
+
+// method: copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:options: (buffer-to-texture overload with a trailing MTL::BlitOption; forwards all ten arguments in declaration order)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromBuffer(const MTL::Buffer* sourceBuffer, NS::UInteger sourceOffset, NS::UInteger sourceBytesPerRow, NS::UInteger sourceBytesPerImage, MTL::Size sourceSize, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, MTL::Origin destinationOrigin, MTL::BlitOption options)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_options_), sourceBuffer, sourceOffset, sourceBytesPerRow, sourceBytesPerImage, sourceSize, destinationTexture, destinationSlice, destinationLevel, destinationOrigin, options);
+}
+
+// method: copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage: (texture-to-buffer overload without options; forwards all nine arguments in declaration order)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger destinationBytesPerRow, NS::UInteger destinationBytesPerImage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_), sourceTexture, sourceSlice, sourceLevel, sourceOrigin, sourceSize, destinationBuffer, destinationOffset, destinationBytesPerRow, destinationBytesPerImage);
+}
+
+// method: copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage:options: (texture-to-buffer overload with a trailing MTL::BlitOption; forwards all ten arguments in declaration order)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, MTL::Origin sourceOrigin, MTL::Size sourceSize, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger destinationBytesPerRow, NS::UInteger destinationBytesPerImage, MTL::BlitOption options)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_options_), sourceTexture, sourceSlice, sourceLevel, sourceOrigin, sourceSize, destinationBuffer, destinationOffset, destinationBytesPerRow, destinationBytesPerImage, options);
+}
+
+// method: generateMipmapsForTexture: (passes texture unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::generateMipmaps(const MTL::Texture* texture)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(generateMipmapsForTexture_), texture);
+}
+
+// method: fillBuffer:range:value: (passes buffer, range and the uint8_t value unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::fillBuffer(const MTL::Buffer* buffer, NS::Range range, uint8_t value)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(fillBuffer_range_value_), buffer, range, value);
+}
+
+// method: copyFromTexture:sourceSlice:sourceLevel:toTexture:destinationSlice:destinationLevel:sliceCount:levelCount: (texture-to-texture overload taking slice and level counts; forwards all eight arguments in declaration order)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, NS::UInteger sourceSlice, NS::UInteger sourceLevel, const MTL::Texture* destinationTexture, NS::UInteger destinationSlice, NS::UInteger destinationLevel, NS::UInteger sliceCount, NS::UInteger levelCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromTexture_sourceSlice_sourceLevel_toTexture_destinationSlice_destinationLevel_sliceCount_levelCount_), sourceTexture, sourceSlice, sourceLevel, destinationTexture, destinationSlice, destinationLevel, sliceCount, levelCount);
+}
+
+// method: copyFromTexture:toTexture: (two-argument overload; passes sourceTexture and destinationTexture unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromTexture(const MTL::Texture* sourceTexture, const MTL::Texture* destinationTexture)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromTexture_toTexture_), sourceTexture, destinationTexture);
+}
+
+// method: copyFromBuffer:sourceOffset:toBuffer:destinationOffset:size: (buffer-to-buffer overload; forwards all five arguments in declaration order)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyFromBuffer(const MTL::Buffer* sourceBuffer, NS::UInteger sourceOffset, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset, NS::UInteger size)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyFromBuffer_sourceOffset_toBuffer_destinationOffset_size_), sourceBuffer, sourceOffset, destinationBuffer, destinationOffset, size);
+}
+
+// method: updateFence: (passes fence unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::updateFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateFence_), fence);
+}
+
+// method: waitForFence: (passes fence unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::waitForFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitForFence_), fence);
+}
+
+// method: getTextureAccessCounters:region:mipLevel:slice:resetCounters:countersBuffer:countersBufferOffset: (forwards all seven arguments in declaration order; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::getTextureAccessCounters(const MTL::Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice, bool resetCounters, const MTL::Buffer* countersBuffer, NS::UInteger countersBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(getTextureAccessCounters_region_mipLevel_slice_resetCounters_countersBuffer_countersBufferOffset_), texture, region, mipLevel, slice, resetCounters, countersBuffer, countersBufferOffset);
+}
+
+// method: resetTextureAccessCounters:region:mipLevel:slice: (passes texture, region, mipLevel and slice unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::resetTextureAccessCounters(const MTL::Texture* texture, MTL::Region region, NS::UInteger mipLevel, NS::UInteger slice)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(resetTextureAccessCounters_region_mipLevel_slice_), texture, region, mipLevel, slice);
+}
+
+// method: optimizeContentsForGPUAccess: (whole-texture overload; passes texture unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForGPUAccess(const MTL::Texture* texture)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(optimizeContentsForGPUAccess_), texture);
+}
+
+// method: optimizeContentsForGPUAccess:slice:level: (slice/level overload; passes texture, slice and level unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForGPUAccess(const MTL::Texture* texture, NS::UInteger slice, NS::UInteger level)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(optimizeContentsForGPUAccess_slice_level_), texture, slice, level);
+}
+
+// method: optimizeContentsForCPUAccess: (whole-texture overload; passes texture unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForCPUAccess(const MTL::Texture* texture)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(optimizeContentsForCPUAccess_), texture);
+}
+
+// method: optimizeContentsForCPUAccess:slice:level: (slice/level overload; passes texture, slice and level unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::optimizeContentsForCPUAccess(const MTL::Texture* texture, NS::UInteger slice, NS::UInteger level)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(optimizeContentsForCPUAccess_slice_level_), texture, slice, level);
+}
+
+// method: resetCommandsInBuffer:withRange: (passes buffer and range unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::resetCommandsInBuffer(const MTL::IndirectCommandBuffer* buffer, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(resetCommandsInBuffer_withRange_), buffer, range);
+}
+
+// method: copyIndirectCommandBuffer:sourceRange:destination:destinationIndex: (passes source, sourceRange, destination and destinationIndex unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::copyIndirectCommandBuffer(const MTL::IndirectCommandBuffer* source, NS::Range sourceRange, const MTL::IndirectCommandBuffer* destination, NS::UInteger destinationIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyIndirectCommandBuffer_sourceRange_destination_destinationIndex_), source, sourceRange, destination, destinationIndex);
+}
+
+// method: optimizeIndirectCommandBuffer:withRange: (passes indirectCommandBuffer and range unchanged to Object::sendMessage; returns nothing)
+_MTL_INLINE void MTL::BlitCommandEncoder::optimizeIndirectCommandBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(optimizeIndirectCommandBuffer_withRange_), indirectCommandBuffer, range);
+}
+
+// method: sampleCountersInBuffer:atSampleIndex:withBarrier: (passes sampleBuffer, sampleIndex and the bool barrier flag unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, barrier);
+}
+
+// method: resolveCounters:inRange:destinationBuffer:destinationOffset: (passes sampleBuffer, range, destinationBuffer and destinationOffset unchanged to Object::sendMessage)
+_MTL_INLINE void MTL::BlitCommandEncoder::resolveCounters(const MTL::CounterSampleBuffer* sampleBuffer, NS::Range range, const MTL::Buffer* destinationBuffer, NS::UInteger destinationOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(resolveCounters_inRange_destinationBuffer_destinationOffset_), sampleBuffer, range, destinationBuffer, destinationOffset);
+}
diff --git a/metal-cpp/Metal/MTLBlitPass.hpp b/metal-cpp/Metal/MTLBlitPass.hpp
new file mode 100644
index 0000000..78eae31
--- /dev/null
+++ b/metal-cpp/Metal/MTLBlitPass.hpp
@@ -0,0 +1,165 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLBlitPass.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{
+class BlitPassSampleBufferAttachmentDescriptor : public NS::Copying<BlitPassSampleBufferAttachmentDescriptor> // declarations only; each member is defined inline after this namespace closes
+{
+public:
+    static class BlitPassSampleBufferAttachmentDescriptor* alloc(); // static factory; pairs with init() below
+
+    class BlitPassSampleBufferAttachmentDescriptor*        init();
+
+    class CounterSampleBuffer*                             sampleBuffer() const; // `class X*` is an elaborated type specifier, so CounterSampleBuffer needs no prior declaration in this header
+    void                                                   setSampleBuffer(const class CounterSampleBuffer* sampleBuffer);
+
+    NS::UInteger                                           startOfEncoderSampleIndex() const; // getter/setter pair
+    void                                                   setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex);
+
+    NS::UInteger                                           endOfEncoderSampleIndex() const; // getter/setter pair
+    void                                                   setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex);
+};
+
+class BlitPassSampleBufferAttachmentDescriptorArray : public NS::Referencing<BlitPassSampleBufferAttachmentDescriptorArray> // indexed container of the descriptor type declared above
+{
+public:
+    static class BlitPassSampleBufferAttachmentDescriptorArray* alloc();
+
+    class BlitPassSampleBufferAttachmentDescriptorArray*        init();
+
+    class BlitPassSampleBufferAttachmentDescriptor*             object(NS::UInteger attachmentIndex); // defined below via the objectAtIndexedSubscript_ selector token
+
+    void                                                        setObject(const class BlitPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); // defined below via setObject_atIndexedSubscript_
+};
+
+class BlitPassDescriptor : public NS::Copying<BlitPassDescriptor> // declarations only; definitions follow the namespace
+{
+public:
+    static class BlitPassDescriptor*                     alloc();
+
+    class BlitPassDescriptor*                            init();
+
+    static class BlitPassDescriptor*                     blitPassDescriptor(); // static; the definition sends the message to the class token rather than to an instance
+
+    class BlitPassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const; // getter only; no setter is declared
+};
+
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<T> with the class token MTLBlitPassSampleBufferAttachmentDescriptor; ownership of the result follows NS::Object::alloc (not shown here).
+_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptor* MTL::BlitPassSampleBufferAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::BlitPassSampleBufferAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLBlitPassSampleBufferAttachmentDescriptor));
+}
+
+// method: init -- delegates to the inherited NS::Object::init<T> helper and returns its result typed as this descriptor.
+_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptor* MTL::BlitPassSampleBufferAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::BlitPassSampleBufferAttachmentDescriptor>();
+}
+
+// property: sampleBuffer -- getter returns the receiver's answer to `sampleBuffer` as a CounterSampleBuffer pointer; the setter that follows forwards the new pointer.
+_MTL_INLINE MTL::CounterSampleBuffer* MTL::BlitPassSampleBufferAttachmentDescriptor::sampleBuffer() const
+{
+    return Object::sendMessage<MTL::CounterSampleBuffer*>(this, _MTL_PRIVATE_SEL(sampleBuffer));
+}
+
+_MTL_INLINE void MTL::BlitPassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer);
+}
+
+// property: startOfEncoderSampleIndex -- getter returns the receiver's answer as NS::UInteger; the setter that follows forwards the new index unchanged.
+_MTL_INLINE NS::UInteger MTL::BlitPassSampleBufferAttachmentDescriptor::startOfEncoderSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(startOfEncoderSampleIndex));
+}
+
+_MTL_INLINE void MTL::BlitPassSampleBufferAttachmentDescriptor::setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStartOfEncoderSampleIndex_), startOfEncoderSampleIndex);
+}
+
+// property: endOfEncoderSampleIndex -- getter returns the receiver's answer as NS::UInteger; the setter that follows forwards the new index unchanged.
+_MTL_INLINE NS::UInteger MTL::BlitPassSampleBufferAttachmentDescriptor::endOfEncoderSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(endOfEncoderSampleIndex));
+}
+
+_MTL_INLINE void MTL::BlitPassSampleBufferAttachmentDescriptor::setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setEndOfEncoderSampleIndex_), endOfEncoderSampleIndex);
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<T> with the class token MTLBlitPassSampleBufferAttachmentDescriptorArray.
+_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptorArray* MTL::BlitPassSampleBufferAttachmentDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::BlitPassSampleBufferAttachmentDescriptorArray>(_MTL_PRIVATE_CLS(MTLBlitPassSampleBufferAttachmentDescriptorArray));
+}
+
+// method: init -- delegates to the inherited NS::Object::init<T> helper and returns its result typed as this array class.
+_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptorArray* MTL::BlitPassSampleBufferAttachmentDescriptorArray::init()
+{
+    return NS::Object::init<MTL::BlitPassSampleBufferAttachmentDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: -- returns the receiver's answer for the given attachment index as a descriptor pointer; no bounds check is performed in this wrapper.
+_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptor* MTL::BlitPassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex)
+{
+    return Object::sendMessage<MTL::BlitPassSampleBufferAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex);
+}
+
+// method: setObject:atIndexedSubscript: -- forwards the descriptor pointer and the index, in that order, and returns nothing; no bounds check is performed in this wrapper.
+_MTL_INLINE void MTL::BlitPassSampleBufferAttachmentDescriptorArray::setObject(const MTL::BlitPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex);
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<T> with the class token MTLBlitPassDescriptor.
+_MTL_INLINE MTL::BlitPassDescriptor* MTL::BlitPassDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::BlitPassDescriptor>(_MTL_PRIVATE_CLS(MTLBlitPassDescriptor));
+}
+
+// method: init -- delegates to the inherited NS::Object::init<T> helper and returns its result typed as BlitPassDescriptor.
+_MTL_INLINE MTL::BlitPassDescriptor* MTL::BlitPassDescriptor::init()
+{
+    return NS::Object::init<MTL::BlitPassDescriptor>();
+}
+
+// static method: blitPassDescriptor -- the message receiver is the class token MTLBlitPassDescriptor, not an instance; returns whatever that class answers, typed as BlitPassDescriptor*.
+_MTL_INLINE MTL::BlitPassDescriptor* MTL::BlitPassDescriptor::blitPassDescriptor()
+{
+    return Object::sendMessage<MTL::BlitPassDescriptor*>(_MTL_PRIVATE_CLS(MTLBlitPassDescriptor), _MTL_PRIVATE_SEL(blitPassDescriptor));
+}
+
+// property: sampleBufferAttachments -- read-only in this header; returns the receiver's answer typed as the attachment-descriptor array.
+_MTL_INLINE MTL::BlitPassSampleBufferAttachmentDescriptorArray* MTL::BlitPassDescriptor::sampleBufferAttachments() const
+{
+    return Object::sendMessage<MTL::BlitPassSampleBufferAttachmentDescriptorArray*>(this, _MTL_PRIVATE_SEL(sampleBufferAttachments));
+}
diff --git a/metal-cpp/Metal/MTLBuffer.hpp b/metal-cpp/Metal/MTLBuffer.hpp
new file mode 100644
index 0000000..61bddef
--- /dev/null
+++ b/metal-cpp/Metal/MTLBuffer.hpp
@@ -0,0 +1,101 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLBuffer.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLResource.hpp"
+
+namespace MTL
+{
+class Buffer : public NS::Referencing<Buffer, Resource> // Resource is presumably declared by MTLResource.hpp, which this header includes just above
+{
+public:
+    NS::UInteger   length() const;
+
+    void*          contents(); // untyped pointer; what it addresses is decided by the receiver, not by this wrapper
+
+    void           didModifyRange(NS::Range range);
+
+    class Texture* newTexture(const class TextureDescriptor* descriptor, NS::UInteger offset, NS::UInteger bytesPerRow); // `class X` elaborated specifiers avoid including the texture headers here
+
+    void           addDebugMarker(const NS::String* marker, NS::Range range);
+
+    void           removeAllDebugMarkers();
+
+    class Buffer*  remoteStorageBuffer() const; // getter only; no setter is declared
+
+    class Buffer*  newRemoteBufferViewForDevice(const class Device* device);
+};
+
+}
+
+// property: length -- returns, as NS::UInteger, the receiver's answer to the `length` message; read-only in this header.
+_MTL_INLINE NS::UInteger MTL::Buffer::length() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(length));
+}
+
+// method: contents -- returns the receiver's answer to `contents` as an untyped void*; this wrapper performs no null check or validation of its own.
+_MTL_INLINE void* MTL::Buffer::contents()
+{
+    return Object::sendMessage<void*>(this, _MTL_PRIVATE_SEL(contents));
+}
+
+// method: didModifyRange: -- forwards the NS::Range by value to the receiver and returns nothing.
+_MTL_INLINE void MTL::Buffer::didModifyRange(NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(didModifyRange_), range);
+}
+
+// method: newTextureWithDescriptor:offset:bytesPerRow: -- forwards descriptor, offset and bytesPerRow in that order and returns the answer typed as Texture*.
+_MTL_INLINE MTL::Texture* MTL::Buffer::newTexture(const MTL::TextureDescriptor* descriptor, NS::UInteger offset, NS::UInteger bytesPerRow)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_offset_bytesPerRow_), descriptor, offset, bytesPerRow);
+}
+
+// method: addDebugMarker:range: -- forwards the marker string pointer and the range to the receiver; returns nothing.
+_MTL_INLINE void MTL::Buffer::addDebugMarker(const NS::String* marker, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(addDebugMarker_range_), marker, range);
+}
+
+// method: removeAllDebugMarkers -- argument-less message to the receiver; returns nothing.
+_MTL_INLINE void MTL::Buffer::removeAllDebugMarkers()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(removeAllDebugMarkers));
+}
+
+// property: remoteStorageBuffer -- read-only in this header; returns the receiver's answer typed as Buffer*.
+_MTL_INLINE MTL::Buffer* MTL::Buffer::remoteStorageBuffer() const
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(remoteStorageBuffer));
+}
+
+// method: newRemoteBufferViewForDevice: -- forwards the device pointer and returns the receiver's answer typed as Buffer*.
+_MTL_INLINE MTL::Buffer* MTL::Buffer::newRemoteBufferViewForDevice(const MTL::Device* device)
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(newRemoteBufferViewForDevice_), device);
+}
diff --git a/metal-cpp/Metal/MTLCaptureManager.hpp b/metal-cpp/Metal/MTLCaptureManager.hpp
new file mode 100644
index 0000000..18e10f5
--- /dev/null
+++ b/metal-cpp/Metal/MTLCaptureManager.hpp
@@ -0,0 +1,220 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLCaptureManager.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLCaptureManager.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::Integer, CaptureError) { // _MTL_ENUM is defined outside this chunk; its first argument is presumably the underlying type
+    CaptureErrorNotSupported = 1,
+    CaptureErrorAlreadyCapturing = 2,
+    CaptureErrorInvalidDescriptor = 3,
+};
+
+_MTL_ENUM(NS::Integer, CaptureDestination) { // used by CaptureDescriptor::destination and CaptureManager::supportsDestination below
+    CaptureDestinationDeveloperTools = 1,
+    CaptureDestinationGPUTraceDocument = 2,
+};
+
+class CaptureDescriptor : public NS::Copying<CaptureDescriptor> // three getter/setter properties plus alloc/init; definitions follow the namespace
+{
+public:
+    static class CaptureDescriptor* alloc();
+
+    class CaptureDescriptor*        init();
+
+    id                              captureObject() const; // untyped `id`: this header does not constrain what kind of object is stored
+    void                            setCaptureObject(id captureObject);
+
+    MTL::CaptureDestination         destination() const;
+    void                            setDestination(MTL::CaptureDestination destination);
+
+    NS::URL*                        outputURL() const;
+    void                            setOutputURL(const NS::URL* outputURL);
+};
+
+class CaptureManager : public NS::Referencing<CaptureManager>
+{
+public:
+    static class CaptureManager* alloc();
+
+    static class CaptureManager* sharedCaptureManager(); // static accessor; the definition messages the class token, not an instance
+
+    MTL::CaptureManager*         init();
+
+    class CaptureScope*          newCaptureScope(const class Device* device); // overload defined via newCaptureScopeWithDevice_
+
+    class CaptureScope*          newCaptureScope(const class CommandQueue* commandQueue); // overload defined via newCaptureScopeWithCommandQueue_
+
+    bool                         supportsDestination(MTL::CaptureDestination destination);
+
+    bool                         startCapture(const class CaptureDescriptor* descriptor, NS::Error** error); // the only startCapture overload that returns a result and takes an error out-parameter
+
+    void                         startCapture(const class Device* device);
+
+    void                         startCapture(const class CommandQueue* commandQueue);
+
+    void                         startCapture(const class CaptureScope* captureScope);
+
+    void                         stopCapture();
+
+    class CaptureScope*          defaultCaptureScope() const; // getter/setter pair
+    void                         setDefaultCaptureScope(const class CaptureScope* defaultCaptureScope);
+
+    bool                         isCapturing() const; // getter only
+};
+
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<T> with the class token MTLCaptureDescriptor.
+_MTL_INLINE MTL::CaptureDescriptor* MTL::CaptureDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::CaptureDescriptor>(_MTL_PRIVATE_CLS(MTLCaptureDescriptor));
+}
+
+// method: init -- delegates to the inherited NS::Object::init<T> helper and returns its result typed as CaptureDescriptor.
+_MTL_INLINE MTL::CaptureDescriptor* MTL::CaptureDescriptor::init()
+{
+    return NS::Object::init<MTL::CaptureDescriptor>();
+}
+
+// property: captureObject -- getter returns the receiver's answer as an untyped `id`; the setter that follows forwards the given `id` unchanged.
+_MTL_INLINE id MTL::CaptureDescriptor::captureObject() const
+{
+    return Object::sendMessage<id>(this, _MTL_PRIVATE_SEL(captureObject));
+}
+
+_MTL_INLINE void MTL::CaptureDescriptor::setCaptureObject(id captureObject) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCaptureObject_), captureObject);
+}
+
+// property: destination -- getter returns the receiver's answer typed as MTL::CaptureDestination; the setter that follows forwards the enum value.
+_MTL_INLINE MTL::CaptureDestination MTL::CaptureDescriptor::destination() const
+{
+    return Object::sendMessage<MTL::CaptureDestination>(this, _MTL_PRIVATE_SEL(destination));
+}
+
+_MTL_INLINE void MTL::CaptureDescriptor::setDestination(MTL::CaptureDestination destination) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDestination_), destination);
+}
+
+// property: outputURL -- getter returns the receiver's answer typed as NS::URL*; the setter that follows forwards the URL pointer.
+_MTL_INLINE NS::URL* MTL::CaptureDescriptor::outputURL() const
+{
+    return Object::sendMessage<NS::URL*>(this, _MTL_PRIVATE_SEL(outputURL));
+}
+
+_MTL_INLINE void MTL::CaptureDescriptor::setOutputURL(const NS::URL* outputURL) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOutputURL_), outputURL);
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<T> with the class token MTLCaptureManager.
+_MTL_INLINE MTL::CaptureManager* MTL::CaptureManager::alloc()
+{
+    return NS::Object::alloc<MTL::CaptureManager>(_MTL_PRIVATE_CLS(MTLCaptureManager));
+}
+
+// static method: sharedCaptureManager -- the message receiver is the class token MTLCaptureManager; returns whatever that class answers, typed as CaptureManager*.
+_MTL_INLINE MTL::CaptureManager* MTL::CaptureManager::sharedCaptureManager()
+{
+    return Object::sendMessage<MTL::CaptureManager*>(_MTL_PRIVATE_CLS(MTLCaptureManager), _MTL_PRIVATE_SEL(sharedCaptureManager));
+}
+
+// method: init -- delegates to the inherited NS::Object::init<T> helper and returns its result typed as CaptureManager.
+_MTL_INLINE MTL::CaptureManager* MTL::CaptureManager::init()
+{
+    return NS::Object::init<MTL::CaptureManager>();
+}
+
+// method: newCaptureScopeWithDevice: -- Device overload of newCaptureScope; forwards the device pointer and returns the answer typed as CaptureScope*.
+_MTL_INLINE MTL::CaptureScope* MTL::CaptureManager::newCaptureScope(const MTL::Device* device)
+{
+    return Object::sendMessage<MTL::CaptureScope*>(this, _MTL_PRIVATE_SEL(newCaptureScopeWithDevice_), device);
+}
+
+// method: newCaptureScopeWithCommandQueue: -- CommandQueue overload of newCaptureScope; forwards the queue pointer and returns the answer typed as CaptureScope*.
+_MTL_INLINE MTL::CaptureScope* MTL::CaptureManager::newCaptureScope(const MTL::CommandQueue* commandQueue)
+{
+    return Object::sendMessage<MTL::CaptureScope*>(this, _MTL_PRIVATE_SEL(newCaptureScopeWithCommandQueue_), commandQueue);
+}
+
+// method: supportsDestination: -- forwards the destination enum and returns a bool. NOTE(review): the only wrapper in this header that uses sendMessageSafe instead of sendMessage; its exact semantics live in NS::Object (not shown) -- confirm this is intentional.
+_MTL_INLINE bool MTL::CaptureManager::supportsDestination(MTL::CaptureDestination destination)
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsDestination_), destination);
+}
+
+// method: startCaptureWithDescriptor:error: -- forwards the descriptor and the NS::Error** out-parameter untouched; returns the receiver's bool answer. This wrapper never writes to *error itself.
+_MTL_INLINE bool MTL::CaptureManager::startCapture(const MTL::CaptureDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(startCaptureWithDescriptor_error_), descriptor, error);
+}
+
+// method: startCaptureWithDevice: -- Device overload of startCapture; forwards the device pointer and returns nothing (no error reporting on this overload).
+_MTL_INLINE void MTL::CaptureManager::startCapture(const MTL::Device* device)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(startCaptureWithDevice_), device);
+}
+
+// method: startCaptureWithCommandQueue: -- CommandQueue overload of startCapture; forwards the queue pointer and returns nothing.
+_MTL_INLINE void MTL::CaptureManager::startCapture(const MTL::CommandQueue* commandQueue)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(startCaptureWithCommandQueue_), commandQueue);
+}
+
+// method: startCaptureWithScope: -- CaptureScope overload of startCapture; forwards the scope pointer and returns nothing.
+_MTL_INLINE void MTL::CaptureManager::startCapture(const MTL::CaptureScope* captureScope)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(startCaptureWithScope_), captureScope);
+}
+
+// method: stopCapture -- argument-less message to the receiver; returns nothing.
+_MTL_INLINE void MTL::CaptureManager::stopCapture()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(stopCapture));
+}
+
+// property: defaultCaptureScope -- getter returns the receiver's answer typed as CaptureScope*; the setter that follows forwards the scope pointer.
+_MTL_INLINE MTL::CaptureScope* MTL::CaptureManager::defaultCaptureScope() const
+{
+    return Object::sendMessage<MTL::CaptureScope*>(this, _MTL_PRIVATE_SEL(defaultCaptureScope));
+}
+
+_MTL_INLINE void MTL::CaptureManager::setDefaultCaptureScope(const MTL::CaptureScope* defaultCaptureScope) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDefaultCaptureScope_), defaultCaptureScope);
+}
+
+// property: isCapturing -- read-only in this header; returns the receiver's bool answer to the `isCapturing` message.
+_MTL_INLINE bool MTL::CaptureManager::isCapturing() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isCapturing));
+}
diff --git a/metal-cpp/Metal/MTLCaptureScope.hpp b/metal-cpp/Metal/MTLCaptureScope.hpp
new file mode 100644
index 0000000..2360235
--- /dev/null
+++ b/metal-cpp/Metal/MTLCaptureScope.hpp
@@ -0,0 +1,92 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLCaptureScope.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "MTLDefines.hpp"
+#include "MTLPrivate.hpp"
+
+#include "../Foundation/NSObject.hpp"
+#include "../Foundation/NSString.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace MTL
+{
+class CaptureScope : public NS::Referencing<CaptureScope> // no alloc/init is declared here; CaptureManager::newCaptureScope in MTLCaptureManager.hpp returns this type
+{
+public:
+    class Device*       device() const; // getter only
+
+    NS::String*         label() const; // getter/setter pair
+    void                setLabel(const NS::String* pLabel);
+
+    class CommandQueue* commandQueue() const; // getter only
+
+    void                beginScope(); // paired with endScope(); both are argument-less messages in the definitions below
+    void                endScope();
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::Device* MTL::CaptureScope::device() const // getter: returns the receiver's answer to `device`
+{
+    return Object::sendMessage<Device*>(this, _MTL_PRIVATE_SEL(device)); // unqualified Device resolves to MTL::Device inside this member definition
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE NS::String* MTL::CaptureScope::label() const // getter: returns the receiver's answer to `label` as NS::String*
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE void MTL::CaptureScope::setLabel(const NS::String* pLabel) // setter: forwards the string pointer unchanged
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), pLabel);
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE MTL::CommandQueue* MTL::CaptureScope::commandQueue() const // getter: returns the receiver's answer to `commandQueue`
+{
+    return Object::sendMessage<CommandQueue*>(this, _MTL_PRIVATE_SEL(commandQueue)); // unqualified CommandQueue resolves to MTL::CommandQueue inside this member definition
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE void MTL::CaptureScope::beginScope() // argument-less message; nothing is returned to the caller
+{
+    return Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(beginScope)); // `return` of a void expression is legal C++ and equivalent to a plain call here
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+_MTL_INLINE void MTL::CaptureScope::endScope() // argument-less message; nothing is returned to the caller
+{
+    return Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(endScope)); // `return` of a void expression is legal C++ and equivalent to a plain call here
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Metal/MTLCommandBuffer.hpp b/metal-cpp/Metal/MTLCommandBuffer.hpp
new file mode 100644
index 0000000..d0fc98e
--- /dev/null
+++ b/metal-cpp/Metal/MTLCommandBuffer.hpp
@@ -0,0 +1,465 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLCommandBuffer.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLCommandBuffer.hpp"
+#include <functional>
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, CommandBufferStatus) { // returned by CommandBuffer::status(); values are contiguous 0..5
+    CommandBufferStatusNotEnqueued = 0,
+    CommandBufferStatusEnqueued = 1,
+    CommandBufferStatusCommitted = 2,
+    CommandBufferStatusScheduled = 3,
+    CommandBufferStatusCompleted = 4,
+    CommandBufferStatusError = 5,
+};
+
+_MTL_ENUM(NS::UInteger, CommandBufferError) { // values 1, 5 and 6 have no enumerator in this list
+    CommandBufferErrorNone = 0, // NOTE(review): Apple's MTLCommandBufferError also lists Internal = 1, which has no counterpart here -- confirm against upstream metal-cpp
+    CommandBufferErrorTimeout = 2,
+    CommandBufferErrorPageFault = 3,
+    CommandBufferErrorAccessRevoked = 4, // shares value 4 with Blacklisted on the next line; the two names are aliases
+    CommandBufferErrorBlacklisted = 4,
+    CommandBufferErrorNotPermitted = 7,
+    CommandBufferErrorOutOfMemory = 8,
+    CommandBufferErrorInvalidResource = 9,
+    CommandBufferErrorMemoryless = 10,
+    CommandBufferErrorDeviceRemoved = 11,
+    CommandBufferErrorStackOverflow = 12,
+};
+
+_MTL_OPTIONS(NS::UInteger, CommandBufferErrorOption) { // declared with _MTL_OPTIONS rather than _MTL_ENUM; presumably a bit-flag set -- both macros are defined outside this chunk
+    CommandBufferErrorOptionNone = 0,
+    CommandBufferErrorOptionEncoderExecutionStatus = 1,
+};
+
+_MTL_ENUM(NS::Integer, CommandEncoderErrorState) { // returned by CommandBufferEncoderInfo::errorState()
+    CommandEncoderErrorStateUnknown = 0,
+    CommandEncoderErrorStateCompleted = 1,
+    CommandEncoderErrorStateAffected = 2,
+    CommandEncoderErrorStatePending = 3,
+    CommandEncoderErrorStateFaulted = 4,
+};
+
+class CommandBufferDescriptor : public NS::Copying<CommandBufferDescriptor> // two getter/setter properties plus alloc/init
+{
+public:
+    static class CommandBufferDescriptor* alloc();
+
+    class CommandBufferDescriptor*        init();
+
+    bool                                  retainedReferences() const;
+    void                                  setRetainedReferences(bool retainedReferences);
+
+    MTL::CommandBufferErrorOption         errorOptions() const;
+    void                                  setErrorOptions(MTL::CommandBufferErrorOption errorOptions);
+};
+
+class CommandBufferEncoderInfo : public NS::Referencing<CommandBufferEncoderInfo> // read-only: three getters, no setters, no alloc/init
+{
+public:
+    NS::String*                   label() const;
+
+    NS::Array*                    debugSignposts() const;
+
+    MTL::CommandEncoderErrorState errorState() const;
+};
+
+_MTL_ENUM(NS::UInteger, DispatchType) { // parameter type of CommandBuffer::computeCommandEncoder(MTL::DispatchType) below
+    DispatchTypeSerial = 0,
+    DispatchTypeConcurrent = 1,
+};
+
+class CommandBuffer; // forward declaration so the two handler aliases below can name CommandBuffer*
+
+using CommandBufferHandler = void (^)(CommandBuffer*); // block-pointer type (`^` syntax); compiling this requires a compiler with blocks support
+
+using HandlerFunction = std::function<void(CommandBuffer*)>; // std::function comes from <functional>, included at the top of this header
+
+class CommandBuffer : public NS::Referencing<CommandBuffer>
+{
+public:
+    void                                       addScheduledHandler(const HandlerFunction& function); // std::function overload; its definition wraps the callable in a block and calls the block overload
+
+    void                                       addCompletedHandler(const HandlerFunction& function); // std::function overload; its definition wraps the callable in a block and calls the block overload
+
+    class Device*                              device() const;
+
+    class CommandQueue*                        commandQueue() const;
+
+    bool                                       retainedReferences() const;
+
+    MTL::CommandBufferErrorOption              errorOptions() const;
+
+    NS::String*                                label() const; // getter/setter pair
+    void                                       setLabel(const NS::String* label);
+
+    CFTimeInterval                             kernelStartTime() const; // CFTimeInterval is not declared in this chunk; presumably provided through the Foundation include
+
+    CFTimeInterval                             kernelEndTime() const;
+
+    class LogContainer*                        logs() const;
+
+    CFTimeInterval                             GPUStartTime() const;
+
+    CFTimeInterval                             GPUEndTime() const;
+
+    void                                       enqueue();
+
+    void                                       commit();
+
+    void                                       addScheduledHandler(const MTL::CommandBufferHandler block); // block overload
+
+    void                                       presentDrawable(const class Drawable* drawable);
+
+    void                                       presentDrawableAtTime(const class Drawable* drawable, CFTimeInterval presentationTime);
+
+    void                                       presentDrawableAfterMinimumDuration(const class Drawable* drawable, CFTimeInterval duration);
+
+    void                                       waitUntilScheduled();
+
+    void                                       addCompletedHandler(const MTL::CommandBufferHandler block); // block overload
+
+    void                                       waitUntilCompleted();
+
+    MTL::CommandBufferStatus                   status() const;
+
+    NS::Error*                                 error() const;
+
+    class BlitCommandEncoder*                  blitCommandEncoder(); // overloaded: no-argument form and BlitPassDescriptor form
+
+    class RenderCommandEncoder*                renderCommandEncoder(const class RenderPassDescriptor* renderPassDescriptor);
+
+    class ComputeCommandEncoder*               computeCommandEncoder(const class ComputePassDescriptor* computePassDescriptor); // overloaded three ways: descriptor, no argument, DispatchType
+
+    class BlitCommandEncoder*                  blitCommandEncoder(const class BlitPassDescriptor* blitPassDescriptor);
+
+    class ComputeCommandEncoder*               computeCommandEncoder();
+
+    class ComputeCommandEncoder*               computeCommandEncoder(MTL::DispatchType dispatchType);
+
+    void                                       encodeWait(const class Event* event, uint64_t value);
+
+    void                                       encodeSignalEvent(const class Event* event, uint64_t value);
+
+    class ParallelRenderCommandEncoder*        parallelRenderCommandEncoder(const class RenderPassDescriptor* renderPassDescriptor);
+
+    class ResourceStateCommandEncoder*         resourceStateCommandEncoder(); // overloaded: no-argument form and descriptor form
+
+    class ResourceStateCommandEncoder*         resourceStateCommandEncoder(const class ResourceStatePassDescriptor* resourceStatePassDescriptor);
+
+    class AccelerationStructureCommandEncoder* accelerationStructureCommandEncoder();
+
+    void                                       pushDebugGroup(const NS::String* string); // paired with popDebugGroup()
+
+    void                                       popDebugGroup();
+};
+
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<T> with the class token MTLCommandBufferDescriptor.
+_MTL_INLINE MTL::CommandBufferDescriptor* MTL::CommandBufferDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::CommandBufferDescriptor>(_MTL_PRIVATE_CLS(MTLCommandBufferDescriptor));
+}
+
+// method: init -- delegates to the inherited NS::Object::init<T> helper and returns its result typed as CommandBufferDescriptor.
+_MTL_INLINE MTL::CommandBufferDescriptor* MTL::CommandBufferDescriptor::init()
+{
+    return NS::Object::init<MTL::CommandBufferDescriptor>();
+}
+
+// property: retainedReferences -- getter returns the receiver's bool answer; the setter that follows forwards the new bool value.
+_MTL_INLINE bool MTL::CommandBufferDescriptor::retainedReferences() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(retainedReferences));
+}
+
+_MTL_INLINE void MTL::CommandBufferDescriptor::setRetainedReferences(bool retainedReferences) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRetainedReferences_), retainedReferences);
+}
+
+// property: errorOptions -- getter returns the receiver's answer typed as MTL::CommandBufferErrorOption; the setter that follows forwards the option value.
+_MTL_INLINE MTL::CommandBufferErrorOption MTL::CommandBufferDescriptor::errorOptions() const
+{
+    return Object::sendMessage<MTL::CommandBufferErrorOption>(this, _MTL_PRIVATE_SEL(errorOptions));
+}
+
+_MTL_INLINE void MTL::CommandBufferDescriptor::setErrorOptions(MTL::CommandBufferErrorOption errorOptions) // setter half of the property
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setErrorOptions_), errorOptions);
+}
+
+// property: label -- read-only on CommandBufferEncoderInfo; returns the receiver's answer typed as NS::String*.
+_MTL_INLINE NS::String* MTL::CommandBufferEncoderInfo::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: debugSignposts -- read-only; returns the receiver's answer typed as NS::Array*. The element type is not expressed in this wrapper.
+_MTL_INLINE NS::Array* MTL::CommandBufferEncoderInfo::debugSignposts() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(debugSignposts));
+}
+
+// property: errorState -- read-only; returns the receiver's answer typed as MTL::CommandEncoderErrorState.
+_MTL_INLINE MTL::CommandEncoderErrorState MTL::CommandBufferEncoderInfo::errorState() const
+{
+    return Object::sendMessage<MTL::CommandEncoderErrorState>(this, _MTL_PRIVATE_SEL(errorState));
+}
+
+_MTL_INLINE void MTL::CommandBuffer::addScheduledHandler(const HandlerFunction& function) // std::function convenience overload; adapts the callable to the block overload
+{
+    __block HandlerFunction blockFunction = function; // copy the caller's callable into a __block variable so the block below uses its own copy, not the caller's reference
+
+    addScheduledHandler(^(MTL::CommandBuffer* pCommandBuffer) { blockFunction(pCommandBuffer); }); // block literal selects the CommandBufferHandler overload and simply invokes the copied callable
+}
+
+_MTL_INLINE void MTL::CommandBuffer::addCompletedHandler(const HandlerFunction& function)
+{ // Convenience overload: copies the HandlerFunction callable, wraps it in a block, and passes that block to addCompletedHandler.
+    __block HandlerFunction blockFunction = function;
+    // The block uses the __block copy above and invokes it with the command buffer pointer it is handed.
+    addCompletedHandler(^(MTL::CommandBuffer* pCommandBuffer) { blockFunction(pCommandBuffer); });
+}
+
+// property: device - getter; sends the device message to this object and returns the reply as MTL::Device*.
+_MTL_INLINE MTL::Device* MTL::CommandBuffer::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: commandQueue - getter; sends the commandQueue message to this object and returns the reply as MTL::CommandQueue*.
+_MTL_INLINE MTL::CommandQueue* MTL::CommandBuffer::commandQueue() const
+{
+    return Object::sendMessage<MTL::CommandQueue*>(this, _MTL_PRIVATE_SEL(commandQueue));
+}
+
+// property: retainedReferences - getter; sends the retainedReferences message to this object and returns the reply as bool.
+_MTL_INLINE bool MTL::CommandBuffer::retainedReferences() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(retainedReferences));
+}
+
+// property: errorOptions - getter; sends the errorOptions message to this object and returns the reply as MTL::CommandBufferErrorOption.
+_MTL_INLINE MTL::CommandBufferErrorOption MTL::CommandBuffer::errorOptions() const
+{
+    return Object::sendMessage<MTL::CommandBufferErrorOption>(this, _MTL_PRIVATE_SEL(errorOptions));
+}
+
+// property: label - the getter returns the reply to the label message as NS::String*; the setter sends setLabel_ with the new label pointer.
+_MTL_INLINE NS::String* MTL::CommandBuffer::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::CommandBuffer::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: kernelStartTime - getter; sends the kernelStartTime message to this object and returns the reply as CFTimeInterval.
+_MTL_INLINE CFTimeInterval MTL::CommandBuffer::kernelStartTime() const
+{
+    return Object::sendMessage<CFTimeInterval>(this, _MTL_PRIVATE_SEL(kernelStartTime));
+}
+
+// property: kernelEndTime - getter; sends the kernelEndTime message to this object and returns the reply as CFTimeInterval.
+_MTL_INLINE CFTimeInterval MTL::CommandBuffer::kernelEndTime() const
+{
+    return Object::sendMessage<CFTimeInterval>(this, _MTL_PRIVATE_SEL(kernelEndTime));
+}
+
+// property: logs - getter; sends the logs message to this object and returns the reply as MTL::LogContainer*.
+_MTL_INLINE MTL::LogContainer* MTL::CommandBuffer::logs() const
+{
+    return Object::sendMessage<MTL::LogContainer*>(this, _MTL_PRIVATE_SEL(logs));
+}
+
+// property: GPUStartTime - getter; sends the GPUStartTime message to this object and returns the reply as CFTimeInterval.
+_MTL_INLINE CFTimeInterval MTL::CommandBuffer::GPUStartTime() const
+{
+    return Object::sendMessage<CFTimeInterval>(this, _MTL_PRIVATE_SEL(GPUStartTime));
+}
+
+// property: GPUEndTime - getter; sends the GPUEndTime message to this object and returns the reply as CFTimeInterval.
+_MTL_INLINE CFTimeInterval MTL::CommandBuffer::GPUEndTime() const
+{
+    return Object::sendMessage<CFTimeInterval>(this, _MTL_PRIVATE_SEL(GPUEndTime));
+}
+
+// method: enqueue - sends the enqueue message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::enqueue()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(enqueue));
+}
+
+// method: commit - sends the commit message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::commit()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(commit));
+}
+
+// method: addScheduledHandler: - sends selector key addScheduledHandler_ to this object with the CommandBufferHandler block as its argument.
+_MTL_INLINE void MTL::CommandBuffer::addScheduledHandler(const MTL::CommandBufferHandler block)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(addScheduledHandler_), block);
+}
+
+// method: presentDrawable: - sends selector key presentDrawable_ to this object with the drawable pointer as its argument.
+_MTL_INLINE void MTL::CommandBuffer::presentDrawable(const MTL::Drawable* drawable)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(presentDrawable_), drawable);
+}
+
+// method: presentDrawable:atTime: - sends selector key presentDrawable_atTime_ to this object with (drawable, presentationTime).
+_MTL_INLINE void MTL::CommandBuffer::presentDrawableAtTime(const MTL::Drawable* drawable, CFTimeInterval presentationTime)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(presentDrawable_atTime_), drawable, presentationTime);
+}
+
+// method: presentDrawable:afterMinimumDuration: - sends selector key presentDrawable_afterMinimumDuration_ to this object with (drawable, duration).
+_MTL_INLINE void MTL::CommandBuffer::presentDrawableAfterMinimumDuration(const MTL::Drawable* drawable, CFTimeInterval duration)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(presentDrawable_afterMinimumDuration_), drawable, duration);
+}
+
+// method: waitUntilScheduled - sends the waitUntilScheduled message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::waitUntilScheduled()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitUntilScheduled));
+}
+
+// method: addCompletedHandler: - sends selector key addCompletedHandler_ to this object with the CommandBufferHandler block as its argument.
+_MTL_INLINE void MTL::CommandBuffer::addCompletedHandler(const MTL::CommandBufferHandler block)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(addCompletedHandler_), block);
+}
+
+// method: waitUntilCompleted - sends the waitUntilCompleted message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::waitUntilCompleted()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitUntilCompleted));
+}
+
+// property: status - getter; sends the status message to this object and returns the reply as MTL::CommandBufferStatus.
+_MTL_INLINE MTL::CommandBufferStatus MTL::CommandBuffer::status() const
+{
+    return Object::sendMessage<MTL::CommandBufferStatus>(this, _MTL_PRIVATE_SEL(status));
+}
+
+// property: error - getter; sends the error message to this object and returns the reply as NS::Error*.
+_MTL_INLINE NS::Error* MTL::CommandBuffer::error() const
+{
+    return Object::sendMessage<NS::Error*>(this, _MTL_PRIVATE_SEL(error));
+}
+
+// method: blitCommandEncoder - sends the blitCommandEncoder message (no arguments) and returns the reply as MTL::BlitCommandEncoder*.
+_MTL_INLINE MTL::BlitCommandEncoder* MTL::CommandBuffer::blitCommandEncoder()
+{
+    return Object::sendMessage<MTL::BlitCommandEncoder*>(this, _MTL_PRIVATE_SEL(blitCommandEncoder));
+}
+
+// method: renderCommandEncoderWithDescriptor: - sends selector key renderCommandEncoderWithDescriptor_ with renderPassDescriptor; returns the reply as MTL::RenderCommandEncoder*.
+_MTL_INLINE MTL::RenderCommandEncoder* MTL::CommandBuffer::renderCommandEncoder(const MTL::RenderPassDescriptor* renderPassDescriptor)
+{
+    return Object::sendMessage<MTL::RenderCommandEncoder*>(this, _MTL_PRIVATE_SEL(renderCommandEncoderWithDescriptor_), renderPassDescriptor);
+}
+
+// method: computeCommandEncoderWithDescriptor: - sends selector key computeCommandEncoderWithDescriptor_ with computePassDescriptor; returns the reply as MTL::ComputeCommandEncoder*.
+_MTL_INLINE MTL::ComputeCommandEncoder* MTL::CommandBuffer::computeCommandEncoder(const MTL::ComputePassDescriptor* computePassDescriptor)
+{
+    return Object::sendMessage<MTL::ComputeCommandEncoder*>(this, _MTL_PRIVATE_SEL(computeCommandEncoderWithDescriptor_), computePassDescriptor);
+}
+
+// method: blitCommandEncoderWithDescriptor: - sends selector key blitCommandEncoderWithDescriptor_ with blitPassDescriptor; returns the reply as MTL::BlitCommandEncoder*.
+_MTL_INLINE MTL::BlitCommandEncoder* MTL::CommandBuffer::blitCommandEncoder(const MTL::BlitPassDescriptor* blitPassDescriptor)
+{
+    return Object::sendMessage<MTL::BlitCommandEncoder*>(this, _MTL_PRIVATE_SEL(blitCommandEncoderWithDescriptor_), blitPassDescriptor);
+}
+
+// method: computeCommandEncoder - sends the computeCommandEncoder message (no arguments) and returns the reply as MTL::ComputeCommandEncoder*.
+_MTL_INLINE MTL::ComputeCommandEncoder* MTL::CommandBuffer::computeCommandEncoder()
+{
+    return Object::sendMessage<MTL::ComputeCommandEncoder*>(this, _MTL_PRIVATE_SEL(computeCommandEncoder));
+}
+
+// method: computeCommandEncoderWithDispatchType: - sends selector key computeCommandEncoderWithDispatchType_ with dispatchType; returns the reply as MTL::ComputeCommandEncoder*.
+_MTL_INLINE MTL::ComputeCommandEncoder* MTL::CommandBuffer::computeCommandEncoder(MTL::DispatchType dispatchType)
+{
+    return Object::sendMessage<MTL::ComputeCommandEncoder*>(this, _MTL_PRIVATE_SEL(computeCommandEncoderWithDispatchType_), dispatchType);
+}
+
+// method: encodeWaitForEvent:value: - sends selector key encodeWaitForEvent_value_ to this object with (event, value); returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::encodeWait(const MTL::Event* event, uint64_t value)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(encodeWaitForEvent_value_), event, value);
+}
+
+// method: encodeSignalEvent:value: - sends selector key encodeSignalEvent_value_ to this object with (event, value); returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::encodeSignalEvent(const MTL::Event* event, uint64_t value)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(encodeSignalEvent_value_), event, value);
+}
+
+// method: parallelRenderCommandEncoderWithDescriptor: - sends selector key parallelRenderCommandEncoderWithDescriptor_ with renderPassDescriptor; returns the reply as MTL::ParallelRenderCommandEncoder*.
+_MTL_INLINE MTL::ParallelRenderCommandEncoder* MTL::CommandBuffer::parallelRenderCommandEncoder(const MTL::RenderPassDescriptor* renderPassDescriptor)
+{
+    return Object::sendMessage<MTL::ParallelRenderCommandEncoder*>(this, _MTL_PRIVATE_SEL(parallelRenderCommandEncoderWithDescriptor_), renderPassDescriptor);
+}
+
+// method: resourceStateCommandEncoder - sends the resourceStateCommandEncoder message (no arguments) and returns the reply as MTL::ResourceStateCommandEncoder*.
+_MTL_INLINE MTL::ResourceStateCommandEncoder* MTL::CommandBuffer::resourceStateCommandEncoder()
+{
+    return Object::sendMessage<MTL::ResourceStateCommandEncoder*>(this, _MTL_PRIVATE_SEL(resourceStateCommandEncoder));
+}
+
+// method: resourceStateCommandEncoderWithDescriptor: - sends selector key resourceStateCommandEncoderWithDescriptor_ with resourceStatePassDescriptor; returns the reply as MTL::ResourceStateCommandEncoder*.
+_MTL_INLINE MTL::ResourceStateCommandEncoder* MTL::CommandBuffer::resourceStateCommandEncoder(const MTL::ResourceStatePassDescriptor* resourceStatePassDescriptor)
+{
+    return Object::sendMessage<MTL::ResourceStateCommandEncoder*>(this, _MTL_PRIVATE_SEL(resourceStateCommandEncoderWithDescriptor_), resourceStatePassDescriptor);
+}
+
+// method: accelerationStructureCommandEncoder - sends the accelerationStructureCommandEncoder message (no arguments) and returns the reply as MTL::AccelerationStructureCommandEncoder*.
+_MTL_INLINE MTL::AccelerationStructureCommandEncoder* MTL::CommandBuffer::accelerationStructureCommandEncoder()
+{
+    return Object::sendMessage<MTL::AccelerationStructureCommandEncoder*>(this, _MTL_PRIVATE_SEL(accelerationStructureCommandEncoder));
+}
+
+// method: pushDebugGroup: - sends selector key pushDebugGroup_ to this object with the NS::String pointer as its argument; returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::pushDebugGroup(const NS::String* string)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(pushDebugGroup_), string);
+}
+
+// method: popDebugGroup - sends the popDebugGroup message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandBuffer::popDebugGroup()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(popDebugGroup));
+}
diff --git a/metal-cpp/Metal/MTLCommandEncoder.hpp b/metal-cpp/Metal/MTLCommandEncoder.hpp
new file mode 100644
index 0000000..7f1f392
--- /dev/null
+++ b/metal-cpp/Metal/MTLCommandEncoder.hpp
@@ -0,0 +1,101 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{ // Declares two option sets and the CommandEncoder wrapper class; the member definitions follow this namespace.
+_MTL_OPTIONS(NS::UInteger, ResourceUsage) { // declared via _MTL_OPTIONS with NS::UInteger; each enumerator is a distinct single-bit value
+    ResourceUsageRead = 1,
+    ResourceUsageWrite = 2,
+    ResourceUsageSample = 4,
+};
+
+_MTL_OPTIONS(NS::UInteger, BarrierScope) { // declared via _MTL_OPTIONS with NS::UInteger; each enumerator is a distinct single-bit value
+    BarrierScopeBuffers = 1,
+    BarrierScopeTextures = 2,
+    BarrierScopeRenderTargets = 4,
+};
+
+class CommandEncoder : public NS::Referencing<CommandEncoder> // declarations only; no data members are declared here
+{
+public:
+    class Device* device() const; // getter only
+
+    NS::String*   label() const; // label property: getter
+    void          setLabel(const NS::String* label); // label property: setter
+
+    void          endEncoding();
+
+    void          insertDebugSignpost(const NS::String* string);
+
+    void          pushDebugGroup(const NS::String* string);
+
+    void          popDebugGroup();
+};
+
+}
+
+// property: device - getter; sends the device message to this object and returns the reply as MTL::Device*.
+_MTL_INLINE MTL::Device* MTL::CommandEncoder::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label - the getter returns the reply to the label message as NS::String*; the setter sends setLabel_ with the new label pointer.
+_MTL_INLINE NS::String* MTL::CommandEncoder::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::CommandEncoder::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// method: endEncoding - sends the endEncoding message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandEncoder::endEncoding()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(endEncoding));
+}
+
+// method: insertDebugSignpost: - sends selector key insertDebugSignpost_ to this object with the NS::String pointer as its argument; returns nothing.
+_MTL_INLINE void MTL::CommandEncoder::insertDebugSignpost(const NS::String* string)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(insertDebugSignpost_), string);
+}
+
+// method: pushDebugGroup: - sends selector key pushDebugGroup_ to this object with the NS::String pointer as its argument; returns nothing.
+_MTL_INLINE void MTL::CommandEncoder::pushDebugGroup(const NS::String* string)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(pushDebugGroup_), string);
+}
+
+// method: popDebugGroup - sends the popDebugGroup message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandEncoder::popDebugGroup()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(popDebugGroup));
+}
diff --git a/metal-cpp/Metal/MTLCommandQueue.hpp b/metal-cpp/Metal/MTLCommandQueue.hpp
new file mode 100644
index 0000000..f54a16e
--- /dev/null
+++ b/metal-cpp/Metal/MTLCommandQueue.hpp
@@ -0,0 +1,89 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLCommandQueue.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{ // Declares the CommandQueue wrapper class; the member definitions follow this namespace.
+class CommandQueue : public NS::Referencing<CommandQueue> // declarations only; no data members are declared here
+{
+public:
+    NS::String*          label() const; // label property: getter
+    void                 setLabel(const NS::String* label); // label property: setter
+
+    class Device*        device() const; // getter only
+
+    class CommandBuffer* commandBuffer(); // overload without arguments
+
+    class CommandBuffer* commandBuffer(const class CommandBufferDescriptor* descriptor); // overload taking a descriptor
+
+    class CommandBuffer* commandBufferWithUnretainedReferences();
+
+    void                 insertDebugCaptureBoundary();
+};
+
+}
+
+// property: label - the getter returns the reply to the label message as NS::String*; the setter sends setLabel_ with the new label pointer.
+_MTL_INLINE NS::String* MTL::CommandQueue::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::CommandQueue::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: device - getter; sends the device message to this object and returns the reply as MTL::Device*.
+_MTL_INLINE MTL::Device* MTL::CommandQueue::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// method: commandBuffer - sends the commandBuffer message (no arguments) and returns the reply as MTL::CommandBuffer*.
+_MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBuffer()
+{
+    return Object::sendMessage<MTL::CommandBuffer*>(this, _MTL_PRIVATE_SEL(commandBuffer));
+}
+
+// method: commandBufferWithDescriptor: - sends selector key commandBufferWithDescriptor_ with descriptor; returns the reply as MTL::CommandBuffer*.
+_MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBuffer(const MTL::CommandBufferDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::CommandBuffer*>(this, _MTL_PRIVATE_SEL(commandBufferWithDescriptor_), descriptor);
+}
+
+// method: commandBufferWithUnretainedReferences - sends the commandBufferWithUnretainedReferences message (no arguments) and returns the reply as MTL::CommandBuffer*.
+_MTL_INLINE MTL::CommandBuffer* MTL::CommandQueue::commandBufferWithUnretainedReferences()
+{
+    return Object::sendMessage<MTL::CommandBuffer*>(this, _MTL_PRIVATE_SEL(commandBufferWithUnretainedReferences));
+}
+
+// method: insertDebugCaptureBoundary - sends the insertDebugCaptureBoundary message to this object; takes no arguments and returns nothing.
+_MTL_INLINE void MTL::CommandQueue::insertDebugCaptureBoundary()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(insertDebugCaptureBoundary));
+}
diff --git a/metal-cpp/Metal/MTLComputeCommandEncoder.hpp b/metal-cpp/Metal/MTLComputeCommandEncoder.hpp
new file mode 100644
index 0000000..5fa9ef7
--- /dev/null
+++ b/metal-cpp/Metal/MTLComputeCommandEncoder.hpp
@@ -0,0 +1,337 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLComputeCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLBuffer.hpp"
+#include "MTLCommandBuffer.hpp"
+#include "MTLCommandEncoder.hpp"
+#include "MTLHeap.hpp"
+#include "MTLIntersectionFunctionTable.hpp"
+#include "MTLResource.hpp"
+#include "MTLSampler.hpp"
+#include "MTLTexture.hpp"
+#include "MTLTypes.hpp"
+#include "MTLVisibleFunctionTable.hpp"
+
+namespace MTL
+{ // Declares two indirect-argument structs and the ComputeCommandEncoder wrapper class; the member definitions follow this namespace.
+struct DispatchThreadgroupsIndirectArguments // a single uint32_t[3] field; tagged with the _MTL_PACKED macro
+{
+    uint32_t threadgroupsPerGrid[3];
+} _MTL_PACKED;
+
+struct StageInRegionIndirectArguments // two uint32_t[3] fields (origin, then size); tagged with the _MTL_PACKED macro
+{
+    uint32_t stageInOrigin[3];
+    uint32_t stageInSize[3];
+} _MTL_PACKED;
+
+class ComputeCommandEncoder : public NS::Referencing<ComputeCommandEncoder, CommandEncoder> // declarations only; no data members are declared here
+{
+public:
+    MTL::DispatchType dispatchType() const; // getter only
+
+    void              setComputePipelineState(const class ComputePipelineState* state);
+
+    void              setBytes(const void* bytes, NS::UInteger length, NS::UInteger index);
+
+    void              setBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void              setBufferOffset(NS::UInteger offset, NS::UInteger index);
+
+    void              setBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range); // array form of setBuffer
+
+    void              setVisibleFunctionTable(const class VisibleFunctionTable* visibleFunctionTable, NS::UInteger bufferIndex);
+
+    void              setVisibleFunctionTables(const class VisibleFunctionTable* visibleFunctionTables[], NS::Range range);
+
+    void              setIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex);
+
+    void              setIntersectionFunctionTables(const class IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range);
+
+    void              setAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex);
+
+    void              setTexture(const class Texture* texture, NS::UInteger index);
+
+    void              setTextures(MTL::Texture* textures[], NS::Range range);
+
+    void              setSamplerState(const class SamplerState* sampler, NS::UInteger index);
+
+    void              setSamplerStates(MTL::SamplerState* samplers[], NS::Range range);
+
+    void              setSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index); // overload with LOD clamp arguments
+
+    void              setSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range); // overload with LOD clamp arrays
+
+    void              setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index);
+
+    void              setImageblockWidth(NS::UInteger width, NS::UInteger height);
+
+    void              setStageInRegion(MTL::Region region);
+
+    void              setStageInRegion(const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); // overload taking an indirect buffer and offset
+
+    void              dispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup);
+
+    void              dispatchThreadgroups(const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset, MTL::Size threadsPerThreadgroup); // overload taking an indirect buffer and offset
+
+    void              dispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup);
+
+    void              updateFence(const class Fence* fence);
+
+    void              waitForFence(const class Fence* fence);
+
+    void              useResource(const class Resource* resource, MTL::ResourceUsage usage);
+
+    void              useResources(MTL::Resource* resources[], NS::UInteger count, MTL::ResourceUsage usage);
+
+    void              useHeap(const class Heap* heap);
+
+    void              useHeaps(MTL::Heap* heaps[], NS::UInteger count);
+
+    void              executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange);
+
+    void              executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandbuffer, const class Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset); // overload taking the range from a buffer
+
+    void              memoryBarrier(MTL::BarrierScope scope); // overload taking a BarrierScope value
+
+    void              memoryBarrier(MTL::Resource* resources[], NS::UInteger count); // overload taking an array of resources
+
+    void              sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier);
+};
+
+}
+
+// property: dispatchType - getter; sends the dispatchType message to this object and returns the reply as MTL::DispatchType.
+_MTL_INLINE MTL::DispatchType MTL::ComputeCommandEncoder::dispatchType() const
+{
+    return Object::sendMessage<MTL::DispatchType>(this, _MTL_PRIVATE_SEL(dispatchType));
+}
+
+// method: setComputePipelineState: - sends selector key setComputePipelineState_ to this object with the state pointer; returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setComputePipelineState(const MTL::ComputePipelineState* state)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setComputePipelineState_), state);
+}
+
+// method: setBytes:length:atIndex: - sends selector key setBytes_length_atIndex_ to this object with (bytes, length, index); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setBuffer:offset:atIndex: - sends selector key setBuffer_offset_atIndex_ to this object with (buffer, offset, index); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setBufferOffset:atIndex: - sends selector key setBufferOffset_atIndex_ to this object with (offset, index); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBufferOffset_atIndex_), offset, index);
+}
+
+// method: setBuffers:offsets:withRange: - sends selector key setBuffers_offsets_withRange_ to this object with (buffers, offsets, range); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setVisibleFunctionTable:atBufferIndex: - sends selector key setVisibleFunctionTable_atBufferIndex_ with (visibleFunctionTable, bufferIndex); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setVisibleFunctionTable(const MTL::VisibleFunctionTable* visibleFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibleFunctionTable_atBufferIndex_), visibleFunctionTable, bufferIndex);
+}
+
+// method: setVisibleFunctionTables:withBufferRange: - sends selector key setVisibleFunctionTables_withBufferRange_ with (visibleFunctionTables, range); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setVisibleFunctionTables(const MTL::VisibleFunctionTable* visibleFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibleFunctionTables_withBufferRange_), visibleFunctionTables, range);
+}
+
+// method: setIntersectionFunctionTable:atBufferIndex: - sends selector key setIntersectionFunctionTable_atBufferIndex_ with (intersectionFunctionTable, bufferIndex); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex);
+}
+
+// method: setIntersectionFunctionTables:withBufferRange: - sends selector key setIntersectionFunctionTables_withBufferRange_ with (intersectionFunctionTables, range); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setIntersectionFunctionTables(const MTL::IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range);
+}
+
+// method: setAccelerationStructure:atBufferIndex: - sends selector key setAccelerationStructure_atBufferIndex_ with (accelerationStructure, bufferIndex); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex);
+}
+
+// method: setTexture:atIndex: - sends selector key setTexture_atIndex_ to this object with (texture, index); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTexture_atIndex_), texture, index);
+}
+
+// method: setTextures:withRange: - sends selector key setTextures_withRange_ to this object with (textures, range); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setTextures(MTL::Texture* textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTextures_withRange_), textures, range);
+}
+
+// method: setSamplerState:atIndex: - sends selector key setSamplerState_atIndex_ to this object with (sampler, index); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSamplerState_atIndex_), sampler, index);
+}
+
+// method: setSamplerStates:withRange: - sends selector key setSamplerStates_withRange_ to this object with (samplers, range); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerStates(MTL::SamplerState* samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSamplerStates_withRange_), samplers, range);
+}
+
+// method: setSamplerState:lodMinClamp:lodMaxClamp:atIndex: - sends selector key setSamplerState_lodMinClamp_lodMaxClamp_atIndex_ with (sampler, lodMinClamp, lodMaxClamp, index); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setSamplerStates:lodMinClamps:lodMaxClamps:withRange: - sends selector key setSamplerStates_lodMinClamps_lodMaxClamps_withRange_ with (samplers, lodMinClamps, lodMaxClamps, range); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: setThreadgroupMemoryLength:atIndex: - sends selector key setThreadgroupMemoryLength_atIndex_ to this object with (length, index); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_atIndex_), length, index);
+}
+
+// method: setImageblockWidth:height: - sends selector key setImageblockWidth_height_ to this object with (width, height); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setImageblockWidth(NS::UInteger width, NS::UInteger height)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setImageblockWidth_height_), width, height);
+}
+
+// method: setStageInRegion: - sends selector key setStageInRegion_ to this object with the MTL::Region passed by value; returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setStageInRegion(MTL::Region region)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStageInRegion_), region);
+}
+
+// method: setStageInRegionWithIndirectBuffer:indirectBufferOffset: - sends selector key setStageInRegionWithIndirectBuffer_indirectBufferOffset_ with (indirectBuffer, indirectBufferOffset); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::setStageInRegion(const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStageInRegionWithIndirectBuffer_indirectBufferOffset_), indirectBuffer, indirectBufferOffset);
+}
+
+// method: dispatchThreadgroups:threadsPerThreadgroup: - sends selector key dispatchThreadgroups_threadsPerThreadgroup_ with (threadgroupsPerGrid, threadsPerThreadgroup); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::dispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(dispatchThreadgroups_threadsPerThreadgroup_), threadgroupsPerGrid, threadsPerThreadgroup);
+}
+
+// method: dispatchThreadgroupsWithIndirectBuffer:indirectBufferOffset:threadsPerThreadgroup: - sends the matching selector key with (indirectBuffer, indirectBufferOffset, threadsPerThreadgroup); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::dispatchThreadgroups(const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset, MTL::Size threadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(dispatchThreadgroupsWithIndirectBuffer_indirectBufferOffset_threadsPerThreadgroup_), indirectBuffer, indirectBufferOffset, threadsPerThreadgroup);
+}
+
+// method: dispatchThreads:threadsPerThreadgroup: - sends selector key dispatchThreads_threadsPerThreadgroup_ with (threadsPerGrid, threadsPerThreadgroup); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::dispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(dispatchThreads_threadsPerThreadgroup_), threadsPerGrid, threadsPerThreadgroup);
+}
+
+// method: updateFence: - sends selector key updateFence_ to this object with the fence pointer; returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::updateFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateFence_), fence);
+}
+
+// method: waitForFence: - sends selector key waitForFence_ to this object with the fence pointer; returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::waitForFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitForFence_), fence);
+}
+
+// method: useResource:usage: - sends selector key useResource_usage_ to this object with (resource, usage); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResource_usage_), resource, usage);
+}
+
+// method: useResources:count:usage: - sends selector key useResources_count_usage_ to this object with (resources, count, usage); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::useResources(MTL::Resource* resources[], NS::UInteger count, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResources_count_usage_), resources, count, usage);
+}
+
+// method: useHeap: - sends selector key useHeap_ to this object with the heap pointer; returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::useHeap(const MTL::Heap* heap)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeap_), heap);
+}
+
+// method: useHeaps:count: - sends selector key useHeaps_count_ to this object with (heaps, count); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::useHeaps(MTL::Heap* heaps[], NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeaps_count_), heaps, count);
+}
+
+// method: executeCommandsInBuffer:withRange: - sends selector key executeCommandsInBuffer_withRange_ with (indirectCommandBuffer, executionRange); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_withRange_), indirectCommandBuffer, executionRange);
+}
+
+// method: executeCommandsInBuffer:indirectBuffer:indirectBufferOffset: - sends the matching selector key with (indirectCommandbuffer, indirectRangeBuffer, indirectBufferOffset); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandbuffer, const MTL::Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_indirectBuffer_indirectBufferOffset_), indirectCommandbuffer, indirectRangeBuffer, indirectBufferOffset);
+}
+
+// method: memoryBarrierWithScope: - sends selector key memoryBarrierWithScope_ to this object with the MTL::BarrierScope value; returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::memoryBarrier(MTL::BarrierScope scope)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(memoryBarrierWithScope_), scope);
+}
+
+// method: memoryBarrierWithResources:count: - sends selector key memoryBarrierWithResources_count_ to this object with (resources, count); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::memoryBarrier(MTL::Resource* resources[], NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(memoryBarrierWithResources_count_), resources, count);
+}
+
+// method: sampleCountersInBuffer:atSampleIndex:withBarrier: - sends selector key sampleCountersInBuffer_atSampleIndex_withBarrier_ with (sampleBuffer, sampleIndex, barrier); returns nothing.
+_MTL_INLINE void MTL::ComputeCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, barrier);
+}
diff --git a/metal-cpp/Metal/MTLComputePass.hpp b/metal-cpp/Metal/MTLComputePass.hpp
new file mode 100644
index 0000000..f3d4b52
--- /dev/null
+++ b/metal-cpp/Metal/MTLComputePass.hpp
@@ -0,0 +1,181 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLComputePass.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLCommandBuffer.hpp"
+
+namespace MTL
+{
+class ComputePassSampleBufferAttachmentDescriptor : public NS::Copying<ComputePassSampleBufferAttachmentDescriptor> // Declaration only: one sample buffer plus two sample indices; inline bodies follow the namespace.
+{
+public:
+    static class ComputePassSampleBufferAttachmentDescriptor* alloc(); // allocates through the MTLComputePassSampleBufferAttachmentDescriptor class symbol
+
+    class ComputePassSampleBufferAttachmentDescriptor*        init(); // delegates to NS::Object::init
+
+    class CounterSampleBuffer*                                sampleBuffer() const; // accessor pair for the CounterSampleBuffer held by this attachment
+    void                                                      setSampleBuffer(const class CounterSampleBuffer* sampleBuffer);
+
+    NS::UInteger                                              startOfEncoderSampleIndex() const; // presumably the sample slot written when the encoder starts -- confirm in Metal docs
+    void                                                      setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex);
+
+    NS::UInteger                                              endOfEncoderSampleIndex() const; // presumably the sample slot written when the encoder ends -- confirm in Metal docs
+    void                                                      setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex);
+};
+
+class ComputePassSampleBufferAttachmentDescriptorArray : public NS::Referencing<ComputePassSampleBufferAttachmentDescriptorArray> // Declaration only: index-addressed collection of ComputePassSampleBufferAttachmentDescriptor pointers.
+{
+public:
+    static class ComputePassSampleBufferAttachmentDescriptorArray* alloc(); // allocates through the MTLComputePassSampleBufferAttachmentDescriptorArray class symbol
+
+    class ComputePassSampleBufferAttachmentDescriptorArray*        init(); // delegates to NS::Object::init
+
+    class ComputePassSampleBufferAttachmentDescriptor*             object(NS::UInteger attachmentIndex); // implemented with the objectAtIndexedSubscript_ selector
+
+    void                                                           setObject(const class ComputePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex); // implemented with the setObject_atIndexedSubscript_ selector
+};
+
+class ComputePassDescriptor : public NS::Copying<ComputePassDescriptor> // Declaration only: exposes a dispatch type and a read-only sample-buffer attachment array; inline bodies follow the namespace.
+{
+public:
+    static class ComputePassDescriptor*                     alloc(); // allocates through the MTLComputePassDescriptor class symbol
+
+    class ComputePassDescriptor*                            init(); // delegates to NS::Object::init
+
+    static class ComputePassDescriptor*                     computePassDescriptor(); // class-level factory: the selector is sent to the class symbol, not to an instance
+
+    MTL::DispatchType                                       dispatchType() const;
+    void                                                    setDispatchType(MTL::DispatchType dispatchType);
+
+    class ComputePassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const; // getter only; no setter is declared
+};
+
+}
+
+// static method: alloc -- returns NS::Object::alloc for the MTLComputePassSampleBufferAttachmentDescriptor class symbol.
+_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptor* MTL::ComputePassSampleBufferAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::ComputePassSampleBufferAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLComputePassSampleBufferAttachmentDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this descriptor class, and returns its result.
+_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptor* MTL::ComputePassSampleBufferAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::ComputePassSampleBufferAttachmentDescriptor>();
+}
+
+// property: sampleBuffer -- getter returns the result of the sampleBuffer selector; the setter below forwards its argument via setSampleBuffer_.
+_MTL_INLINE MTL::CounterSampleBuffer* MTL::ComputePassSampleBufferAttachmentDescriptor::sampleBuffer() const
+{
+    return Object::sendMessage<MTL::CounterSampleBuffer*>(this, _MTL_PRIVATE_SEL(sampleBuffer));
+}
+
+_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer);
+}
+
+// property: startOfEncoderSampleIndex -- getter returns the selector's NS::UInteger result; the setter below forwards the new index via setStartOfEncoderSampleIndex_.
+_MTL_INLINE NS::UInteger MTL::ComputePassSampleBufferAttachmentDescriptor::startOfEncoderSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(startOfEncoderSampleIndex));
+}
+
+_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptor::setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStartOfEncoderSampleIndex_), startOfEncoderSampleIndex);
+}
+
+// property: endOfEncoderSampleIndex -- getter returns the selector's NS::UInteger result; the setter below forwards the new index via setEndOfEncoderSampleIndex_.
+_MTL_INLINE NS::UInteger MTL::ComputePassSampleBufferAttachmentDescriptor::endOfEncoderSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(endOfEncoderSampleIndex));
+}
+
+_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptor::setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setEndOfEncoderSampleIndex_), endOfEncoderSampleIndex);
+}
+
+// static method: alloc -- returns NS::Object::alloc for the MTLComputePassSampleBufferAttachmentDescriptorArray class symbol.
+_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptorArray* MTL::ComputePassSampleBufferAttachmentDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::ComputePassSampleBufferAttachmentDescriptorArray>(_MTL_PRIVATE_CLS(MTLComputePassSampleBufferAttachmentDescriptorArray));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this array class, and returns its result.
+_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptorArray* MTL::ComputePassSampleBufferAttachmentDescriptorArray::init()
+{
+    return NS::Object::init<MTL::ComputePassSampleBufferAttachmentDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: -- returns the descriptor pointer the selector yields for `attachmentIndex`; no bounds check is performed in this wrapper.
+_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptor* MTL::ComputePassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex)
+{
+    return Object::sendMessage<MTL::ComputePassSampleBufferAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex);
+}
+
+// method: setObject:atIndexedSubscript: -- forwards `attachment` and `attachmentIndex` unchanged; no bounds check is performed in this wrapper.
+_MTL_INLINE void MTL::ComputePassSampleBufferAttachmentDescriptorArray::setObject(const MTL::ComputePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex);
+}
+
+// static method: alloc -- returns NS::Object::alloc for the MTLComputePassDescriptor class symbol.
+_MTL_INLINE MTL::ComputePassDescriptor* MTL::ComputePassDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::ComputePassDescriptor>(_MTL_PRIVATE_CLS(MTLComputePassDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as ComputePassDescriptor, and returns its result.
+_MTL_INLINE MTL::ComputePassDescriptor* MTL::ComputePassDescriptor::init()
+{
+    return NS::Object::init<MTL::ComputePassDescriptor>();
+}
+
+// static method: computePassDescriptor -- sends the selector to the MTLComputePassDescriptor class symbol (no `this`) and returns the resulting pointer.
+_MTL_INLINE MTL::ComputePassDescriptor* MTL::ComputePassDescriptor::computePassDescriptor()
+{
+    return Object::sendMessage<MTL::ComputePassDescriptor*>(_MTL_PRIVATE_CLS(MTLComputePassDescriptor), _MTL_PRIVATE_SEL(computePassDescriptor));
+}
+
+// property: dispatchType -- getter returns the selector's MTL::DispatchType result; the setter below forwards the new value via setDispatchType_.
+_MTL_INLINE MTL::DispatchType MTL::ComputePassDescriptor::dispatchType() const
+{
+    return Object::sendMessage<MTL::DispatchType>(this, _MTL_PRIVATE_SEL(dispatchType));
+}
+
+_MTL_INLINE void MTL::ComputePassDescriptor::setDispatchType(MTL::DispatchType dispatchType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDispatchType_), dispatchType);
+}
+
+// property: sampleBufferAttachments -- read-only here: returns the attachment-array pointer produced by the selector; no setter is defined.
+_MTL_INLINE MTL::ComputePassSampleBufferAttachmentDescriptorArray* MTL::ComputePassDescriptor::sampleBufferAttachments() const
+{
+    return Object::sendMessage<MTL::ComputePassSampleBufferAttachmentDescriptorArray*>(this, _MTL_PRIVATE_SEL(sampleBufferAttachments));
+}
diff --git a/metal-cpp/Metal/MTLComputePipeline.hpp b/metal-cpp/Metal/MTLComputePipeline.hpp
new file mode 100644
index 0000000..de56d7f
--- /dev/null
+++ b/metal-cpp/Metal/MTLComputePipeline.hpp
@@ -0,0 +1,357 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLComputePipeline.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLTypes.hpp"
+
+namespace MTL
+{
+class ComputePipelineReflection : public NS::Referencing<ComputePipelineReflection> // Declaration only: exposes a single read-only NS::Array of arguments; inline bodies follow the namespace.
+{
+public:
+    static class ComputePipelineReflection* alloc(); // allocates through the MTLComputePipelineReflection class symbol
+
+    class ComputePipelineReflection*        init(); // delegates to NS::Object::init
+
+    NS::Array*                              arguments() const; // element type is not visible in this header -- see the Metal docs
+};
+
+class ComputePipelineDescriptor : public NS::Copying<ComputePipelineDescriptor> // Declaration only: a bag of getter/setter pairs describing a compute pipeline; inline bodies follow the namespace.
+{
+public:
+    static class ComputePipelineDescriptor* alloc(); // allocates through the MTLComputePipelineDescriptor class symbol
+
+    class ComputePipelineDescriptor*        init(); // delegates to NS::Object::init
+
+    NS::String*                             label() const;
+    void                                    setLabel(const NS::String* label);
+
+    class Function*                         computeFunction() const;
+    void                                    setComputeFunction(const class Function* computeFunction);
+
+    bool                                    threadGroupSizeIsMultipleOfThreadExecutionWidth() const;
+    void                                    setThreadGroupSizeIsMultipleOfThreadExecutionWidth(bool threadGroupSizeIsMultipleOfThreadExecutionWidth);
+
+    NS::UInteger                            maxTotalThreadsPerThreadgroup() const;
+    void                                    setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup);
+
+    class StageInputOutputDescriptor*       stageInputDescriptor() const;
+    void                                    setStageInputDescriptor(const class StageInputOutputDescriptor* stageInputDescriptor);
+
+    class PipelineBufferDescriptorArray*    buffers() const; // getter only; no setter is declared
+
+    bool                                    supportIndirectCommandBuffers() const; // getter is implemented with sendMessageSafe rather than sendMessage
+    void                                    setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers);
+
+    NS::Array*                              insertLibraries() const;
+    void                                    setInsertLibraries(const NS::Array* insertLibraries);
+
+    NS::Array*                              preloadedLibraries() const;
+    void                                    setPreloadedLibraries(const NS::Array* preloadedLibraries);
+
+    NS::Array*                              binaryArchives() const;
+    void                                    setBinaryArchives(const NS::Array* binaryArchives);
+
+    void                                    reset(); // sends the argument-less reset selector; presumably restores defaults -- confirm in Metal docs
+
+    class LinkedFunctions*                  linkedFunctions() const;
+    void                                    setLinkedFunctions(const class LinkedFunctions* linkedFunctions);
+
+    bool                                    supportAddingBinaryFunctions() const; // getter is implemented with sendMessageSafe rather than sendMessage
+    void                                    setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions);
+
+    NS::UInteger                            maxCallStackDepth() const;
+    void                                    setMaxCallStackDepth(NS::UInteger maxCallStackDepth);
+};
+
+class ComputePipelineState : public NS::Referencing<ComputePipelineState> // Declaration only: no alloc/init is declared here; members are getters plus lookup and new* methods.
+{
+public:
+    NS::String*                      label() const;
+
+    class Device*                    device() const;
+
+    NS::UInteger                     maxTotalThreadsPerThreadgroup() const;
+
+    NS::UInteger                     threadExecutionWidth() const;
+
+    NS::UInteger                     staticThreadgroupMemoryLength() const;
+
+    NS::UInteger                     imageblockMemoryLength(MTL::Size imageblockDimensions); // implemented with imageblockMemoryLengthForDimensions_
+
+    bool                             supportIndirectCommandBuffers() const; // implemented with sendMessageSafe rather than sendMessage
+
+    class FunctionHandle*            functionHandle(const class Function* function); // implemented with functionHandleWithFunction_
+
+    class ComputePipelineState*      newComputePipelineState(const NS::Array* functions, NS::Error** error); // `error` is passed through to the selector; presumably an out-parameter -- confirm
+
+    class VisibleFunctionTable*      newVisibleFunctionTable(const class VisibleFunctionTableDescriptor* descriptor);
+
+    class IntersectionFunctionTable* newIntersectionFunctionTable(const class IntersectionFunctionTableDescriptor* descriptor);
+};
+
+}
+
+// static method: alloc -- returns NS::Object::alloc for the MTLComputePipelineReflection class symbol.
+_MTL_INLINE MTL::ComputePipelineReflection* MTL::ComputePipelineReflection::alloc()
+{
+    return NS::Object::alloc<MTL::ComputePipelineReflection>(_MTL_PRIVATE_CLS(MTLComputePipelineReflection));
+}
+
+// method: init -- delegates to NS::Object::init, typed as ComputePipelineReflection, and returns its result.
+_MTL_INLINE MTL::ComputePipelineReflection* MTL::ComputePipelineReflection::init()
+{
+    return NS::Object::init<MTL::ComputePipelineReflection>();
+}
+
+// property: arguments -- read-only here: returns the NS::Array pointer produced by the arguments selector.
+_MTL_INLINE NS::Array* MTL::ComputePipelineReflection::arguments() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(arguments));
+}
+
+// static method: alloc -- returns NS::Object::alloc for the MTLComputePipelineDescriptor class symbol.
+_MTL_INLINE MTL::ComputePipelineDescriptor* MTL::ComputePipelineDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::ComputePipelineDescriptor>(_MTL_PRIVATE_CLS(MTLComputePipelineDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as ComputePipelineDescriptor, and returns its result.
+_MTL_INLINE MTL::ComputePipelineDescriptor* MTL::ComputePipelineDescriptor::init()
+{
+    return NS::Object::init<MTL::ComputePipelineDescriptor>();
+}
+
+// property: label -- getter returns the NS::String pointer from the label selector; the setter below forwards the new string via setLabel_.
+_MTL_INLINE NS::String* MTL::ComputePipelineDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: computeFunction -- getter returns the MTL::Function pointer from the selector; the setter below forwards the new function via setComputeFunction_.
+_MTL_INLINE MTL::Function* MTL::ComputePipelineDescriptor::computeFunction() const
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(computeFunction));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setComputeFunction(const MTL::Function* computeFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setComputeFunction_), computeFunction);
+}
+
+// property: threadGroupSizeIsMultipleOfThreadExecutionWidth -- boolean flag; getter returns the selector's result, the setter below forwards the new value.
+_MTL_INLINE bool MTL::ComputePipelineDescriptor::threadGroupSizeIsMultipleOfThreadExecutionWidth() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(threadGroupSizeIsMultipleOfThreadExecutionWidth));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setThreadGroupSizeIsMultipleOfThreadExecutionWidth(bool threadGroupSizeIsMultipleOfThreadExecutionWidth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadGroupSizeIsMultipleOfThreadExecutionWidth_), threadGroupSizeIsMultipleOfThreadExecutionWidth);
+}
+
+// property: maxTotalThreadsPerThreadgroup -- getter returns the selector's NS::UInteger result; the setter below forwards the new limit via setMaxTotalThreadsPerThreadgroup_.
+_MTL_INLINE NS::UInteger MTL::ComputePipelineDescriptor::maxTotalThreadsPerThreadgroup() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxTotalThreadsPerThreadgroup_), maxTotalThreadsPerThreadgroup);
+}
+
+// property: stageInputDescriptor -- getter returns the StageInputOutputDescriptor pointer from the selector; the setter below forwards the new descriptor.
+_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::ComputePipelineDescriptor::stageInputDescriptor() const
+{
+    return Object::sendMessage<MTL::StageInputOutputDescriptor*>(this, _MTL_PRIVATE_SEL(stageInputDescriptor));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setStageInputDescriptor(const MTL::StageInputOutputDescriptor* stageInputDescriptor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStageInputDescriptor_), stageInputDescriptor);
+}
+
+// property: buffers -- read-only here: returns the PipelineBufferDescriptorArray pointer from the buffers selector; no setter is defined.
+_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::ComputePipelineDescriptor::buffers() const
+{
+    return Object::sendMessage<MTL::PipelineBufferDescriptorArray*>(this, _MTL_PRIVATE_SEL(buffers));
+}
+
+// property: supportIndirectCommandBuffers -- getter goes through sendMessageSafe (presumably tolerates the selector being unavailable; confirm in NSObject.hpp); the setter uses plain sendMessage.
+_MTL_INLINE bool MTL::ComputePipelineDescriptor::supportIndirectCommandBuffers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportIndirectCommandBuffers_), supportIndirectCommandBuffers);
+}
+
+// property: insertLibraries -- getter returns the NS::Array pointer from the selector; the setter below forwards the new array via setInsertLibraries_.
+_MTL_INLINE NS::Array* MTL::ComputePipelineDescriptor::insertLibraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(insertLibraries));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setInsertLibraries(const NS::Array* insertLibraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInsertLibraries_), insertLibraries);
+}
+
+// property: preloadedLibraries -- getter returns the NS::Array pointer from the selector; the setter below forwards the new array via setPreloadedLibraries_.
+_MTL_INLINE NS::Array* MTL::ComputePipelineDescriptor::preloadedLibraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(preloadedLibraries));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setPreloadedLibraries(const NS::Array* preloadedLibraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPreloadedLibraries_), preloadedLibraries);
+}
+
+// property: binaryArchives -- getter returns the NS::Array pointer from the selector; the setter below forwards the new array via setBinaryArchives_.
+_MTL_INLINE NS::Array* MTL::ComputePipelineDescriptor::binaryArchives() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(binaryArchives));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setBinaryArchives(const NS::Array* binaryArchives)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives);
+}
+
+// method: reset -- sends the argument-less reset selector and returns nothing (presumably restores default property values; confirm in the Metal docs).
+_MTL_INLINE void MTL::ComputePipelineDescriptor::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
+
+// property: linkedFunctions -- getter returns the LinkedFunctions pointer from the selector; the setter below forwards the new object via setLinkedFunctions_.
+_MTL_INLINE MTL::LinkedFunctions* MTL::ComputePipelineDescriptor::linkedFunctions() const
+{
+    return Object::sendMessage<MTL::LinkedFunctions*>(this, _MTL_PRIVATE_SEL(linkedFunctions));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setLinkedFunctions(const MTL::LinkedFunctions* linkedFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLinkedFunctions_), linkedFunctions);
+}
+
+// property: supportAddingBinaryFunctions -- getter goes through sendMessageSafe (presumably tolerates the selector being unavailable; confirm in NSObject.hpp); the setter uses plain sendMessage.
+_MTL_INLINE bool MTL::ComputePipelineDescriptor::supportAddingBinaryFunctions() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportAddingBinaryFunctions));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportAddingBinaryFunctions_), supportAddingBinaryFunctions);
+}
+
+// property: maxCallStackDepth -- getter returns the selector's NS::UInteger result; the setter below forwards the new depth via setMaxCallStackDepth_.
+_MTL_INLINE NS::UInteger MTL::ComputePipelineDescriptor::maxCallStackDepth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxCallStackDepth));
+}
+
+_MTL_INLINE void MTL::ComputePipelineDescriptor::setMaxCallStackDepth(NS::UInteger maxCallStackDepth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxCallStackDepth_), maxCallStackDepth);
+}
+
+// property: label -- read-only on ComputePipelineState: returns the NS::String pointer from the label selector.
+_MTL_INLINE NS::String* MTL::ComputePipelineState::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: device -- read-only: returns the MTL::Device pointer from the device selector.
+_MTL_INLINE MTL::Device* MTL::ComputePipelineState::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: maxTotalThreadsPerThreadgroup -- read-only on ComputePipelineState: returns the selector's NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::maxTotalThreadsPerThreadgroup() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup));
+}
+
+// property: threadExecutionWidth -- read-only: returns the selector's NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::threadExecutionWidth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadExecutionWidth));
+}
+
+// property: staticThreadgroupMemoryLength -- read-only: returns the selector's NS::UInteger result (unit presumably bytes; confirm in the Metal docs).
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::staticThreadgroupMemoryLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(staticThreadgroupMemoryLength));
+}
+
+// method: imageblockMemoryLengthForDimensions: -- passes the MTL::Size by value to the selector and returns its NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::ComputePipelineState::imageblockMemoryLength(MTL::Size imageblockDimensions)
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(imageblockMemoryLengthForDimensions_), imageblockDimensions);
+}
+
+// property: supportIndirectCommandBuffers -- read-only; goes through sendMessageSafe (presumably tolerates the selector being unavailable; confirm in NSObject.hpp).
+_MTL_INLINE bool MTL::ComputePipelineState::supportIndirectCommandBuffers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers));
+}
+
+// method: functionHandleWithFunction: -- forwards `function` and returns the FunctionHandle pointer the selector yields.
+_MTL_INLINE MTL::FunctionHandle* MTL::ComputePipelineState::functionHandle(const MTL::Function* function)
+{
+    return Object::sendMessage<MTL::FunctionHandle*>(this, _MTL_PRIVATE_SEL(functionHandleWithFunction_), function);
+}
+
+// method: newComputePipelineStateWithAdditionalBinaryFunctions:error: -- forwards `functions` and `error` (presumably an out-parameter filled on failure; confirm) and returns the new state pointer.
+_MTL_INLINE MTL::ComputePipelineState* MTL::ComputePipelineState::newComputePipelineState(const NS::Array* functions, NS::Error** error)
+{
+    return Object::sendMessage<MTL::ComputePipelineState*>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithAdditionalBinaryFunctions_error_), functions, error);
+}
+
+// method: newVisibleFunctionTableWithDescriptor: -- forwards `descriptor` and returns the VisibleFunctionTable pointer the selector yields.
+_MTL_INLINE MTL::VisibleFunctionTable* MTL::ComputePipelineState::newVisibleFunctionTable(const MTL::VisibleFunctionTableDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::VisibleFunctionTable*>(this, _MTL_PRIVATE_SEL(newVisibleFunctionTableWithDescriptor_), descriptor);
+}
+
+// method: newIntersectionFunctionTableWithDescriptor: -- forwards `descriptor` and returns the IntersectionFunctionTable pointer the selector yields.
+_MTL_INLINE MTL::IntersectionFunctionTable* MTL::ComputePipelineState::newIntersectionFunctionTable(const MTL::IntersectionFunctionTableDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::IntersectionFunctionTable*>(this, _MTL_PRIVATE_SEL(newIntersectionFunctionTableWithDescriptor_), descriptor);
+}
diff --git a/metal-cpp/Metal/MTLCounters.hpp b/metal-cpp/Metal/MTLCounters.hpp
new file mode 100644
index 0000000..6e65c1d
--- /dev/null
+++ b/metal-cpp/Metal/MTLCounters.hpp
@@ -0,0 +1,258 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLCounters.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLResource.hpp"
+
+namespace MTL
+{
+
+_MTL_CONST( NS::ErrorDomain, CounterErrorDomain ); // _MTL_CONST aliases _NS_CONST (MTLDefines.hpp); every name in this run has a matching _MTL_PRIVATE_DEF_STR line after the namespace.
+
+using CommonCounter = NS::String*; // individual counter names are string constants
+
+_MTL_CONST( CommonCounter, CommonCounterTimestamp );
+_MTL_CONST( CommonCounter, CommonCounterTessellationInputPatches );
+_MTL_CONST( CommonCounter, CommonCounterVertexInvocations );
+_MTL_CONST( CommonCounter, CommonCounterPostTessellationVertexInvocations );
+_MTL_CONST( CommonCounter, CommonCounterClipperInvocations );
+_MTL_CONST( CommonCounter, CommonCounterClipperPrimitivesOut );
+_MTL_CONST( CommonCounter, CommonCounterFragmentInvocations );
+_MTL_CONST( CommonCounter, CommonCounterFragmentsPassed );
+_MTL_CONST( CommonCounter, CommonCounterComputeKernelInvocations );
+_MTL_CONST( CommonCounter, CommonCounterTotalCycles );
+_MTL_CONST( CommonCounter, CommonCounterVertexCycles );
+_MTL_CONST( CommonCounter, CommonCounterTessellationCycles );
+_MTL_CONST( CommonCounter, CommonCounterPostTessellationVertexCycles );
+_MTL_CONST( CommonCounter, CommonCounterFragmentCycles );
+_MTL_CONST( CommonCounter, CommonCounterRenderTargetWriteCycles );
+
+using CommonCounterSet = NS::String*; // counter-set names are string constants as well
+
+_MTL_CONST( CommonCounterSet, CommonCounterSetTimestamp );
+_MTL_CONST( CommonCounterSet, CommonCounterSetStageUtilization );
+_MTL_CONST( CommonCounterSet, CommonCounterSetStatistic );
+
+struct CounterResultTimestamp // Single 64-bit field; presumably the per-sample layout for CommonCounterSetTimestamp -- confirm in the Metal docs.
+{
+    uint64_t timestamp;
+} _MTL_PACKED; // _MTL_PACKED aliases _NS_PACKED (MTLDefines.hpp)
+
+struct CounterResultStageUtilization // Six 64-bit cycle counts; presumably the per-sample layout for CommonCounterSetStageUtilization -- confirm in the Metal docs.
+{
+    uint64_t totalCycles;
+    uint64_t vertexCycles;
+    uint64_t tessellationCycles;
+    uint64_t postTessellationVertexCycles;
+    uint64_t fragmentCycles;
+    uint64_t renderTargetCycles; // note: the related constant is spelled CommonCounterRenderTargetWriteCycles
+} _MTL_PACKED; // _MTL_PACKED aliases _NS_PACKED (MTLDefines.hpp)
+
+struct CounterResultStatistic // Eight 64-bit fields whose names parallel the non-cycle CommonCounter* constants; presumably the per-sample layout for CommonCounterSetStatistic -- confirm.
+{
+    uint64_t tessellationInputPatches;
+    uint64_t vertexInvocations;
+    uint64_t postTessellationVertexInvocations;
+    uint64_t clipperInvocations;
+    uint64_t clipperPrimitivesOut;
+    uint64_t fragmentInvocations;
+    uint64_t fragmentsPassed;
+    uint64_t computeKernelInvocations;
+} _MTL_PACKED; // _MTL_PACKED aliases _NS_PACKED (MTLDefines.hpp)
+
+class Counter : public NS::Referencing<Counter> // Declaration only: a single read-only name; no alloc/init is declared.
+{
+public:
+    NS::String* name() const;
+};
+
+class CounterSet : public NS::Referencing<CounterSet> // Declaration only: a read-only name plus an array of counters; no alloc/init is declared.
+{
+public:
+    NS::String* name() const;
+
+    NS::Array*  counters() const; // presumably an array of Counter objects -- element type is not visible here
+};
+
+class CounterSampleBufferDescriptor : public NS::Copying<CounterSampleBufferDescriptor> // Declaration only: four getter/setter pairs (counter set, label, storage mode, sample count).
+{
+public:
+    static class CounterSampleBufferDescriptor* alloc(); // allocates through the MTLCounterSampleBufferDescriptor class symbol
+
+    class CounterSampleBufferDescriptor*        init(); // delegates to NS::Object::init
+
+    class CounterSet*                           counterSet() const;
+    void                                        setCounterSet(const class CounterSet* counterSet);
+
+    NS::String*                                 label() const;
+    void                                        setLabel(const NS::String* label);
+
+    MTL::StorageMode                            storageMode() const;
+    void                                        setStorageMode(MTL::StorageMode storageMode);
+
+    NS::UInteger                                sampleCount() const;
+    void                                        setSampleCount(NS::UInteger sampleCount);
+};
+
+class CounterSampleBuffer : public NS::Referencing<CounterSampleBuffer> // Declaration only: read-only accessors plus resolveCounterRange; no alloc/init is declared.
+{
+public:
+    class Device* device() const;
+
+    NS::String*   label() const;
+
+    NS::UInteger  sampleCount() const;
+
+    NS::Data*     resolveCounterRange(NS::Range range); // returns NS::Data; its byte layout is not described in this header
+};
+
+_MTL_ENUM(NS::Integer, CounterSampleBufferError) { // _MTL_ENUM aliases _NS_ENUM (MTLDefines.hpp); NS::Integer is passed as its type argument.
+    CounterSampleBufferErrorOutOfMemory = 0,
+    CounterSampleBufferErrorInvalid = 1,
+};
+
+}
+
+_MTL_PRIVATE_DEF_STR( NS::ErrorDomain, CounterErrorDomain ); // One line per _MTL_CONST declared in namespace MTL above; the macro itself comes from MTLPrivate.hpp and is not visible here.
+
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterTimestamp );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterTessellationInputPatches );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterVertexInvocations );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterPostTessellationVertexInvocations );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterClipperInvocations );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterClipperPrimitivesOut );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterFragmentInvocations );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterFragmentsPassed );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterComputeKernelInvocations );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterTotalCycles );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterVertexCycles );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterTessellationCycles );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterPostTessellationVertexCycles );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterFragmentCycles );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounter, CommonCounterRenderTargetWriteCycles );
+
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounterSet, CommonCounterSetTimestamp );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounterSet, CommonCounterSetStageUtilization );
+_MTL_PRIVATE_DEF_STR( MTL::CommonCounterSet, CommonCounterSetStatistic );
+
+// property: name -- read-only on Counter: returns the NS::String pointer from the name selector.
+_MTL_INLINE NS::String* MTL::Counter::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: name -- read-only on CounterSet: returns the NS::String pointer from the name selector.
+_MTL_INLINE NS::String* MTL::CounterSet::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: counters -- read-only: returns the NS::Array pointer from the counters selector.
+_MTL_INLINE NS::Array* MTL::CounterSet::counters() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(counters));
+}
+
+// static method: alloc -- returns NS::Object::alloc for the MTLCounterSampleBufferDescriptor class symbol.
+_MTL_INLINE MTL::CounterSampleBufferDescriptor* MTL::CounterSampleBufferDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::CounterSampleBufferDescriptor>(_MTL_PRIVATE_CLS(MTLCounterSampleBufferDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as CounterSampleBufferDescriptor, and returns its result.
+_MTL_INLINE MTL::CounterSampleBufferDescriptor* MTL::CounterSampleBufferDescriptor::init()
+{
+    return NS::Object::init<MTL::CounterSampleBufferDescriptor>();
+}
+
+// property: counterSet -- getter returns the CounterSet pointer from the selector; the setter below forwards the new set via setCounterSet_.
+_MTL_INLINE MTL::CounterSet* MTL::CounterSampleBufferDescriptor::counterSet() const
+{
+    return Object::sendMessage<MTL::CounterSet*>(this, _MTL_PRIVATE_SEL(counterSet));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setCounterSet(const MTL::CounterSet* counterSet)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCounterSet_), counterSet);
+}
+
+// property: label -- getter returns the NS::String pointer from the label selector; the setter below forwards the new string via setLabel_.
+_MTL_INLINE NS::String* MTL::CounterSampleBufferDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: storageMode -- getter returns the selector's MTL::StorageMode result; the setter below forwards the new mode via setStorageMode_.
+_MTL_INLINE MTL::StorageMode MTL::CounterSampleBufferDescriptor::storageMode() const
+{
+    return Object::sendMessage<MTL::StorageMode>(this, _MTL_PRIVATE_SEL(storageMode));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setStorageMode(MTL::StorageMode storageMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStorageMode_), storageMode);
+}
+
+// property: sampleCount -- getter returns the selector's NS::UInteger result; the setter below forwards the new count via setSampleCount_.
+_MTL_INLINE NS::UInteger MTL::CounterSampleBufferDescriptor::sampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+
+_MTL_INLINE void MTL::CounterSampleBufferDescriptor::setSampleCount(NS::UInteger sampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount);
+}
+
+// property: device -- read-only on CounterSampleBuffer: returns the MTL::Device pointer from the device selector.
+_MTL_INLINE MTL::Device* MTL::CounterSampleBuffer::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label -- read-only on CounterSampleBuffer: returns the NS::String pointer from the label selector.
+_MTL_INLINE NS::String* MTL::CounterSampleBuffer::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: sampleCount -- read-only on CounterSampleBuffer: returns the selector's NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::CounterSampleBuffer::sampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+
+// method: resolveCounterRange: -- passes the NS::Range by value and returns the NS::Data pointer the selector yields (presumably null when resolution fails; confirm in the Metal docs).
+_MTL_INLINE NS::Data* MTL::CounterSampleBuffer::resolveCounterRange(NS::Range range)
+{
+    return Object::sendMessage<NS::Data*>(this, _MTL_PRIVATE_SEL(resolveCounterRange_), range);
+}
diff --git a/metal-cpp/Metal/MTLDefines.hpp b/metal-cpp/Metal/MTLDefines.hpp
new file mode 100644
index 0000000..762e7a2
--- /dev/null
+++ b/metal-cpp/Metal/MTLDefines.hpp
@@ -0,0 +1,41 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDefines.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "../Foundation/NSDefines.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#define _MTL_EXPORT _NS_EXPORT // attribute-style macros: each _MTL_* name is a plain alias of the same-suffix _NS_* macro
+#define _MTL_EXTERN _NS_EXTERN
+#define _MTL_INLINE _NS_INLINE
+#define _MTL_PACKED _NS_PACKED
+
+#define _MTL_CONST(type, name) _NS_CONST(type, name) // declaration helpers: both arguments are forwarded unchanged
+#define _MTL_ENUM(type, name) _NS_ENUM(type, name)
+#define _MTL_OPTIONS(type, name) _NS_OPTIONS(type, name)
+
+#define _MTL_VALIDATE_SIZE(ns, name) _NS_VALIDATE_SIZE(ns, name) // validation helpers: both arguments are forwarded unchanged
+#define _MTL_VALIDATE_ENUM(ns, name) _NS_VALIDATE_ENUM(ns, name)
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Metal/MTLDepthStencil.hpp b/metal-cpp/Metal/MTLDepthStencil.hpp
new file mode 100644
index 0000000..0d774dc
--- /dev/null
+++ b/metal-cpp/Metal/MTLDepthStencil.hpp
@@ -0,0 +1,269 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDepthStencil.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLDepthStencil.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, CompareFunction) { // NS::UInteger-backed; type of StencilDescriptor::stencilCompareFunction and DepthStencilDescriptor::depthCompareFunction
+    CompareFunctionNever = 0, // values run contiguously from 0 to 7
+    CompareFunctionLess = 1,
+    CompareFunctionEqual = 2,
+    CompareFunctionLessEqual = 3,
+    CompareFunctionGreater = 4,
+    CompareFunctionNotEqual = 5,
+    CompareFunctionGreaterEqual = 6,
+    CompareFunctionAlways = 7,
+};
+
+_MTL_ENUM(NS::UInteger, StencilOperation) { // NS::UInteger-backed; type of the three *Operation properties declared on StencilDescriptor
+    StencilOperationKeep = 0, // values run contiguously from 0 to 7
+    StencilOperationZero = 1,
+    StencilOperationReplace = 2,
+    StencilOperationIncrementClamp = 3,
+    StencilOperationDecrementClamp = 4,
+    StencilOperationInvert = 5,
+    StencilOperationIncrementWrap = 6,
+    StencilOperationDecrementWrap = 7,
+};
+
+class StencilDescriptor : public NS::Copying<StencilDescriptor> // Declaration only: alloc/init plus six getter/setter pairs; the inline definitions follow the MTL namespace in this header.
+{
+public:
+    static class StencilDescriptor* alloc();
+
+    class StencilDescriptor*        init();
+
+    MTL::CompareFunction            stencilCompareFunction() const;
+    void                            setStencilCompareFunction(MTL::CompareFunction stencilCompareFunction);
+
+    MTL::StencilOperation           stencilFailureOperation() const; // three StencilOperation-typed properties follow
+    void                            setStencilFailureOperation(MTL::StencilOperation stencilFailureOperation);
+
+    MTL::StencilOperation           depthFailureOperation() const;
+    void                            setDepthFailureOperation(MTL::StencilOperation depthFailureOperation);
+
+    MTL::StencilOperation           depthStencilPassOperation() const;
+    void                            setDepthStencilPassOperation(MTL::StencilOperation depthStencilPassOperation);
+
+    uint32_t                        readMask() const; // both masks are fixed-width uint32_t rather than NS::UInteger
+    void                            setReadMask(uint32_t readMask);
+
+    uint32_t                        writeMask() const;
+    void                            setWriteMask(uint32_t writeMask);
+};
+
+class DepthStencilDescriptor : public NS::Copying<DepthStencilDescriptor> // Declaration only: alloc/init plus five getter/setter pairs; the inline definitions follow the MTL namespace in this header.
+{
+public:
+    static class DepthStencilDescriptor* alloc();
+
+    class DepthStencilDescriptor*        init();
+
+    MTL::CompareFunction                 depthCompareFunction() const;
+    void                                 setDepthCompareFunction(MTL::CompareFunction depthCompareFunction);
+
+    bool                                 depthWriteEnabled() const; // the definition below sends `isDepthWriteEnabled`, not `depthWriteEnabled`
+    void                                 setDepthWriteEnabled(bool depthWriteEnabled);
+
+    class StencilDescriptor*             frontFaceStencil() const; // getters return a non-const pointer; setters accept a const pointer
+    void                                 setFrontFaceStencil(const class StencilDescriptor* frontFaceStencil);
+
+    class StencilDescriptor*             backFaceStencil() const;
+    void                                 setBackFaceStencil(const class StencilDescriptor* backFaceStencil);
+
+    NS::String*                          label() const;
+    void                                 setLabel(const NS::String* label);
+};
+
+class DepthStencilState : public NS::Referencing<DepthStencilState> // Derives from NS::Referencing (not NS::Copying); declares two getters and no alloc/init or setters.
+{
+public:
+    NS::String*   label() const;
+
+    class Device* device() const; // `class Device` is an elaborated type specifier, so no prior declaration of Device is needed here
+};
+
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<MTL::StencilDescriptor>, passing _MTL_PRIVATE_CLS(MTLStencilDescriptor) as the class argument.
+_MTL_INLINE MTL::StencilDescriptor* MTL::StencilDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::StencilDescriptor>(_MTL_PRIVATE_CLS(MTLStencilDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init<MTL::StencilDescriptor>() on this object and returns whatever pointer that call yields.
+_MTL_INLINE MTL::StencilDescriptor* MTL::StencilDescriptor::init()
+{
+    return NS::Object::init<MTL::StencilDescriptor>();
+}
+
+// property: stencilCompareFunction -- MTL::CompareFunction getter/setter; each forwards to Object::sendMessage on `this` (selectors `stencilCompareFunction` / `setStencilCompareFunction_`).
+_MTL_INLINE MTL::CompareFunction MTL::StencilDescriptor::stencilCompareFunction() const
+{
+    return Object::sendMessage<MTL::CompareFunction>(this, _MTL_PRIVATE_SEL(stencilCompareFunction));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setStencilCompareFunction(MTL::CompareFunction stencilCompareFunction)
+{ // The enum value is forwarded unchanged as the only message argument.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilCompareFunction_), stencilCompareFunction);
+}
+
+// property: stencilFailureOperation -- MTL::StencilOperation getter/setter; each forwards to Object::sendMessage on `this` (selectors `stencilFailureOperation` / `setStencilFailureOperation_`).
+_MTL_INLINE MTL::StencilOperation MTL::StencilDescriptor::stencilFailureOperation() const
+{
+    return Object::sendMessage<MTL::StencilOperation>(this, _MTL_PRIVATE_SEL(stencilFailureOperation));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setStencilFailureOperation(MTL::StencilOperation stencilFailureOperation)
+{ // The enum value is forwarded unchanged as the only message argument.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilFailureOperation_), stencilFailureOperation);
+}
+
+// property: depthFailureOperation -- MTL::StencilOperation getter/setter; each forwards to Object::sendMessage on `this` (selectors `depthFailureOperation` / `setDepthFailureOperation_`).
+_MTL_INLINE MTL::StencilOperation MTL::StencilDescriptor::depthFailureOperation() const
+{
+    return Object::sendMessage<MTL::StencilOperation>(this, _MTL_PRIVATE_SEL(depthFailureOperation));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setDepthFailureOperation(MTL::StencilOperation depthFailureOperation)
+{ // The enum value is forwarded unchanged as the only message argument.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthFailureOperation_), depthFailureOperation);
+}
+
+// property: depthStencilPassOperation -- MTL::StencilOperation getter/setter; each forwards to Object::sendMessage on `this` (selectors `depthStencilPassOperation` / `setDepthStencilPassOperation_`).
+_MTL_INLINE MTL::StencilOperation MTL::StencilDescriptor::depthStencilPassOperation() const
+{
+    return Object::sendMessage<MTL::StencilOperation>(this, _MTL_PRIVATE_SEL(depthStencilPassOperation));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setDepthStencilPassOperation(MTL::StencilOperation depthStencilPassOperation)
+{ // The enum value is forwarded unchanged as the only message argument.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStencilPassOperation_), depthStencilPassOperation);
+}
+
+// property: readMask -- uint32_t getter/setter; each forwards to Object::sendMessage on `this` (selectors `readMask` / `setReadMask_`).
+_MTL_INLINE uint32_t MTL::StencilDescriptor::readMask() const
+{
+    return Object::sendMessage<uint32_t>(this, _MTL_PRIVATE_SEL(readMask));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setReadMask(uint32_t readMask)
+{ // The mask is forwarded as a 32-bit value with no masking or validation here.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setReadMask_), readMask);
+}
+
+// property: writeMask -- uint32_t getter/setter; each forwards to Object::sendMessage on `this` (selectors `writeMask` / `setWriteMask_`).
+_MTL_INLINE uint32_t MTL::StencilDescriptor::writeMask() const
+{
+    return Object::sendMessage<uint32_t>(this, _MTL_PRIVATE_SEL(writeMask));
+}
+
+_MTL_INLINE void MTL::StencilDescriptor::setWriteMask(uint32_t writeMask)
+{ // The mask is forwarded as a 32-bit value with no masking or validation here.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setWriteMask_), writeMask);
+}
+
+// static method: alloc -- delegates to NS::Object::alloc<MTL::DepthStencilDescriptor>, passing _MTL_PRIVATE_CLS(MTLDepthStencilDescriptor) as the class argument.
+_MTL_INLINE MTL::DepthStencilDescriptor* MTL::DepthStencilDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::DepthStencilDescriptor>(_MTL_PRIVATE_CLS(MTLDepthStencilDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init<MTL::DepthStencilDescriptor>() on this object and returns whatever pointer that call yields.
+_MTL_INLINE MTL::DepthStencilDescriptor* MTL::DepthStencilDescriptor::init()
+{
+    return NS::Object::init<MTL::DepthStencilDescriptor>();
+}
+
+// property: depthCompareFunction -- MTL::CompareFunction getter/setter; each forwards to Object::sendMessage on `this` (selectors `depthCompareFunction` / `setDepthCompareFunction_`).
+_MTL_INLINE MTL::CompareFunction MTL::DepthStencilDescriptor::depthCompareFunction() const
+{
+    return Object::sendMessage<MTL::CompareFunction>(this, _MTL_PRIVATE_SEL(depthCompareFunction));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setDepthCompareFunction(MTL::CompareFunction depthCompareFunction)
+{ // The enum value is forwarded unchanged as the only message argument.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthCompareFunction_), depthCompareFunction);
+}
+
+// property: depthWriteEnabled -- bool getter/setter; note the selector names are asymmetric: the getter sends `isDepthWriteEnabled`, the setter sends `setDepthWriteEnabled_`.
+_MTL_INLINE bool MTL::DepthStencilDescriptor::depthWriteEnabled() const
+{ // The C++ name has no `is` prefix, but the selector used here does.
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isDepthWriteEnabled));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setDepthWriteEnabled(bool depthWriteEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthWriteEnabled_), depthWriteEnabled);
+}
+
+// property: frontFaceStencil -- StencilDescriptor pointer getter/setter; each forwards to Object::sendMessage on `this` (selectors `frontFaceStencil` / `setFrontFaceStencil_`).
+_MTL_INLINE MTL::StencilDescriptor* MTL::DepthStencilDescriptor::frontFaceStencil() const
+{
+    return Object::sendMessage<MTL::StencilDescriptor*>(this, _MTL_PRIVATE_SEL(frontFaceStencil));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setFrontFaceStencil(const MTL::StencilDescriptor* frontFaceStencil)
+{ // The pointer itself is forwarded; no null check or copy is made in this wrapper.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFrontFaceStencil_), frontFaceStencil);
+}
+
+// property: backFaceStencil -- StencilDescriptor pointer getter/setter; each forwards to Object::sendMessage on `this` (selectors `backFaceStencil` / `setBackFaceStencil_`).
+_MTL_INLINE MTL::StencilDescriptor* MTL::DepthStencilDescriptor::backFaceStencil() const
+{
+    return Object::sendMessage<MTL::StencilDescriptor*>(this, _MTL_PRIVATE_SEL(backFaceStencil));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setBackFaceStencil(const MTL::StencilDescriptor* backFaceStencil)
+{ // The pointer itself is forwarded; no null check or copy is made in this wrapper.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBackFaceStencil_), backFaceStencil);
+}
+
+// property: label -- NS::String pointer getter/setter; each forwards to Object::sendMessage on `this` (selectors `label` / `setLabel_`).
+_MTL_INLINE NS::String* MTL::DepthStencilDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::DepthStencilDescriptor::setLabel(const NS::String* label)
+{ // The pointer itself is forwarded; no null check or copy is made in this wrapper.
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: label -- getter only for DepthStencilState; forwards the `label` selector and types the reply as NS::String*.
+_MTL_INLINE NS::String* MTL::DepthStencilState::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: device -- getter only for DepthStencilState; forwards the `device` selector and types the reply as MTL::Device*.
+_MTL_INLINE MTL::Device* MTL::DepthStencilState::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
diff --git a/metal-cpp/Metal/MTLDevice.hpp b/metal-cpp/Metal/MTLDevice.hpp
new file mode 100644
index 0000000..fcd0d77
--- /dev/null
+++ b/metal-cpp/Metal/MTLDevice.hpp
@@ -0,0 +1,1254 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDevice.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLArgument.hpp"
+#include "MTLDevice.hpp"
+#include "MTLPixelFormat.hpp"
+#include "MTLResource.hpp"
+#include "MTLTexture.hpp"
+#include "MTLTypes.hpp"
+#include <IOSurface/IOSurfaceRef.h>
+#include <functional>
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, FeatureSet) { // NS::UInteger-backed; parameter type of Device::supportsFeatureSet. Values are grouped in per-platform numeric bands.
+    FeatureSet_iOS_GPUFamily1_v1 = 0, // iOS entries occupy 0..16
+    FeatureSet_iOS_GPUFamily2_v1 = 1,
+    FeatureSet_iOS_GPUFamily1_v2 = 2,
+    FeatureSet_iOS_GPUFamily2_v2 = 3,
+    FeatureSet_iOS_GPUFamily3_v1 = 4,
+    FeatureSet_iOS_GPUFamily1_v3 = 5,
+    FeatureSet_iOS_GPUFamily2_v3 = 6,
+    FeatureSet_iOS_GPUFamily3_v2 = 7,
+    FeatureSet_iOS_GPUFamily1_v4 = 8,
+    FeatureSet_iOS_GPUFamily2_v4 = 9,
+    FeatureSet_iOS_GPUFamily3_v3 = 10,
+    FeatureSet_iOS_GPUFamily4_v1 = 11,
+    FeatureSet_iOS_GPUFamily1_v5 = 12,
+    FeatureSet_iOS_GPUFamily2_v5 = 13,
+    FeatureSet_iOS_GPUFamily3_v4 = 14,
+    FeatureSet_iOS_GPUFamily4_v2 = 15,
+    FeatureSet_iOS_GPUFamily5_v1 = 16,
+    FeatureSet_macOS_GPUFamily1_v1 = 10000, // macOS/OSX entries start at 10000; enumerators sharing a value are alternate spellings
+    FeatureSet_OSX_GPUFamily1_v1 = 10000,
+    FeatureSet_macOS_GPUFamily1_v2 = 10001,
+    FeatureSet_OSX_GPUFamily1_v2 = 10001,
+    FeatureSet_OSX_ReadWriteTextureTier2 = 10002,
+    FeatureSet_macOS_ReadWriteTextureTier2 = 10002,
+    FeatureSet_macOS_GPUFamily1_v3 = 10003,
+    FeatureSet_macOS_GPUFamily1_v4 = 10004,
+    FeatureSet_macOS_GPUFamily2_v1 = 10005,
+    FeatureSet_watchOS_GPUFamily1_v1 = 20000, // watchOS/WatchOS entries start at 20000; same-valued names are aliases
+    FeatureSet_WatchOS_GPUFamily1_v1 = 20000,
+    FeatureSet_watchOS_GPUFamily2_v1 = 20001,
+    FeatureSet_WatchOS_GPUFamily2_v1 = 20001,
+    FeatureSet_tvOS_GPUFamily1_v1 = 30000, // tvOS/TVOS entries start at 30000
+    FeatureSet_TVOS_GPUFamily1_v1 = 30000,
+    FeatureSet_tvOS_GPUFamily1_v2 = 30001,
+    FeatureSet_tvOS_GPUFamily1_v3 = 30002,
+    FeatureSet_tvOS_GPUFamily2_v1 = 30003,
+    FeatureSet_tvOS_GPUFamily1_v4 = 30004,
+    FeatureSet_tvOS_GPUFamily2_v2 = 30005,
+};
+
+_MTL_ENUM(NS::Integer, GPUFamily) { // signed NS::Integer storage; parameter type of Device::supportsFamily
+    GPUFamilyApple1 = 1001, // Apple* entries: 1001..1008
+    GPUFamilyApple2 = 1002,
+    GPUFamilyApple3 = 1003,
+    GPUFamilyApple4 = 1004,
+    GPUFamilyApple5 = 1005,
+    GPUFamilyApple6 = 1006,
+    GPUFamilyApple7 = 1007,
+    GPUFamilyApple8 = 1008,
+    GPUFamilyMac1 = 2001, // Mac* entries: 2001..2002
+    GPUFamilyMac2 = 2002,
+    GPUFamilyCommon1 = 3001, // Common* entries: 3001..3003
+    GPUFamilyCommon2 = 3002,
+    GPUFamilyCommon3 = 3003,
+    GPUFamilyMacCatalyst1 = 4001, // MacCatalyst* entries: 4001..4002
+    GPUFamilyMacCatalyst2 = 4002,
+};
+
+_MTL_ENUM(NS::UInteger, DeviceLocation) { // NS::UInteger-backed; return type of Device::location
+    DeviceLocationBuiltIn = 0,
+    DeviceLocationSlot = 1,
+    DeviceLocationExternal = 2,
+    DeviceLocationUnspecified = NS::UIntegerMax, // sentinel set to the largest NS::UInteger rather than the next ordinal
+};
+
+_MTL_OPTIONS(NS::UInteger, PipelineOption) { // declared with _MTL_OPTIONS (not _MTL_ENUM); passed as `options` to the Device pipeline-state creation overloads
+    PipelineOptionNone = 0,
+    PipelineOptionArgumentInfo = 1, // non-zero values are distinct single bits (1, 2, 4) -- presumably combinable as flags; confirm against _NS_OPTIONS
+    PipelineOptionBufferTypeInfo = 2,
+    PipelineOptionFailOnBinaryArchiveMiss = 4,
+};
+
+_MTL_ENUM(NS::UInteger, ReadWriteTextureTier) { // NS::UInteger-backed; return type of Device::readWriteTextureSupport
+    ReadWriteTextureTierNone = 0,
+    ReadWriteTextureTier1 = 1,
+    ReadWriteTextureTier2 = 2,
+};
+
+_MTL_ENUM(NS::UInteger, ArgumentBuffersTier) { // NS::UInteger-backed; return type of Device::argumentBuffersSupport
+    ArgumentBuffersTier1 = 0, // numbering is zero-based: Tier1 is 0 and Tier2 is 1
+    ArgumentBuffersTier2 = 1,
+};
+
+_MTL_ENUM(NS::UInteger, SparseTextureRegionAlignmentMode) { // NS::UInteger-backed; `mode` parameter of Device::convertSparsePixelRegions
+    SparseTextureRegionAlignmentModeOutward = 0,
+    SparseTextureRegionAlignmentModeInward = 1,
+};
+
+struct AccelerationStructureSizes // plain aggregate of three NS::UInteger fields; returned by value from Device::accelerationStructureSizes
+{
+    NS::UInteger accelerationStructureSize;
+    NS::UInteger buildScratchBufferSize;
+    NS::UInteger refitScratchBufferSize;
+} _MTL_PACKED; // _MTL_PACKED is applied after the closing brace, i.e. to the struct type
+
+_MTL_ENUM(NS::UInteger, CounterSamplingPoint) { // NS::UInteger-backed; parameter type of Device::supportsCounterSampling
+    CounterSamplingPointAtStageBoundary = 0, // values run contiguously from 0 to 4
+    CounterSamplingPointAtDrawBoundary = 1,
+    CounterSamplingPointAtDispatchBoundary = 2,
+    CounterSamplingPointAtTileDispatchBoundary = 3,
+    CounterSamplingPointAtBlitBoundary = 4,
+};
+
+struct SizeAndAlign // plain aggregate of two NS::UInteger fields; returned by value from Device::heapTextureSizeAndAlign and Device::heapBufferSizeAndAlign
+{
+    NS::UInteger size;
+    NS::UInteger align;
+} _MTL_PACKED; // _MTL_PACKED is applied after the closing brace, i.e. to the struct type
+
+class ArgumentDescriptor : public NS::Copying<ArgumentDescriptor> // Declaration only: alloc/init, a static argumentDescriptor() member, and six getter/setter pairs; inline definitions follow the MTL namespace.
+{
+public:
+    static class ArgumentDescriptor* alloc();
+
+    class ArgumentDescriptor*        init();
+
+    static class ArgumentDescriptor* argumentDescriptor(); // static, like alloc(); its definition messages the class rather than an instance
+
+    MTL::DataType                    dataType() const;
+    void                             setDataType(MTL::DataType dataType);
+
+    NS::UInteger                     index() const;
+    void                             setIndex(NS::UInteger index);
+
+    NS::UInteger                     arrayLength() const;
+    void                             setArrayLength(NS::UInteger arrayLength);
+
+    MTL::ArgumentAccess              access() const;
+    void                             setAccess(MTL::ArgumentAccess access);
+
+    MTL::TextureType                 textureType() const;
+    void                             setTextureType(MTL::TextureType textureType);
+
+    NS::UInteger                     constantBlockAlignment() const;
+    void                             setConstantBlockAlignment(NS::UInteger constantBlockAlignment);
+};
+
+using DeviceNotificationName = NS::String*; // notification names are carried as NS::String pointers
+
+_MTL_CONST(DeviceNotificationName, DeviceWasAddedNotification); // three notification-name constants declared through _MTL_CONST
+
+_MTL_CONST(DeviceNotificationName, DeviceRemovalRequestedNotification);
+
+_MTL_CONST(DeviceNotificationName, DeviceWasRemovedNotification);
+
+using DeviceNotificationHandlerBlock = void (^)(class Device* pDevice, DeviceNotificationName notifyName); // block-typed callback (`^` syntax)
+
+using DeviceNotificationHandlerFunction = std::function<void(class Device* pDevice, DeviceNotificationName notifyName)>; // std::function counterpart with the same parameter list
+
+using AutoreleasedComputePipelineReflection = class ComputePipelineReflection*; // the two Autoreleased* aliases are plain pointer typedefs
+
+using AutoreleasedRenderPipelineReflection = class RenderPipelineReflection*;
+
+using NewLibraryCompletionHandler = void (^)(class Library*, NS::Error*); // from here on each callback comes as a block alias followed by a std::function alias named *Function
+
+using NewLibraryCompletionHandlerFunction = std::function<void(class Library*, NS::Error*)>;
+
+using NewRenderPipelineStateCompletionHandler = void (^)(class RenderPipelineState*, NS::Error*);
+
+using NewRenderPipelineStateCompletionHandlerFunction = std::function<void(class RenderPipelineState*, NS::Error*)>;
+
+using NewRenderPipelineStateWithReflectionCompletionHandler = void (^)(class RenderPipelineState*, class RenderPipelineReflection*, NS::Error*);
+
+using NewRenderPipelineStateWithReflectionCompletionHandlerFunction = std::function<void(class RenderPipelineState*, class RenderPipelineReflection*, NS::Error*)>;
+
+using NewComputePipelineStateCompletionHandler = void (^)(class ComputePipelineState*, NS::Error*);
+
+using NewComputePipelineStateCompletionHandlerFunction = std::function<void(class ComputePipelineState*, NS::Error*)>;
+
+using NewComputePipelineStateWithReflectionCompletionHandler = void (^)(class ComputePipelineState*, class ComputePipelineReflection*, NS::Error*);
+
+using NewComputePipelineStateWithReflectionCompletionHandlerFunction = std::function<void(class ComputePipelineState*, class ComputePipelineReflection*, NS::Error*)>;
+
+using Timestamp = std::uint64_t; // used by Device::sampleTimestamps for both the CPU and GPU out-parameters
+
+MTL::Device* CreateSystemDefaultDevice(); // free functions in namespace MTL; only the prototypes appear at this point
+
+NS::Array*   CopyAllDevices();
+
+NS::Array*   CopyAllDevicesWithObserver(NS::Object** pOutObserver, DeviceNotificationHandlerBlock handler); // block-handler overload
+
+NS::Array*   CopyAllDevicesWithObserver(NS::Object** pOutObserver, const DeviceNotificationHandlerFunction& handler); // std::function-handler overload
+
+void         RemoveDeviceObserver(const NS::Object* pObserver);
+
+class Device : public NS::Referencing<Device> // Declaration-only interface: every member below is a prototype with no inline body inside the class.
+{
+public:
+    void                            newLibrary(const NS::String* pSource, const class CompileOptions* pOptions, const NewLibraryCompletionHandlerFunction& completionHandler); // the first eight overloads take std::function-based *CompletionHandlerFunction callbacks
+
+    void                            newLibrary(const class StitchedLibraryDescriptor* pDescriptor, const MTL::NewLibraryCompletionHandlerFunction& completionHandler);
+
+    void                            newRenderPipelineState(const class RenderPipelineDescriptor* pDescriptor, const NewRenderPipelineStateCompletionHandlerFunction& completionHandler);
+
+    void                            newRenderPipelineState(const class RenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler);
+
+    void                            newRenderPipelineState(const class TileRenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler);
+
+    void                            newComputePipelineState(const class Function* pFunction, const NewComputePipelineStateCompletionHandlerFunction& completionHandler);
+
+    void                            newComputePipelineState(const class Function* pFunction, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler);
+
+    void                            newComputePipelineState(const class ComputePipelineDescriptor* pDescriptor, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler);
+
+    bool                            isHeadless() const;
+
+    NS::String*                     name() const;
+
+    uint64_t                        registryID() const;
+
+    MTL::Size                       maxThreadsPerThreadgroup() const;
+
+    bool                            lowPower() const;
+
+    bool                            headless() const; // declared in addition to isHeadless() above
+
+    bool                            removable() const;
+
+    bool                            hasUnifiedMemory() const;
+
+    uint64_t                        recommendedMaxWorkingSetSize() const;
+
+    MTL::DeviceLocation             location() const;
+
+    NS::UInteger                    locationNumber() const;
+
+    uint64_t                        maxTransferRate() const;
+
+    bool                            depth24Stencil8PixelFormatSupported() const;
+
+    MTL::ReadWriteTextureTier       readWriteTextureSupport() const;
+
+    MTL::ArgumentBuffersTier        argumentBuffersSupport() const;
+
+    bool                            rasterOrderGroupsSupported() const;
+
+    bool                            supports32BitFloatFiltering() const;
+
+    bool                            supports32BitMSAA() const;
+
+    bool                            supportsQueryTextureLOD() const;
+
+    bool                            supportsBCTextureCompression() const;
+
+    bool                            supportsPullModelInterpolation() const;
+
+    bool                            barycentricCoordsSupported() const;
+
+    bool                            supportsShaderBarycentricCoordinates() const;
+
+    NS::UInteger                    currentAllocatedSize() const;
+
+    class CommandQueue*             newCommandQueue();
+
+    class CommandQueue*             newCommandQueue(NS::UInteger maxCommandBufferCount);
+
+    MTL::SizeAndAlign               heapTextureSizeAndAlign(const class TextureDescriptor* desc);
+
+    MTL::SizeAndAlign               heapBufferSizeAndAlign(NS::UInteger length, MTL::ResourceOptions options);
+
+    class Heap*                     newHeap(const class HeapDescriptor* descriptor);
+
+    class Buffer*                   newBuffer(NS::UInteger length, MTL::ResourceOptions options);
+
+    class Buffer*                   newBuffer(const void* pointer, NS::UInteger length, MTL::ResourceOptions options);
+
+    class Buffer*                   newBuffer(const void* pointer, NS::UInteger length, MTL::ResourceOptions options, const void (^deallocator)(void*, NS::UInteger)); // takes a block-typed deallocator callback
+
+    class DepthStencilState*        newDepthStencilState(const class DepthStencilDescriptor* descriptor);
+
+    class Texture*                  newTexture(const class TextureDescriptor* descriptor);
+
+    class Texture*                  newTexture(const class TextureDescriptor* descriptor, const IOSurfaceRef iosurface, NS::UInteger plane);
+
+    class Texture*                  newSharedTexture(const class TextureDescriptor* descriptor);
+
+    class Texture*                  newSharedTexture(const class SharedTextureHandle* sharedHandle);
+
+    class SamplerState*             newSamplerState(const class SamplerDescriptor* descriptor);
+
+    class Library*                  newDefaultLibrary();
+
+    class Library*                  newDefaultLibrary(const NS::Bundle* bundle, NS::Error** error);
+
+    class Library*                  newLibrary(const NS::String* filepath, NS::Error** error); // overloads with an NS::Error** parameter return the new object directly
+
+    class Library*                  newLibrary(const NS::URL* url, NS::Error** error);
+
+    class Library*                  newLibrary(const dispatch_data_t data, NS::Error** error);
+
+    class Library*                  newLibrary(const NS::String* source, const class CompileOptions* options, NS::Error** error);
+
+    void                            newLibrary(const NS::String* source, const class CompileOptions* options, const MTL::NewLibraryCompletionHandler completionHandler); // block-typed (^) completion-handler variant; returns void
+
+    class Library*                  newLibrary(const class StitchedLibraryDescriptor* descriptor, NS::Error** error);
+
+    void                            newLibrary(const class StitchedLibraryDescriptor* descriptor, const MTL::NewLibraryCompletionHandler completionHandler);
+
+    class RenderPipelineState*      newRenderPipelineState(const class RenderPipelineDescriptor* descriptor, NS::Error** error);
+
+    class RenderPipelineState*      newRenderPipelineState(const class RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error);
+
+    void                            newRenderPipelineState(const class RenderPipelineDescriptor* descriptor, const MTL::NewRenderPipelineStateCompletionHandler completionHandler);
+
+    void                            newRenderPipelineState(const class RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler);
+
+    class ComputePipelineState*     newComputePipelineState(const class Function* computeFunction, NS::Error** error);
+
+    class ComputePipelineState*     newComputePipelineState(const class Function* computeFunction, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error);
+
+    void                            newComputePipelineState(const class Function* computeFunction, const MTL::NewComputePipelineStateCompletionHandler completionHandler);
+
+    void                            newComputePipelineState(const class Function* computeFunction, MTL::PipelineOption options, const MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler);
+
+    class ComputePipelineState*     newComputePipelineState(const class ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error);
+
+    void                            newComputePipelineState(const class ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler);
+
+    class Fence*                    newFence();
+
+    bool                            supportsFeatureSet(MTL::FeatureSet featureSet);
+
+    bool                            supportsFamily(MTL::GPUFamily gpuFamily);
+
+    bool                            supportsTextureSampleCount(NS::UInteger sampleCount);
+
+    NS::UInteger                    minimumLinearTextureAlignmentForPixelFormat(MTL::PixelFormat format);
+
+    NS::UInteger                    minimumTextureBufferAlignmentForPixelFormat(MTL::PixelFormat format);
+
+    class RenderPipelineState*      newRenderPipelineState(const class TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error);
+
+    void                            newRenderPipelineState(const class TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler);
+
+    NS::UInteger                    maxThreadgroupMemoryLength() const;
+
+    NS::UInteger                    maxArgumentBufferSamplerCount() const;
+
+    bool                            programmableSamplePositionsSupported() const;
+
+    void                            getDefaultSamplePositions(MTL::SamplePosition* positions, NS::UInteger count);
+
+    class ArgumentEncoder*          newArgumentEncoder(const NS::Array* arguments);
+
+    bool                            supportsRasterizationRateMap(NS::UInteger layerCount);
+
+    class RasterizationRateMap*     newRasterizationRateMap(const class RasterizationRateMapDescriptor* descriptor);
+
+    class IndirectCommandBuffer*    newIndirectCommandBuffer(const class IndirectCommandBufferDescriptor* descriptor, NS::UInteger maxCount, MTL::ResourceOptions options);
+
+    class Event*                    newEvent();
+
+    class SharedEvent*              newSharedEvent();
+
+    class SharedEvent*              newSharedEvent(const class SharedEventHandle* sharedEventHandle);
+
+    uint64_t                        peerGroupID() const;
+
+    uint32_t                        peerIndex() const;
+
+    uint32_t                        peerCount() const;
+
+    MTL::Size                       sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat pixelFormat, NS::UInteger sampleCount);
+
+    NS::UInteger                    sparseTileSizeInBytes() const;
+
+    void                            convertSparsePixelRegions(const MTL::Region* pixelRegions, MTL::Region* tileRegions, MTL::Size tileSize, MTL::SparseTextureRegionAlignmentMode mode, NS::UInteger numRegions); // input array is const, output array is mutable
+
+    void                            convertSparseTileRegions(const MTL::Region* tileRegions, MTL::Region* pixelRegions, MTL::Size tileSize, NS::UInteger numRegions);
+
+    NS::UInteger                    maxBufferLength() const;
+
+    NS::Array*                      counterSets() const;
+
+    class CounterSampleBuffer*      newCounterSampleBuffer(const class CounterSampleBufferDescriptor* descriptor, NS::Error** error);
+
+    void                            sampleTimestamps(MTL::Timestamp* cpuTimestamp, MTL::Timestamp* gpuTimestamp); // both arguments are non-const pointers, i.e. out-parameters
+
+    bool                            supportsCounterSampling(MTL::CounterSamplingPoint samplingPoint);
+
+    bool                            supportsVertexAmplificationCount(NS::UInteger count);
+
+    bool                            supportsDynamicLibraries() const;
+
+    bool                            supportsRenderDynamicLibraries() const;
+
+    class DynamicLibrary*           newDynamicLibrary(const class Library* library, NS::Error** error);
+
+    class DynamicLibrary*           newDynamicLibrary(const NS::URL* url, NS::Error** error);
+
+    class BinaryArchive*            newBinaryArchive(const class BinaryArchiveDescriptor* descriptor, NS::Error** error);
+
+    bool                            supportsRaytracing() const;
+
+    MTL::AccelerationStructureSizes accelerationStructureSizes(const class AccelerationStructureDescriptor* descriptor);
+
+    class AccelerationStructure*    newAccelerationStructure(NS::UInteger size);
+
+    class AccelerationStructure*    newAccelerationStructure(const class AccelerationStructureDescriptor* descriptor);
+
+    bool                            supportsFunctionPointers() const;
+
+    bool                            supportsFunctionPointersFromRender() const;
+
+    bool                            supportsRaytracingFromRender() const;
+
+    bool                            supportsPrimitiveMotionBlur() const;
+};
+
+}
+
+// static method: alloc — calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLArgumentDescriptor) and returns the result as MTL::ArgumentDescriptor*.
+_MTL_INLINE MTL::ArgumentDescriptor* MTL::ArgumentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::ArgumentDescriptor>(_MTL_PRIVATE_CLS(MTLArgumentDescriptor));
+}
+
+// method: init — forwards to NS::Object::init and returns the result typed as MTL::ArgumentDescriptor*.
+_MTL_INLINE MTL::ArgumentDescriptor* MTL::ArgumentDescriptor::init()
+{
+    return NS::Object::init<MTL::ArgumentDescriptor>();
+}
+
+// static method: argumentDescriptor — sends `argumentDescriptor` with _MTL_PRIVATE_CLS(MTLArgumentDescriptor) as the receiver (no instance needed).
+_MTL_INLINE MTL::ArgumentDescriptor* MTL::ArgumentDescriptor::argumentDescriptor()
+{
+    return Object::sendMessage<MTL::ArgumentDescriptor*>(_MTL_PRIVATE_CLS(MTLArgumentDescriptor), _MTL_PRIVATE_SEL(argumentDescriptor));
+}
+
+// property: dataType — the getter sends `dataType`; the setter defined right after it sends `setDataType_` with the new MTL::DataType.
+_MTL_INLINE MTL::DataType MTL::ArgumentDescriptor::dataType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(dataType));
+}
+
+_MTL_INLINE void MTL::ArgumentDescriptor::setDataType(MTL::DataType dataType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDataType_), dataType);
+}
+
+// property: index — the getter sends `index`; the setter defined right after it sends `setIndex_` with the new NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::ArgumentDescriptor::index() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(index));
+}
+
+_MTL_INLINE void MTL::ArgumentDescriptor::setIndex(NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndex_), index);
+}
+
+// property: arrayLength — the getter sends `arrayLength`; the setter defined right after it sends `setArrayLength_` with the new NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::ArgumentDescriptor::arrayLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(arrayLength));
+}
+
+_MTL_INLINE void MTL::ArgumentDescriptor::setArrayLength(NS::UInteger arrayLength)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setArrayLength_), arrayLength);
+}
+
+// property: access — the getter sends `access`; the setter defined right after it sends `setAccess_` with the new MTL::ArgumentAccess.
+_MTL_INLINE MTL::ArgumentAccess MTL::ArgumentDescriptor::access() const
+{
+    return Object::sendMessage<MTL::ArgumentAccess>(this, _MTL_PRIVATE_SEL(access));
+}
+
+_MTL_INLINE void MTL::ArgumentDescriptor::setAccess(MTL::ArgumentAccess access)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAccess_), access);
+}
+
+// property: textureType — the getter sends `textureType`; the setter defined right after it sends `setTextureType_` with the new MTL::TextureType.
+_MTL_INLINE MTL::TextureType MTL::ArgumentDescriptor::textureType() const
+{
+    return Object::sendMessage<MTL::TextureType>(this, _MTL_PRIVATE_SEL(textureType));
+}
+
+_MTL_INLINE void MTL::ArgumentDescriptor::setTextureType(MTL::TextureType textureType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTextureType_), textureType);
+}
+
+// property: constantBlockAlignment — the getter sends `constantBlockAlignment`; the setter defined right after it sends `setConstantBlockAlignment_`.
+_MTL_INLINE NS::UInteger MTL::ArgumentDescriptor::constantBlockAlignment() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(constantBlockAlignment));
+}
+
+_MTL_INLINE void MTL::ArgumentDescriptor::setConstantBlockAlignment(NS::UInteger constantBlockAlignment)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setConstantBlockAlignment_), constantBlockAlignment);
+}
+
+#if defined(MTL_PRIVATE_IMPLEMENTATION)
+// C-linkage declarations of the global functions that the MTL:: free-function wrappers in this guarded section forward to.
+extern "C" MTL::Device* MTLCreateSystemDefaultDevice();
+
+extern "C" NS::Array*   MTLCopyAllDevices();
+
+extern "C" NS::Array*   MTLCopyAllDevicesWithObserver(NS::Object**, MTL::DeviceNotificationHandlerBlock);
+
+extern "C" void         MTLRemoveDeviceObserver(const NS::Object*);
+
+#include <TargetConditionals.h> // NOTE(review): presumably supplies the TARGET_OS_OSX macro tested by the wrappers in this section — confirm
+
+MTL::Device* MTL::CreateSystemDefaultDevice()
+{
+    return ::MTLCreateSystemDefaultDevice(); // forwards unconditionally to the C-linkage function; no platform check here
+}
+
+NS::Array* MTL::CopyAllDevices()
+{
+#if !TARGET_OS_OSX
+    return nullptr; // enumeration is only forwarded when TARGET_OS_OSX is set
+#else
+    return ::MTLCopyAllDevices(); // TARGET_OS_OSX build: forward to the C-linkage function
+#endif // !TARGET_OS_OSX
+}
+
+NS::Array* MTL::CopyAllDevicesWithObserver(NS::Object** pOutObserver, DeviceNotificationHandlerBlock handler)
+{
+#if !TARGET_OS_OSX
+    // Without TARGET_OS_OSX nothing is forwarded: mark both parameters as used and report no devices.
+    (void)pOutObserver;
+    (void)handler;
+    return nullptr;
+#else
+    return ::MTLCopyAllDevicesWithObserver(pOutObserver, handler);
+#endif // !TARGET_OS_OSX
+}
+
+NS::Array* MTL::CopyAllDevicesWithObserver(NS::Object** pOutObserver, const DeviceNotificationHandlerFunction& handler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking overload.
+    __block DeviceNotificationHandlerFunction handlerCopy = handler;
+    return CopyAllDevicesWithObserver(pOutObserver, ^(Device* pChangedDevice, DeviceNotificationName notificationName) { handlerCopy(pChangedDevice, notificationName); });
+}
+
+void MTL::RemoveDeviceObserver(const NS::Object* pObserver)
+{
+#if TARGET_OS_OSX
+    ::MTLRemoveDeviceObserver(pObserver);
+#endif // TARGET_OS_OSX — when the macro is not set this function body is empty and pObserver is left untouched
+}
+
+#endif // MTL_PRIVATE_IMPLEMENTATION
+
+_MTL_INLINE void MTL::Device::newLibrary(const NS::String* pSource, const CompileOptions* pOptions, const NewLibraryCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking newLibrary overload.
+    __block NewLibraryCompletionHandlerFunction handlerCopy = completionHandler;
+    newLibrary(pSource, pOptions, ^(Library* pNewLibrary, NS::Error* pFailure) { handlerCopy(pNewLibrary, pFailure); });
+}
+
+_MTL_INLINE void MTL::Device::newLibrary(const class StitchedLibraryDescriptor* pDescriptor, const MTL::NewLibraryCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking stitched-descriptor overload.
+    __block NewLibraryCompletionHandlerFunction handlerCopy = completionHandler;
+    newLibrary(pDescriptor, ^(Library* pNewLibrary, NS::Error* pFailure) { handlerCopy(pNewLibrary, pFailure); });
+}
+
+_MTL_INLINE void MTL::Device::newRenderPipelineState(const RenderPipelineDescriptor* pDescriptor, const NewRenderPipelineStateCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking overload.
+    __block NewRenderPipelineStateCompletionHandlerFunction handlerCopy = completionHandler;
+    newRenderPipelineState(pDescriptor, ^(RenderPipelineState* pNewState, NS::Error* pFailure) { handlerCopy(pNewState, pFailure); });
+}
+
+_MTL_INLINE void MTL::Device::newRenderPipelineState(const RenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking overload that also reports reflection.
+    __block NewRenderPipelineStateWithReflectionCompletionHandlerFunction handlerCopy = completionHandler;
+    newRenderPipelineState(pDescriptor, options, ^(RenderPipelineState* pNewState, class RenderPipelineReflection* pStateReflection, NS::Error* pFailure) { handlerCopy(pNewState, pStateReflection, pFailure); });
+}
+
+_MTL_INLINE void MTL::Device::newRenderPipelineState(const TileRenderPipelineDescriptor* pDescriptor, PipelineOption options, const NewRenderPipelineStateWithReflectionCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking tile-descriptor overload.
+    __block NewRenderPipelineStateWithReflectionCompletionHandlerFunction handlerCopy = completionHandler;
+    newRenderPipelineState(pDescriptor, options, ^(RenderPipelineState* pNewState, class RenderPipelineReflection* pStateReflection, NS::Error* pFailure) { handlerCopy(pNewState, pStateReflection, pFailure); });
+}
+
+_MTL_INLINE void MTL::Device::newComputePipelineState(const class Function* pFunction, const NewComputePipelineStateCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking overload.
+    __block NewComputePipelineStateCompletionHandlerFunction handlerCopy = completionHandler;
+    newComputePipelineState(pFunction, ^(ComputePipelineState* pNewState, NS::Error* pFailure) { handlerCopy(pNewState, pFailure); });
+}
+
+_MTL_INLINE void MTL::Device::newComputePipelineState(const Function* pFunction, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking overload that also reports reflection.
+    __block NewComputePipelineStateWithReflectionCompletionHandlerFunction handlerCopy = completionHandler;
+    newComputePipelineState(pFunction, options, ^(ComputePipelineState* pNewState, ComputePipelineReflection* pStateReflection, NS::Error* pFailure) { handlerCopy(pNewState, pStateReflection, pFailure); });
+}
+
+_MTL_INLINE void MTL::Device::newComputePipelineState(const ComputePipelineDescriptor* pDescriptor, PipelineOption options, const NewComputePipelineStateWithReflectionCompletionHandlerFunction& completionHandler)
+{
+    // Copy the callable into a __block variable, then pass a forwarding block to the block-taking descriptor overload.
+    __block NewComputePipelineStateWithReflectionCompletionHandlerFunction handlerCopy = completionHandler;
+    newComputePipelineState(pDescriptor, options, ^(ComputePipelineState* pNewState, ComputePipelineReflection* pStateReflection, NS::Error* pFailure) { handlerCopy(pNewState, pStateReflection, pFailure); });
+}
+
+_MTL_INLINE bool MTL::Device::isHeadless() const
+{
+    return headless(); // alias: simply returns what the headless() getter returns
+}
+
+// property: name — getter; returns the NS::String* obtained by sending `name` to this device.
+_MTL_INLINE NS::String* MTL::Device::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: registryID — getter; returns the uint64_t obtained by sending `registryID`.
+_MTL_INLINE uint64_t MTL::Device::registryID() const
+{
+    return Object::sendMessage<uint64_t>(this, _MTL_PRIVATE_SEL(registryID));
+}
+
+// property: maxThreadsPerThreadgroup — getter; returns the MTL::Size obtained by sending `maxThreadsPerThreadgroup`.
+_MTL_INLINE MTL::Size MTL::Device::maxThreadsPerThreadgroup() const
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(maxThreadsPerThreadgroup));
+}
+
+// property: lowPower — getter; note the selector sent is `isLowPower`, not `lowPower`.
+_MTL_INLINE bool MTL::Device::lowPower() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isLowPower));
+}
+
+// property: headless — getter; note the selector sent is `isHeadless`, not `headless`.
+_MTL_INLINE bool MTL::Device::headless() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isHeadless));
+}
+
+// property: removable — getter; note the selector sent is `isRemovable`, not `removable`.
+_MTL_INLINE bool MTL::Device::removable() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isRemovable));
+}
+
+// property: hasUnifiedMemory — getter; sends `hasUnifiedMemory` and returns the bool result.
+_MTL_INLINE bool MTL::Device::hasUnifiedMemory() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(hasUnifiedMemory));
+}
+
+// property: recommendedMaxWorkingSetSize — getter; returns the uint64_t obtained by sending `recommendedMaxWorkingSetSize`.
+_MTL_INLINE uint64_t MTL::Device::recommendedMaxWorkingSetSize() const
+{
+    return Object::sendMessage<uint64_t>(this, _MTL_PRIVATE_SEL(recommendedMaxWorkingSetSize));
+}
+
+// property: location — getter; returns the MTL::DeviceLocation obtained by sending `location`.
+_MTL_INLINE MTL::DeviceLocation MTL::Device::location() const
+{
+    return Object::sendMessage<MTL::DeviceLocation>(this, _MTL_PRIVATE_SEL(location));
+}
+
+// property: locationNumber — getter; returns the NS::UInteger obtained by sending `locationNumber`.
+_MTL_INLINE NS::UInteger MTL::Device::locationNumber() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(locationNumber));
+}
+
+// property: maxTransferRate — getter; returns the uint64_t obtained by sending `maxTransferRate`.
+_MTL_INLINE uint64_t MTL::Device::maxTransferRate() const
+{
+    return Object::sendMessage<uint64_t>(this, _MTL_PRIVATE_SEL(maxTransferRate));
+}
+
+// property: depth24Stencil8PixelFormatSupported — getter; sends `isDepth24Stencil8PixelFormatSupported` through sendMessageSafe rather than sendMessage.
+_MTL_INLINE bool MTL::Device::depth24Stencil8PixelFormatSupported() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(isDepth24Stencil8PixelFormatSupported));
+}
+
+// property: readWriteTextureSupport — getter; returns the MTL::ReadWriteTextureTier obtained by sending `readWriteTextureSupport`.
+_MTL_INLINE MTL::ReadWriteTextureTier MTL::Device::readWriteTextureSupport() const
+{
+    return Object::sendMessage<MTL::ReadWriteTextureTier>(this, _MTL_PRIVATE_SEL(readWriteTextureSupport));
+}
+
+// property: argumentBuffersSupport — getter; returns the MTL::ArgumentBuffersTier obtained by sending `argumentBuffersSupport`.
+_MTL_INLINE MTL::ArgumentBuffersTier MTL::Device::argumentBuffersSupport() const
+{
+    return Object::sendMessage<MTL::ArgumentBuffersTier>(this, _MTL_PRIVATE_SEL(argumentBuffersSupport));
+}
+
+// property: rasterOrderGroupsSupported — getter; sends `areRasterOrderGroupsSupported` via sendMessageSafe (NOTE(review): presumably tolerates an unavailable selector — confirm in NS::Object).
+_MTL_INLINE bool MTL::Device::rasterOrderGroupsSupported() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(areRasterOrderGroupsSupported));
+}
+
+// property: supports32BitFloatFiltering — getter; sends `supports32BitFloatFiltering` via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::supports32BitFloatFiltering() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supports32BitFloatFiltering));
+}
+
+// property: supports32BitMSAA — getter; sends `supports32BitMSAA` via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::supports32BitMSAA() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supports32BitMSAA));
+}
+
+// property: supportsQueryTextureLOD — getter; sends `supportsQueryTextureLOD` via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::supportsQueryTextureLOD() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsQueryTextureLOD));
+}
+
+// property: supportsBCTextureCompression — getter; sends `supportsBCTextureCompression` via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::supportsBCTextureCompression() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsBCTextureCompression));
+}
+
+// property: supportsPullModelInterpolation — getter; sends `supportsPullModelInterpolation` via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::supportsPullModelInterpolation() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsPullModelInterpolation));
+}
+
+// property: barycentricCoordsSupported — getter; note the selector sent is `areBarycentricCoordsSupported`, via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::barycentricCoordsSupported() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(areBarycentricCoordsSupported));
+}
+
+// property: supportsShaderBarycentricCoordinates — getter; sends `supportsShaderBarycentricCoordinates` via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::supportsShaderBarycentricCoordinates() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsShaderBarycentricCoordinates));
+}
+
+// property: currentAllocatedSize — getter; returns the NS::UInteger obtained by sending `currentAllocatedSize`.
+_MTL_INLINE NS::UInteger MTL::Device::currentAllocatedSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(currentAllocatedSize));
+}
+
+// method: newCommandQueue — sends `newCommandQueue` with no arguments and returns the result as MTL::CommandQueue*.
+_MTL_INLINE MTL::CommandQueue* MTL::Device::newCommandQueue()
+{
+    return Object::sendMessage<MTL::CommandQueue*>(this, _MTL_PRIVATE_SEL(newCommandQueue));
+}
+
+// method: newCommandQueueWithMaxCommandBufferCount: — overload of newCommandQueue that passes maxCommandBufferCount through unchanged.
+_MTL_INLINE MTL::CommandQueue* MTL::Device::newCommandQueue(NS::UInteger maxCommandBufferCount)
+{
+    return Object::sendMessage<MTL::CommandQueue*>(this, _MTL_PRIVATE_SEL(newCommandQueueWithMaxCommandBufferCount_), maxCommandBufferCount);
+}
+
+// method: heapTextureSizeAndAlignWithDescriptor: — passes the texture descriptor through and returns the MTL::SizeAndAlign result by value.
+_MTL_INLINE MTL::SizeAndAlign MTL::Device::heapTextureSizeAndAlign(const MTL::TextureDescriptor* desc)
+{
+    return Object::sendMessage<MTL::SizeAndAlign>(this, _MTL_PRIVATE_SEL(heapTextureSizeAndAlignWithDescriptor_), desc);
+}
+
+// method: heapBufferSizeAndAlignWithLength:options: — passes length and options through and returns the MTL::SizeAndAlign result by value.
+_MTL_INLINE MTL::SizeAndAlign MTL::Device::heapBufferSizeAndAlign(NS::UInteger length, MTL::ResourceOptions options)
+{
+    return Object::sendMessage<MTL::SizeAndAlign>(this, _MTL_PRIVATE_SEL(heapBufferSizeAndAlignWithLength_options_), length, options);
+}
+
+// method: newHeapWithDescriptor: — passes the heap descriptor through and returns the result as MTL::Heap*.
+_MTL_INLINE MTL::Heap* MTL::Device::newHeap(const MTL::HeapDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::Heap*>(this, _MTL_PRIVATE_SEL(newHeapWithDescriptor_), descriptor);
+}
+
+// method: newBufferWithLength:options: — newBuffer overload taking only a length and resource options.
+_MTL_INLINE MTL::Buffer* MTL::Device::newBuffer(NS::UInteger length, MTL::ResourceOptions options)
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(newBufferWithLength_options_), length, options);
+}
+
+// method: newBufferWithBytes:length:options: — newBuffer overload that additionally passes a source pointer.
+_MTL_INLINE MTL::Buffer* MTL::Device::newBuffer(const void* pointer, NS::UInteger length, MTL::ResourceOptions options)
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(newBufferWithBytes_length_options_), pointer, length, options);
+}
+
+// method: newBufferWithBytesNoCopy:length:options:deallocator: — newBuffer overload that additionally passes a deallocator block taking (void*, NS::UInteger).
+_MTL_INLINE MTL::Buffer* MTL::Device::newBuffer(const void* pointer, NS::UInteger length, MTL::ResourceOptions options, const void (^deallocator)(void*, NS::UInteger))
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(newBufferWithBytesNoCopy_length_options_deallocator_), pointer, length, options, deallocator);
+}
+
+// method: newDepthStencilStateWithDescriptor: — passes the descriptor through and returns the result as MTL::DepthStencilState*.
+_MTL_INLINE MTL::DepthStencilState* MTL::Device::newDepthStencilState(const MTL::DepthStencilDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::DepthStencilState*>(this, _MTL_PRIVATE_SEL(newDepthStencilStateWithDescriptor_), descriptor);
+}
+
+// method: newTextureWithDescriptor: — newTexture overload taking only a texture descriptor.
+_MTL_INLINE MTL::Texture* MTL::Device::newTexture(const MTL::TextureDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_), descriptor);
+}
+
+// method: newTextureWithDescriptor:iosurface:plane: — newTexture overload that additionally passes an IOSurfaceRef and a plane index.
+_MTL_INLINE MTL::Texture* MTL::Device::newTexture(const MTL::TextureDescriptor* descriptor, const IOSurfaceRef iosurface, NS::UInteger plane)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_iosurface_plane_), descriptor, iosurface, plane);
+}
+
+// method: newSharedTextureWithDescriptor: — newSharedTexture overload taking a texture descriptor.
+_MTL_INLINE MTL::Texture* MTL::Device::newSharedTexture(const MTL::TextureDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newSharedTextureWithDescriptor_), descriptor);
+}
+
+// method: newSharedTextureWithHandle: — newSharedTexture overload taking an MTL::SharedTextureHandle instead of a descriptor.
+_MTL_INLINE MTL::Texture* MTL::Device::newSharedTexture(const MTL::SharedTextureHandle* sharedHandle)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newSharedTextureWithHandle_), sharedHandle);
+}
+
+// method: newSamplerStateWithDescriptor: — passes the descriptor through and returns the result as MTL::SamplerState*.
+_MTL_INLINE MTL::SamplerState* MTL::Device::newSamplerState(const MTL::SamplerDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::SamplerState*>(this, _MTL_PRIVATE_SEL(newSamplerStateWithDescriptor_), descriptor);
+}
+
+// method: newDefaultLibrary — sends `newDefaultLibrary` with no arguments and returns the result as MTL::Library*.
+_MTL_INLINE MTL::Library* MTL::Device::newDefaultLibrary()
+{
+    return Object::sendMessage<MTL::Library*>(this, _MTL_PRIVATE_SEL(newDefaultLibrary));
+}
+
+// method: newDefaultLibraryWithBundle:error: — overload that passes a bundle and an NS::Error** out-parameter through.
+_MTL_INLINE MTL::Library* MTL::Device::newDefaultLibrary(const NS::Bundle* bundle, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Library*>(this, _MTL_PRIVATE_SEL(newDefaultLibraryWithBundle_error_), bundle, error);
+}
+
+// method: newLibraryWithFile:error: — newLibrary overload taking a file path string and an NS::Error** out-parameter.
+_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const NS::String* filepath, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Library*>(this, _MTL_PRIVATE_SEL(newLibraryWithFile_error_), filepath, error);
+}
+
+// method: newLibraryWithURL:error: — newLibrary overload taking an NS::URL and an NS::Error** out-parameter.
+_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const NS::URL* url, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Library*>(this, _MTL_PRIVATE_SEL(newLibraryWithURL_error_), url, error);
+}
+
+// method: newLibraryWithData:error: — newLibrary overload taking a dispatch_data_t and an NS::Error** out-parameter.
+_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const dispatch_data_t data, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Library*>(this, _MTL_PRIVATE_SEL(newLibraryWithData_error_), data, error);
+}
+
+// method: newLibraryWithSource:options:error: — newLibrary overload taking source text, compile options and an NS::Error** out-parameter; returns the library directly.
+_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const NS::String* source, const MTL::CompileOptions* options, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Library*>(this, _MTL_PRIVATE_SEL(newLibraryWithSource_options_error_), source, options, error);
+}
+
+// method: newLibraryWithSource:options:completionHandler: — returns void; the result is delivered to the completion-handler block that is passed through.
+_MTL_INLINE void MTL::Device::newLibrary(const NS::String* source, const MTL::CompileOptions* options, const MTL::NewLibraryCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newLibraryWithSource_options_completionHandler_), source, options, completionHandler);
+}
+
+// method: newLibraryWithStitchedDescriptor:error: — newLibrary overload taking a stitched-library descriptor and an NS::Error** out-parameter.
+_MTL_INLINE MTL::Library* MTL::Device::newLibrary(const MTL::StitchedLibraryDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Library*>(this, _MTL_PRIVATE_SEL(newLibraryWithStitchedDescriptor_error_), descriptor, error);
+}
+
+// method: newLibraryWithStitchedDescriptor:completionHandler: — returns void; passes the descriptor and the completion-handler block through.
+_MTL_INLINE void MTL::Device::newLibrary(const MTL::StitchedLibraryDescriptor* descriptor, const MTL::NewLibraryCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newLibraryWithStitchedDescriptor_completionHandler_), descriptor, completionHandler);
+}
+
+// method: newRenderPipelineStateWithDescriptor:error: — returns the pipeline state directly; error is an NS::Error** out-parameter passed through.
+_MTL_INLINE MTL::RenderPipelineState* MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<MTL::RenderPipelineState*>(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_error_), descriptor, error);
+}
+
+// method: newRenderPipelineStateWithDescriptor:options:reflection:error: — as above, additionally passing pipeline options and a reflection pointer through.
+_MTL_INLINE MTL::RenderPipelineState* MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error)
+{
+    return Object::sendMessage<MTL::RenderPipelineState*>(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_options_reflection_error_), descriptor, options, reflection, error);
+}
+
+// method: newRenderPipelineStateWithDescriptor:completionHandler: — returns void; passes the descriptor and the completion-handler block through.
+_MTL_INLINE void MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, const MTL::NewRenderPipelineStateCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_completionHandler_), descriptor, completionHandler);
+}
+
+// method: newRenderPipelineStateWithDescriptor:options:completionHandler: — returns void; passes descriptor, options and the with-reflection handler block through.
+_MTL_INLINE void MTL::Device::newRenderPipelineState(const MTL::RenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithDescriptor_options_completionHandler_), descriptor, options, completionHandler);
+}
+
+// method: newComputePipelineStateWithFunction:error: — returns the pipeline state directly; error is an NS::Error** out-parameter passed through.
+_MTL_INLINE MTL::ComputePipelineState* MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, NS::Error** error)
+{
+    return Object::sendMessage<MTL::ComputePipelineState*>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_error_), computeFunction, error);
+}
+
+// method: newComputePipelineStateWithFunction:options:reflection:error: — as above, additionally passing pipeline options and a reflection pointer through.
+_MTL_INLINE MTL::ComputePipelineState* MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error)
+{
+    return Object::sendMessage<MTL::ComputePipelineState*>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_options_reflection_error_), computeFunction, options, reflection, error);
+}
+
+// method: newComputePipelineStateWithFunction:completionHandler: — returns void; passes the function and the completion-handler block through.
+_MTL_INLINE void MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, const MTL::NewComputePipelineStateCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_completionHandler_), computeFunction, completionHandler);
+}
+
+// method: newComputePipelineStateWithFunction:options:completionHandler: — returns void; passes function, options and the with-reflection handler block through.
+_MTL_INLINE void MTL::Device::newComputePipelineState(const MTL::Function* computeFunction, MTL::PipelineOption options, const MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithFunction_options_completionHandler_), computeFunction, options, completionHandler);
+}
+
+// method: newComputePipelineStateWithDescriptor:options:reflection:error: — descriptor-based variant; returns the pipeline state directly.
+_MTL_INLINE MTL::ComputePipelineState* MTL::Device::newComputePipelineState(const MTL::ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedComputePipelineReflection* reflection, NS::Error** error)
+{
+    return Object::sendMessage<MTL::ComputePipelineState*>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithDescriptor_options_reflection_error_), descriptor, options, reflection, error);
+}
+
+// method: newComputePipelineStateWithDescriptor:options:completionHandler: — descriptor-based variant; returns void and passes the handler block through.
+_MTL_INLINE void MTL::Device::newComputePipelineState(const MTL::ComputePipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewComputePipelineStateWithReflectionCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newComputePipelineStateWithDescriptor_options_completionHandler_), descriptor, options, completionHandler);
+}
+
+// method: newFence — sends `newFence` with no arguments and returns the result as MTL::Fence*.
+_MTL_INLINE MTL::Fence* MTL::Device::newFence()
+{
+    return Object::sendMessage<MTL::Fence*>(this, _MTL_PRIVATE_SEL(newFence));
+}
+
+// method: supportsFeatureSet: — passes the feature set through via sendMessageSafe and returns the bool result.
+_MTL_INLINE bool MTL::Device::supportsFeatureSet(MTL::FeatureSet featureSet)
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsFeatureSet_), featureSet);
+}
+
+// method: supportsFamily: — passes the GPU family through via sendMessageSafe and returns the bool result.
+_MTL_INLINE bool MTL::Device::supportsFamily(MTL::GPUFamily gpuFamily)
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsFamily_), gpuFamily);
+}
+
+// method: supportsTextureSampleCount: — passes the sample count through via sendMessageSafe and returns the bool result.
+_MTL_INLINE bool MTL::Device::supportsTextureSampleCount(NS::UInteger sampleCount)
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsTextureSampleCount_), sampleCount);
+}
+
+// method: minimumLinearTextureAlignmentForPixelFormat: — passes the pixel format through and returns the NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::Device::minimumLinearTextureAlignmentForPixelFormat(MTL::PixelFormat format)
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(minimumLinearTextureAlignmentForPixelFormat_), format);
+}
+
+// method: minimumTextureBufferAlignmentForPixelFormat: — passes the pixel format through and returns the NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::Device::minimumTextureBufferAlignmentForPixelFormat(MTL::PixelFormat format)
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(minimumTextureBufferAlignmentForPixelFormat_), format);
+}
+
+// method: newRenderPipelineStateWithTileDescriptor:options:reflection:error: — tile-descriptor variant; returns the pipeline state directly.
+_MTL_INLINE MTL::RenderPipelineState* MTL::Device::newRenderPipelineState(const MTL::TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::AutoreleasedRenderPipelineReflection* reflection, NS::Error** error)
+{
+    return Object::sendMessage<MTL::RenderPipelineState*>(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithTileDescriptor_options_reflection_error_), descriptor, options, reflection, error);
+}
+
+// method: newRenderPipelineStateWithTileDescriptor:options:completionHandler: — tile-descriptor variant; returns void and passes the handler block through.
+_MTL_INLINE void MTL::Device::newRenderPipelineState(const MTL::TileRenderPipelineDescriptor* descriptor, MTL::PipelineOption options, const MTL::NewRenderPipelineStateWithReflectionCompletionHandler completionHandler)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithTileDescriptor_options_completionHandler_), descriptor, options, completionHandler);
+}
+
+// property: maxThreadgroupMemoryLength — getter; returns the NS::UInteger obtained by sending `maxThreadgroupMemoryLength`.
+_MTL_INLINE NS::UInteger MTL::Device::maxThreadgroupMemoryLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxThreadgroupMemoryLength));
+}
+
+// property: maxArgumentBufferSamplerCount — getter; returns the NS::UInteger obtained by sending `maxArgumentBufferSamplerCount`.
+_MTL_INLINE NS::UInteger MTL::Device::maxArgumentBufferSamplerCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxArgumentBufferSamplerCount));
+}
+
+// property: programmableSamplePositionsSupported — getter; note the selector sent is `areProgrammableSamplePositionsSupported`, via sendMessageSafe.
+_MTL_INLINE bool MTL::Device::programmableSamplePositionsSupported() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(areProgrammableSamplePositionsSupported));
+}
+
+// method: getDefaultSamplePositions:count: — returns void; passes the non-const positions pointer and the count through.
+_MTL_INLINE void MTL::Device::getDefaultSamplePositions(MTL::SamplePosition* positions, NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(getDefaultSamplePositions_count_), positions, count);
+}
+
+// method: newArgumentEncoderWithArguments: — passes the NS::Array of arguments through and returns the result as MTL::ArgumentEncoder*.
+_MTL_INLINE MTL::ArgumentEncoder* MTL::Device::newArgumentEncoder(const NS::Array* arguments)
+{
+    return Object::sendMessage<MTL::ArgumentEncoder*>(this, _MTL_PRIVATE_SEL(newArgumentEncoderWithArguments_), arguments);
+}
+
+// method: supportsRasterizationRateMapWithLayerCount: — passes the layer count through via sendMessageSafe and returns the bool result.
+_MTL_INLINE bool MTL::Device::supportsRasterizationRateMap(NS::UInteger layerCount)
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsRasterizationRateMapWithLayerCount_), layerCount);
+}
+
+// method: newRasterizationRateMapWithDescriptor: — passes the descriptor through and returns the result as MTL::RasterizationRateMap*.
+_MTL_INLINE MTL::RasterizationRateMap* MTL::Device::newRasterizationRateMap(const MTL::RasterizationRateMapDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::RasterizationRateMap*>(this, _MTL_PRIVATE_SEL(newRasterizationRateMapWithDescriptor_), descriptor);
+}
+
+// method: newIndirectCommandBufferWithDescriptor:maxCommandCount:options: — the C++ parameter maxCount is passed as the maxCommandCount argument.
+_MTL_INLINE MTL::IndirectCommandBuffer* MTL::Device::newIndirectCommandBuffer(const MTL::IndirectCommandBufferDescriptor* descriptor, NS::UInteger maxCount, MTL::ResourceOptions options)
+{
+    return Object::sendMessage<MTL::IndirectCommandBuffer*>(this, _MTL_PRIVATE_SEL(newIndirectCommandBufferWithDescriptor_maxCommandCount_options_), descriptor, maxCount, options);
+}
+
+// method: newEvent — sends `newEvent` with no arguments and returns the result as MTL::Event*.
+_MTL_INLINE MTL::Event* MTL::Device::newEvent()
+{
+    return Object::sendMessage<MTL::Event*>(this, _MTL_PRIVATE_SEL(newEvent));
+}
+
+// method: newSharedEvent — sends `newSharedEvent` with no arguments and returns the result as MTL::SharedEvent*.
+_MTL_INLINE MTL::SharedEvent* MTL::Device::newSharedEvent()
+{
+    return Object::sendMessage<MTL::SharedEvent*>(this, _MTL_PRIVATE_SEL(newSharedEvent));
+}
+
+// method: newSharedEventWithHandle: — newSharedEvent overload that passes an MTL::SharedEventHandle through.
+_MTL_INLINE MTL::SharedEvent* MTL::Device::newSharedEvent(const MTL::SharedEventHandle* sharedEventHandle)
+{
+    return Object::sendMessage<MTL::SharedEvent*>(this, _MTL_PRIVATE_SEL(newSharedEventWithHandle_), sharedEventHandle);
+}
+
+// property: peerGroupID — getter; returns the uint64_t obtained by sending `peerGroupID`.
+_MTL_INLINE uint64_t MTL::Device::peerGroupID() const
+{
+    return Object::sendMessage<uint64_t>(this, _MTL_PRIVATE_SEL(peerGroupID));
+}
+
+// property: peerIndex — getter; returns the uint32_t obtained by sending `peerIndex`.
+_MTL_INLINE uint32_t MTL::Device::peerIndex() const
+{
+    return Object::sendMessage<uint32_t>(this, _MTL_PRIVATE_SEL(peerIndex));
+}
+
+// property: peerCount — getter; returns the uint32_t obtained by sending `peerCount`.
+_MTL_INLINE uint32_t MTL::Device::peerCount() const
+{
+    return Object::sendMessage<uint32_t>(this, _MTL_PRIVATE_SEL(peerCount));
+}
+
+// method: sparseTileSizeWithTextureType:pixelFormat:sampleCount: — passes the three arguments through and returns the MTL::Size result by value.
+_MTL_INLINE MTL::Size MTL::Device::sparseTileSize(MTL::TextureType textureType, MTL::PixelFormat pixelFormat, NS::UInteger sampleCount)
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(sparseTileSizeWithTextureType_pixelFormat_sampleCount_), textureType, pixelFormat, sampleCount);
+}
+
+// property: sparseTileSizeInBytes — getter; returns the NS::UInteger obtained by sending `sparseTileSizeInBytes`.
+_MTL_INLINE NS::UInteger MTL::Device::sparseTileSizeInBytes() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sparseTileSizeInBytes));
+}
+
+// method: convertSparsePixelRegions:toTileRegions:withTileSize:alignmentMode:numRegions: — returns void; pixelRegions is const, tileRegions is the non-const pointer.
+_MTL_INLINE void MTL::Device::convertSparsePixelRegions(const MTL::Region* pixelRegions, MTL::Region* tileRegions, MTL::Size tileSize, MTL::SparseTextureRegionAlignmentMode mode, NS::UInteger numRegions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(convertSparsePixelRegions_toTileRegions_withTileSize_alignmentMode_numRegions_), pixelRegions, tileRegions, tileSize, mode, numRegions);
+}
+
+// method: convertSparseTileRegions:toPixelRegions:withTileSize:numRegions: — returns void; tileRegions is const, pixelRegions is the non-const pointer.
+_MTL_INLINE void MTL::Device::convertSparseTileRegions(const MTL::Region* tileRegions, MTL::Region* pixelRegions, MTL::Size tileSize, NS::UInteger numRegions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(convertSparseTileRegions_toPixelRegions_withTileSize_numRegions_), tileRegions, pixelRegions, tileSize, numRegions);
+}
+
+// property: maxBufferLength — getter; returns the NS::UInteger obtained by sending `maxBufferLength`.
+_MTL_INLINE NS::UInteger MTL::Device::maxBufferLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxBufferLength));
+}
+
+// property: counterSets - const getter; sends the counterSets selector and returns the NS::Array* result.
+_MTL_INLINE NS::Array* MTL::Device::counterSets() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(counterSets));
+}
+
+// method: newCounterSampleBufferWithDescriptor:error: - forwards descriptor and the NS::Error** argument unchanged; returns the MTL::CounterSampleBuffer* result.
+_MTL_INLINE MTL::CounterSampleBuffer* MTL::Device::newCounterSampleBuffer(const MTL::CounterSampleBufferDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<MTL::CounterSampleBuffer*>(this, _MTL_PRIVATE_SEL(newCounterSampleBufferWithDescriptor_error_), descriptor, error);
+}
+
+// method: sampleTimestamps:gpuTimestamp: - forwards both MTL::Timestamp pointers unchanged; returns nothing.
+_MTL_INLINE void MTL::Device::sampleTimestamps(MTL::Timestamp* cpuTimestamp, MTL::Timestamp* gpuTimestamp)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(sampleTimestamps_gpuTimestamp_), cpuTimestamp, gpuTimestamp);
+}
+
+// method: supportsCounterSampling: - forwards samplingPoint and returns bool; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsCounterSampling(MTL::CounterSamplingPoint samplingPoint)
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsCounterSampling_), samplingPoint);
+}
+
+// method: supportsVertexAmplificationCount: - forwards count and returns bool; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsVertexAmplificationCount(NS::UInteger count)
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsVertexAmplificationCount_), count);
+}
+
+// property: supportsDynamicLibraries - const bool getter; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsDynamicLibraries() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsDynamicLibraries));
+}
+
+// property: supportsRenderDynamicLibraries - const bool getter; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsRenderDynamicLibraries() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsRenderDynamicLibraries));
+}
+
+// method: newDynamicLibrary:error: - MTL::Library overload; forwards library and the NS::Error** argument unchanged and returns the MTL::DynamicLibrary* result.
+_MTL_INLINE MTL::DynamicLibrary* MTL::Device::newDynamicLibrary(const MTL::Library* library, NS::Error** error)
+{
+    return Object::sendMessage<MTL::DynamicLibrary*>(this, _MTL_PRIVATE_SEL(newDynamicLibrary_error_), library, error);
+}
+
+// method: newDynamicLibraryWithURL:error: - NS::URL overload; forwards url and the NS::Error** argument unchanged and returns the MTL::DynamicLibrary* result.
+_MTL_INLINE MTL::DynamicLibrary* MTL::Device::newDynamicLibrary(const NS::URL* url, NS::Error** error)
+{
+    return Object::sendMessage<MTL::DynamicLibrary*>(this, _MTL_PRIVATE_SEL(newDynamicLibraryWithURL_error_), url, error);
+}
+
+// method: newBinaryArchiveWithDescriptor:error: - forwards descriptor and the NS::Error** argument unchanged; returns the MTL::BinaryArchive* result.
+_MTL_INLINE MTL::BinaryArchive* MTL::Device::newBinaryArchive(const MTL::BinaryArchiveDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<MTL::BinaryArchive*>(this, _MTL_PRIVATE_SEL(newBinaryArchiveWithDescriptor_error_), descriptor, error);
+}
+
+// property: supportsRaytracing - const bool getter; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsRaytracing() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsRaytracing));
+}
+
+// method: accelerationStructureSizesWithDescriptor: - forwards descriptor unchanged and returns the MTL::AccelerationStructureSizes result by value.
+_MTL_INLINE MTL::AccelerationStructureSizes MTL::Device::accelerationStructureSizes(const MTL::AccelerationStructureDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::AccelerationStructureSizes>(this, _MTL_PRIVATE_SEL(accelerationStructureSizesWithDescriptor_), descriptor);
+}
+
+// method: newAccelerationStructureWithSize: - size overload; forwards size unchanged and returns the MTL::AccelerationStructure* result.
+_MTL_INLINE MTL::AccelerationStructure* MTL::Device::newAccelerationStructure(NS::UInteger size)
+{
+    return Object::sendMessage<MTL::AccelerationStructure*>(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithSize_), size);
+}
+
+// method: newAccelerationStructureWithDescriptor: - descriptor overload; forwards descriptor unchanged and returns the MTL::AccelerationStructure* result.
+_MTL_INLINE MTL::AccelerationStructure* MTL::Device::newAccelerationStructure(const MTL::AccelerationStructureDescriptor* descriptor)
+{
+    return Object::sendMessage<MTL::AccelerationStructure*>(this, _MTL_PRIVATE_SEL(newAccelerationStructureWithDescriptor_), descriptor);
+}
+
+// property: supportsFunctionPointers - const bool getter; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsFunctionPointers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsFunctionPointers));
+}
+
+// property: supportsFunctionPointersFromRender - const bool getter; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsFunctionPointersFromRender() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsFunctionPointersFromRender));
+}
+
+// property: supportsRaytracingFromRender - const bool getter; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsRaytracingFromRender() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsRaytracingFromRender));
+}
+
+// property: supportsPrimitiveMotionBlur - const bool getter; dispatched through sendMessageSafe instead of sendMessage (NOTE(review): presumably tolerates receivers lacking this selector - confirm).
+_MTL_INLINE bool MTL::Device::supportsPrimitiveMotionBlur() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportsPrimitiveMotionBlur));
+}
diff --git a/metal-cpp/Metal/MTLDrawable.hpp b/metal-cpp/Metal/MTLDrawable.hpp
new file mode 100644
index 0000000..7340976
--- /dev/null
+++ b/metal-cpp/Metal/MTLDrawable.hpp
@@ -0,0 +1,99 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDrawable.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include <CoreFoundation/CoreFoundation.h>
+#include <functional>
+
+namespace MTL
+{
+using DrawablePresentedHandler = void (^)(class Drawable*); // block-typed callback taking a Drawable*; `class` also forward-declares Drawable
+
+using DrawablePresentedHandlerFunction = std::function<void(class Drawable*)>; // std::function form of the same callback signature
+
+class Drawable : public NS::Referencing<Drawable> // declaration only; the inline definitions later in this header forward each call to a selector
+{
+public:
+    void           addPresentedHandler(const MTL::DrawablePresentedHandlerFunction& function); // std::function overload; wraps the callable in a block and calls the block overload
+
+    void           present(); // sends present
+
+    void           presentAtTime(CFTimeInterval presentationTime); // sends presentAtTime:
+
+    void           presentAfterMinimumDuration(CFTimeInterval duration); // sends presentAfterMinimumDuration:
+
+    void           addPresentedHandler(const MTL::DrawablePresentedHandler block); // block overload; sends addPresentedHandler:
+
+    CFTimeInterval presentedTime() const; // property getter
+
+    NS::UInteger   drawableID() const; // property getter
+};
+
+}
+
+_MTL_INLINE void MTL::Drawable::addPresentedHandler(const MTL::DrawablePresentedHandlerFunction& function) // std::function overload: adapts the callable and delegates to the block overload
+{
+    __block DrawablePresentedHandlerFunction blockFunction = function; // copy the callable into a __block variable that the block below invokes
+
+    addPresentedHandler(^(Drawable* pDrawable) { blockFunction(pDrawable); }); // the block only forwards its drawable argument to the copied callable
+}
+
+// method: present - sends the present selector to this drawable with no arguments; returns nothing.
+_MTL_INLINE void MTL::Drawable::present()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(present));
+}
+
+// method: presentAtTime: - forwards presentationTime (a CFTimeInterval) unchanged; returns nothing.
+_MTL_INLINE void MTL::Drawable::presentAtTime(CFTimeInterval presentationTime)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(presentAtTime_), presentationTime);
+}
+
+// method: presentAfterMinimumDuration: - forwards duration (a CFTimeInterval) unchanged; returns nothing.
+_MTL_INLINE void MTL::Drawable::presentAfterMinimumDuration(CFTimeInterval duration)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(presentAfterMinimumDuration_), duration);
+}
+
+// method: addPresentedHandler: - block overload; forwards the block unchanged to the addPresentedHandler: selector; returns nothing.
+_MTL_INLINE void MTL::Drawable::addPresentedHandler(const MTL::DrawablePresentedHandler block)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(addPresentedHandler_), block);
+}
+
+// property: presentedTime - const getter; sends the presentedTime selector and returns the CFTimeInterval result.
+_MTL_INLINE CFTimeInterval MTL::Drawable::presentedTime() const
+{
+    return Object::sendMessage<CFTimeInterval>(this, _MTL_PRIVATE_SEL(presentedTime));
+}
+
+// property: drawableID - const getter; sends the drawableID selector and returns the NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::Drawable::drawableID() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(drawableID));
+}
diff --git a/metal-cpp/Metal/MTLDynamicLibrary.hpp b/metal-cpp/Metal/MTLDynamicLibrary.hpp
new file mode 100644
index 0000000..489b999
--- /dev/null
+++ b/metal-cpp/Metal/MTLDynamicLibrary.hpp
@@ -0,0 +1,82 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLDynamicLibrary.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, DynamicLibraryError) { // error codes declared via _MTL_ENUM over NS::UInteger, explicit values 0-5
+    DynamicLibraryErrorNone = 0,
+    DynamicLibraryErrorInvalidFile = 1,
+    DynamicLibraryErrorCompilationFailure = 2,
+    DynamicLibraryErrorUnresolvedInstallName = 3,
+    DynamicLibraryErrorDependencyLoadFailure = 4,
+    DynamicLibraryErrorUnsupported = 5,
+};
+
+class DynamicLibrary : public NS::Referencing<DynamicLibrary> // declaration only; the inline definitions later in this header forward each call to a selector
+{
+public:
+    NS::String*   label() const; // property getter
+    void          setLabel(const NS::String* label); // property setter (setLabel:)
+
+    class Device* device() const; // property getter; `class` forward-declares MTL::Device
+
+    NS::String*   installName() const; // property getter
+
+    bool          serializeToURL(const NS::URL* url, NS::Error** error); // sends serializeToURL:error: and returns its bool result
+};
+
+}
+
+// property: label - getter sends the label selector and returns NS::String*; the setter below sends setLabel: with the given string.
+_MTL_INLINE NS::String* MTL::DynamicLibrary::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::DynamicLibrary::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: device - const getter; sends the device selector and returns the MTL::Device* result.
+_MTL_INLINE MTL::Device* MTL::DynamicLibrary::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: installName - const getter; sends the installName selector and returns the NS::String* result.
+_MTL_INLINE NS::String* MTL::DynamicLibrary::installName() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(installName));
+}
+
+// method: serializeToURL:error: - forwards url and the NS::Error** argument unchanged; returns the bool result (plain sendMessage, not sendMessageSafe).
+_MTL_INLINE bool MTL::DynamicLibrary::serializeToURL(const NS::URL* url, NS::Error** error)
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(serializeToURL_error_), url, error);
+}
diff --git a/metal-cpp/Metal/MTLEvent.hpp b/metal-cpp/Metal/MTLEvent.hpp
new file mode 100644
index 0000000..4a9fac2
--- /dev/null
+++ b/metal-cpp/Metal/MTLEvent.hpp
@@ -0,0 +1,163 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLEvent.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLEvent.hpp"
+
+namespace MTL
+{
+class Event : public NS::Referencing<Event> // declaration only; also the second template argument (base) of SharedEvent below
+{
+public:
+    class Device* device() const; // property getter; `class` forward-declares MTL::Device
+
+    NS::String*   label() const; // property getter
+    void          setLabel(const NS::String* label); // property setter (setLabel:)
+};
+
+class SharedEventListener : public NS::Referencing<SharedEventListener> // declaration only; creatable from C++ through alloc() followed by one of the init overloads
+{
+public:
+    static class SharedEventListener* alloc(); // allocates via the MTLSharedEventListener class
+
+    MTL::SharedEventListener*         init(); // plain init
+
+    MTL::SharedEventListener*         init(const dispatch_queue_t dispatchQueue); // sends initWithDispatchQueue:
+
+    dispatch_queue_t                  dispatchQueue() const; // property getter
+};
+
+using SharedEventNotificationBlock = void (^)(class SharedEvent* pEvent, std::uint64_t value); // block callback type used by SharedEvent::notifyListener; `class` forward-declares SharedEvent (defined just below) so this header does not rely on include order
+
+class SharedEvent : public NS::Referencing<SharedEvent, Event> // declaration only; Event is passed as the base so device()/label() are inherited
+{
+public:
+    void                     notifyListener(const class SharedEventListener* listener, uint64_t value, const MTL::SharedEventNotificationBlock block); // sends notifyListener:atValue:block:
+
+    class SharedEventHandle* newSharedEventHandle(); // sends newSharedEventHandle; `class` forward-declares SharedEventHandle
+
+    uint64_t                 signaledValue() const; // property getter
+    void                     setSignaledValue(uint64_t signaledValue); // property setter (setSignaledValue:)
+};
+
+class SharedEventHandle : public NS::Referencing<SharedEventHandle> // declaration only; the type returned by SharedEvent::newSharedEventHandle()
+{
+public:
+    static class SharedEventHandle* alloc(); // allocates via the MTLSharedEventHandle class
+
+    class SharedEventHandle*        init(); // plain init
+
+    NS::String*                     label() const; // property getter; no setter is declared here
+};
+
+struct SharedEventHandlePrivate // empty struct marked _MTL_PACKED; not referenced elsewhere in this header (NOTE(review): presumably a generator placeholder - confirm)
+{
+} _MTL_PACKED;
+
+}
+
+// property: device - const getter; sends the device selector and returns the MTL::Device* result.
+_MTL_INLINE MTL::Device* MTL::Event::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label - getter sends the label selector and returns NS::String*; the setter below sends setLabel: with the given string.
+_MTL_INLINE NS::String* MTL::Event::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::Event::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// static method: alloc - delegates to NS::Object::alloc with the MTLSharedEventListener class looked up through _MTL_PRIVATE_CLS.
+_MTL_INLINE MTL::SharedEventListener* MTL::SharedEventListener::alloc()
+{
+    return NS::Object::alloc<MTL::SharedEventListener>(_MTL_PRIVATE_CLS(MTLSharedEventListener));
+}
+
+// method: init - argument-less initializer; delegates to NS::Object::init and returns the result typed as MTL::SharedEventListener*.
+_MTL_INLINE MTL::SharedEventListener* MTL::SharedEventListener::init()
+{
+    return NS::Object::init<MTL::SharedEventListener>();
+}
+
+// method: initWithDispatchQueue: - queue overload of init; forwards dispatchQueue unchanged and returns the MTL::SharedEventListener* result.
+_MTL_INLINE MTL::SharedEventListener* MTL::SharedEventListener::init(const dispatch_queue_t dispatchQueue)
+{
+    return Object::sendMessage<MTL::SharedEventListener*>(this, _MTL_PRIVATE_SEL(initWithDispatchQueue_), dispatchQueue);
+}
+
+// property: dispatchQueue - const getter; sends the dispatchQueue selector and returns the dispatch_queue_t result.
+_MTL_INLINE dispatch_queue_t MTL::SharedEventListener::dispatchQueue() const
+{
+    return Object::sendMessage<dispatch_queue_t>(this, _MTL_PRIVATE_SEL(dispatchQueue));
+}
+
+// method: notifyListener:atValue:block: - forwards listener, value and block unchanged; returns nothing.
+_MTL_INLINE void MTL::SharedEvent::notifyListener(const MTL::SharedEventListener* listener, uint64_t value, const MTL::SharedEventNotificationBlock block)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(notifyListener_atValue_block_), listener, value, block);
+}
+
+// method: newSharedEventHandle - sends the newSharedEventHandle selector with no arguments and returns the MTL::SharedEventHandle* result.
+_MTL_INLINE MTL::SharedEventHandle* MTL::SharedEvent::newSharedEventHandle()
+{
+    return Object::sendMessage<MTL::SharedEventHandle*>(this, _MTL_PRIVATE_SEL(newSharedEventHandle));
+}
+
+// property: signaledValue - getter sends the signaledValue selector and returns uint64_t; the setter below sends setSignaledValue: with the given value.
+_MTL_INLINE uint64_t MTL::SharedEvent::signaledValue() const
+{
+    return Object::sendMessage<uint64_t>(this, _MTL_PRIVATE_SEL(signaledValue));
+}
+
+_MTL_INLINE void MTL::SharedEvent::setSignaledValue(uint64_t signaledValue)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSignaledValue_), signaledValue);
+}
+
+// static method: alloc - delegates to NS::Object::alloc with the MTLSharedEventHandle class looked up through _MTL_PRIVATE_CLS.
+_MTL_INLINE MTL::SharedEventHandle* MTL::SharedEventHandle::alloc()
+{
+    return NS::Object::alloc<MTL::SharedEventHandle>(_MTL_PRIVATE_CLS(MTLSharedEventHandle));
+}
+
+// method: init - argument-less initializer; delegates to NS::Object::init and returns the result typed as MTL::SharedEventHandle*.
+_MTL_INLINE MTL::SharedEventHandle* MTL::SharedEventHandle::init()
+{
+    return NS::Object::init<MTL::SharedEventHandle>();
+}
+
+// property: label - const getter; sends the label selector and returns the NS::String* result.
+_MTL_INLINE NS::String* MTL::SharedEventHandle::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
diff --git a/metal-cpp/Metal/MTLFence.hpp b/metal-cpp/Metal/MTLFence.hpp
new file mode 100644
index 0000000..bc6e931
--- /dev/null
+++ b/metal-cpp/Metal/MTLFence.hpp
@@ -0,0 +1,57 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLFence.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{
+class Fence : public NS::Referencing<Fence> // declaration only; exposes just the device and label properties
+{
+public:
+    class Device* device() const; // property getter; `class` forward-declares MTL::Device
+
+    NS::String*   label() const; // property getter
+    void          setLabel(const NS::String* label); // property setter (setLabel:)
+};
+
+}
+
+// property: device - const getter; sends the device selector and returns the MTL::Device* result.
+_MTL_INLINE MTL::Device* MTL::Fence::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label - getter sends the label selector and returns NS::String*; the setter below sends setLabel: with the given string.
+_MTL_INLINE NS::String* MTL::Fence::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::Fence::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
diff --git a/metal-cpp/Metal/MTLFunctionConstantValues.hpp b/metal-cpp/Metal/MTLFunctionConstantValues.hpp
new file mode 100644
index 0000000..9608554
--- /dev/null
+++ b/metal-cpp/Metal/MTLFunctionConstantValues.hpp
@@ -0,0 +1,85 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLFunctionConstantValues.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLArgument.hpp"
+
+namespace MTL
+{
+class FunctionConstantValues : public NS::Copying<FunctionConstantValues> // declaration only; derives from NS::Copying rather than NS::Referencing
+{
+public:
+    static class FunctionConstantValues* alloc(); // allocates via the MTLFunctionConstantValues class
+
+    class FunctionConstantValues*        init(); // plain init
+
+    void                                 setConstantValue(const void* value, MTL::DataType type, NS::UInteger index); // index overload; sends setConstantValue:type:atIndex:
+
+    void                                 setConstantValues(const void* values, MTL::DataType type, NS::Range range); // sends setConstantValues:type:withRange:
+
+    void                                 setConstantValue(const void* value, MTL::DataType type, const NS::String* name); // name overload; sends setConstantValue:type:withName:
+
+    void                                 reset(); // sends reset
+};
+
+}
+
+// static method: alloc - delegates to NS::Object::alloc with the MTLFunctionConstantValues class looked up through _MTL_PRIVATE_CLS.
+_MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionConstantValues::alloc()
+{
+    return NS::Object::alloc<MTL::FunctionConstantValues>(_MTL_PRIVATE_CLS(MTLFunctionConstantValues));
+}
+
+// method: init - argument-less initializer; delegates to NS::Object::init and returns the result typed as MTL::FunctionConstantValues*.
+_MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionConstantValues::init()
+{
+    return NS::Object::init<MTL::FunctionConstantValues>();
+}
+
+// method: setConstantValue:type:atIndex: - index overload; forwards the untyped value pointer, its MTL::DataType and the index unchanged; returns nothing.
+_MTL_INLINE void MTL::FunctionConstantValues::setConstantValue(const void* value, MTL::DataType type, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setConstantValue_type_atIndex_), value, type, index);
+}
+
+// method: setConstantValues:type:withRange: - forwards the untyped values pointer, its MTL::DataType and the NS::Range unchanged; returns nothing.
+_MTL_INLINE void MTL::FunctionConstantValues::setConstantValues(const void* values, MTL::DataType type, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setConstantValues_type_withRange_), values, type, range);
+}
+
+// method: setConstantValue:type:withName: - name overload; forwards the untyped value pointer, its MTL::DataType and the name string unchanged; returns nothing.
+_MTL_INLINE void MTL::FunctionConstantValues::setConstantValue(const void* value, MTL::DataType type, const NS::String* name)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setConstantValue_type_withName_), value, type, name);
+}
+
+// method: reset - sends the reset selector with no arguments; returns nothing.
+_MTL_INLINE void MTL::FunctionConstantValues::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
diff --git a/metal-cpp/Metal/MTLFunctionDescriptor.hpp b/metal-cpp/Metal/MTLFunctionDescriptor.hpp
new file mode 100644
index 0000000..a6f00d6
--- /dev/null
+++ b/metal-cpp/Metal/MTLFunctionDescriptor.hpp
@@ -0,0 +1,156 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLFunctionDescriptor.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLFunctionDescriptor.hpp"
+
+namespace MTL
+{
+_MTL_OPTIONS(NS::UInteger, FunctionOptions) { // declared via the _MTL_OPTIONS macro over NS::UInteger
+    FunctionOptionNone = 0,
+    FunctionOptionCompileToBinary = 1,
+};
+
+class FunctionDescriptor : public NS::Copying<FunctionDescriptor> // declaration only; each property below is a getter/setter pair forwarded to selectors
+{
+public:
+    static class FunctionDescriptor* alloc(); // allocates via the MTLFunctionDescriptor class
+
+    class FunctionDescriptor*        init(); // plain init
+
+    static class FunctionDescriptor* functionDescriptor(); // class-level factory; message is sent to the class, not an instance
+
+    NS::String*                      name() const;
+    void                             setName(const NS::String* name);
+
+    NS::String*                      specializedName() const;
+    void                             setSpecializedName(const NS::String* specializedName);
+
+    class FunctionConstantValues*    constantValues() const;
+    void                             setConstantValues(const class FunctionConstantValues* constantValues);
+
+    MTL::FunctionOptions             options() const;
+    void                             setOptions(MTL::FunctionOptions options);
+
+    NS::Array*                       binaryArchives() const;
+    void                             setBinaryArchives(const NS::Array* binaryArchives);
+};
+
+class IntersectionFunctionDescriptor : public NS::Copying<IntersectionFunctionDescriptor, MTL::FunctionDescriptor> // FunctionDescriptor passed as base; adds only its own alloc/init
+{
+public:
+    static class IntersectionFunctionDescriptor* alloc(); // allocates via the MTLIntersectionFunctionDescriptor class
+
+    class IntersectionFunctionDescriptor*        init(); // plain init
+};
+
+}
+
+// static method: alloc - delegates to NS::Object::alloc with the MTLFunctionDescriptor class looked up through _MTL_PRIVATE_CLS.
+_MTL_INLINE MTL::FunctionDescriptor* MTL::FunctionDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::FunctionDescriptor>(_MTL_PRIVATE_CLS(MTLFunctionDescriptor));
+}
+
+// method: init - argument-less initializer; delegates to NS::Object::init and returns the result typed as MTL::FunctionDescriptor*.
+_MTL_INLINE MTL::FunctionDescriptor* MTL::FunctionDescriptor::init()
+{
+    return NS::Object::init<MTL::FunctionDescriptor>();
+}
+
+// static method: functionDescriptor - sends the functionDescriptor selector to the MTLFunctionDescriptor class itself (no instance) and returns the MTL::FunctionDescriptor* result.
+_MTL_INLINE MTL::FunctionDescriptor* MTL::FunctionDescriptor::functionDescriptor()
+{
+    return Object::sendMessage<MTL::FunctionDescriptor*>(_MTL_PRIVATE_CLS(MTLFunctionDescriptor), _MTL_PRIVATE_SEL(functionDescriptor));
+}
+
+// property: name - getter sends the name selector and returns NS::String*; the setter below sends setName: with the given string.
+_MTL_INLINE NS::String* MTL::FunctionDescriptor::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+_MTL_INLINE void MTL::FunctionDescriptor::setName(const NS::String* name)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setName_), name);
+}
+
+// property: specializedName - getter sends the specializedName selector and returns NS::String*; the setter below sends setSpecializedName: with the given string.
+_MTL_INLINE NS::String* MTL::FunctionDescriptor::specializedName() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(specializedName));
+}
+
+_MTL_INLINE void MTL::FunctionDescriptor::setSpecializedName(const NS::String* specializedName)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSpecializedName_), specializedName);
+}
+
+// property: constantValues - getter sends the constantValues selector and returns MTL::FunctionConstantValues*; the setter below sends setConstantValues: with the given object.
+_MTL_INLINE MTL::FunctionConstantValues* MTL::FunctionDescriptor::constantValues() const
+{
+    return Object::sendMessage<MTL::FunctionConstantValues*>(this, _MTL_PRIVATE_SEL(constantValues));
+}
+
+_MTL_INLINE void MTL::FunctionDescriptor::setConstantValues(const MTL::FunctionConstantValues* constantValues)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setConstantValues_), constantValues);
+}
+
+// property: options - getter sends the options selector and returns MTL::FunctionOptions; the setter below sends setOptions: with the given value.
+_MTL_INLINE MTL::FunctionOptions MTL::FunctionDescriptor::options() const
+{
+    return Object::sendMessage<MTL::FunctionOptions>(this, _MTL_PRIVATE_SEL(options));
+}
+
+_MTL_INLINE void MTL::FunctionDescriptor::setOptions(MTL::FunctionOptions options)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOptions_), options);
+}
+
+// property: binaryArchives - getter sends the binaryArchives selector and returns NS::Array*; the setter below sends setBinaryArchives: with the given array.
+_MTL_INLINE NS::Array* MTL::FunctionDescriptor::binaryArchives() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(binaryArchives));
+}
+
+_MTL_INLINE void MTL::FunctionDescriptor::setBinaryArchives(const NS::Array* binaryArchives)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives);
+}
+
+// static method: alloc - delegates to NS::Object::alloc with the MTLIntersectionFunctionDescriptor class looked up through _MTL_PRIVATE_CLS.
+_MTL_INLINE MTL::IntersectionFunctionDescriptor* MTL::IntersectionFunctionDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::IntersectionFunctionDescriptor>(_MTL_PRIVATE_CLS(MTLIntersectionFunctionDescriptor));
+}
+
+// method: init - argument-less initializer; delegates to NS::Object::init and returns the result typed as MTL::IntersectionFunctionDescriptor*.
+_MTL_INLINE MTL::IntersectionFunctionDescriptor* MTL::IntersectionFunctionDescriptor::init()
+{
+    return NS::Object::init<MTL::IntersectionFunctionDescriptor>();
+}
diff --git a/metal-cpp/Metal/MTLFunctionHandle.hpp b/metal-cpp/Metal/MTLFunctionHandle.hpp
new file mode 100644
index 0000000..64f3851
--- /dev/null
+++ b/metal-cpp/Metal/MTLFunctionHandle.hpp
@@ -0,0 +1,61 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLFunctionHandle.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLLibrary.hpp"
+
+namespace MTL
+{
+class FunctionHandle : public NS::Referencing<FunctionHandle> // declaration only; three getters, no setters and no alloc/init
+{
+public:
+    MTL::FunctionType functionType() const; // property getter
+
+    NS::String*       name() const; // property getter
+
+    class Device*     device() const; // property getter; `class` forward-declares MTL::Device
+};
+
+}
+
+// property: functionType - const getter; sends the functionType selector and returns the MTL::FunctionType result.
+_MTL_INLINE MTL::FunctionType MTL::FunctionHandle::functionType() const
+{
+    return Object::sendMessage<MTL::FunctionType>(this, _MTL_PRIVATE_SEL(functionType));
+}
+
+// property: name - const getter; sends the name selector and returns the NS::String* result.
+_MTL_INLINE NS::String* MTL::FunctionHandle::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: device - const getter; sends the device selector and returns the MTL::Device* result.
+_MTL_INLINE MTL::Device* MTL::FunctionHandle::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
diff --git a/metal-cpp/Metal/MTLFunctionLog.hpp b/metal-cpp/Metal/MTLFunctionLog.hpp
new file mode 100644
index 0000000..ca38dc9
--- /dev/null
+++ b/metal-cpp/Metal/MTLFunctionLog.hpp
@@ -0,0 +1,114 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLFunctionLog.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLFunctionLog.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, FunctionLogType) { // declared via _MTL_ENUM over NS::UInteger; a single enumerator is defined here
+    FunctionLogTypeValidation = 0,
+};
+
+class LogContainer : public NS::Referencing<LogContainer, NS::FastEnumeration> // NS::FastEnumeration passed as base; declares no members of its own
+{
+public:
+};
+
+class FunctionLogDebugLocation : public NS::Referencing<FunctionLogDebugLocation> // declaration only; four getters
+{
+public:
+    NS::String*  functionName() const; // property getter
+
+    NS::URL*     URL() const; // property getter
+
+    NS::UInteger line() const; // property getter
+
+    NS::UInteger column() const; // property getter
+};
+
+class FunctionLog : public NS::Referencing<FunctionLog> // declaration only; four getters
+{
+public:
+    MTL::FunctionLogType            type() const; // property getter
+
+    NS::String*                     encoderLabel() const; // property getter
+
+    class Function*                 function() const; // property getter; `class` forward-declares MTL::Function
+
+    class FunctionLogDebugLocation* debugLocation() const; // property getter
+};
+
+}
+
+// property: functionName - const getter; sends the functionName selector and returns the NS::String* result.
+_MTL_INLINE NS::String* MTL::FunctionLogDebugLocation::functionName() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(functionName));
+}
+
+// property: URL - const getter; sends the URL selector and returns the NS::URL* result.
+_MTL_INLINE NS::URL* MTL::FunctionLogDebugLocation::URL() const
+{
+    return Object::sendMessage<NS::URL*>(this, _MTL_PRIVATE_SEL(URL));
+}
+
+// property: line - const getter; sends the line selector and returns the NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::FunctionLogDebugLocation::line() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(line));
+}
+
+// property: column - const getter; sends the column selector and returns the NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::FunctionLogDebugLocation::column() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(column));
+}
+
+// property: type - const getter; sends the type selector and returns the MTL::FunctionLogType result.
+_MTL_INLINE MTL::FunctionLogType MTL::FunctionLog::type() const
+{
+    return Object::sendMessage<MTL::FunctionLogType>(this, _MTL_PRIVATE_SEL(type));
+}
+
+// property: encoderLabel - const getter; sends the encoderLabel selector and returns the NS::String* result.
+_MTL_INLINE NS::String* MTL::FunctionLog::encoderLabel() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(encoderLabel));
+}
+
+// property: function - const getter; sends the function selector and returns the MTL::Function* result.
+_MTL_INLINE MTL::Function* MTL::FunctionLog::function() const
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(function));
+}
+
+// property: debugLocation - const getter; sends the debugLocation selector and returns the MTL::FunctionLogDebugLocation* result.
+_MTL_INLINE MTL::FunctionLogDebugLocation* MTL::FunctionLog::debugLocation() const
+{
+    return Object::sendMessage<MTL::FunctionLogDebugLocation*>(this, _MTL_PRIVATE_SEL(debugLocation));
+}
diff --git a/metal-cpp/Metal/MTLFunctionStitching.hpp b/metal-cpp/Metal/MTLFunctionStitching.hpp
new file mode 100644
index 0000000..19dcb49
--- /dev/null
+++ b/metal-cpp/Metal/MTLFunctionStitching.hpp
@@ -0,0 +1,305 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLFunctionStitching.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLFunctionStitching.hpp"
+
+namespace MTL
+{
+class FunctionStitchingAttribute : public NS::Referencing<FunctionStitchingAttribute> // member-less base; FunctionStitchingAttributeAlwaysInline derives from it through NS::Referencing
+{
+};
+
+class FunctionStitchingAttributeAlwaysInline : public NS::Referencing<FunctionStitchingAttributeAlwaysInline, FunctionStitchingAttribute>
+{
+public:
+    // alloc()/init() pair; both bodies are defined inline after the MTL namespace closes.
+    static FunctionStitchingAttributeAlwaysInline* alloc();
+    FunctionStitchingAttributeAlwaysInline*        init();
+};
+
+class FunctionStitchingNode : public NS::Copying<FunctionStitchingNode> // member-less base shared by FunctionStitchingInputNode and FunctionStitchingFunctionNode
+{
+};
+
+class FunctionStitchingInputNode : public NS::Referencing<FunctionStitchingInputNode, FunctionStitchingNode>
+{
+public:
+    // Allocation and default initialisation.
+    static FunctionStitchingInputNode* alloc();
+    FunctionStitchingInputNode*        init();
+    // argumentIndex property: getter and setter.
+    NS::UInteger                       argumentIndex() const;
+    void                               setArgumentIndex(NS::UInteger argumentIndex);
+    // init overload bound to the initWithArgumentIndex: selector.
+    FunctionStitchingInputNode*        init(NS::UInteger argument);
+};
+
+class FunctionStitchingFunctionNode : public NS::Referencing<FunctionStitchingFunctionNode, FunctionStitchingNode>
+{
+public:
+    // Allocation and default initialisation.
+    static FunctionStitchingFunctionNode* alloc();
+    FunctionStitchingFunctionNode*        init();
+    // name property: getter and setter.
+    NS::String*                           name() const;
+    void                                  setName(const NS::String* name);
+    // arguments property: getter and setter.
+    NS::Array*                            arguments() const;
+    void                                  setArguments(const NS::Array* arguments);
+    // controlDependencies property: getter and setter.
+    NS::Array*                            controlDependencies() const;
+    void                                  setControlDependencies(const NS::Array* controlDependencies);
+    // init overload bound to the initWithName:arguments:controlDependencies: selector.
+    FunctionStitchingFunctionNode*        init(const NS::String* name, const NS::Array* arguments, const NS::Array* controlDependencies);
+};
+
+class FunctionStitchingGraph : public NS::Copying<FunctionStitchingGraph>
+{
+public:
+    // Allocation and default initialisation.
+    static FunctionStitchingGraph* alloc();
+    FunctionStitchingGraph*        init();
+    // functionName property: getter and setter.
+    NS::String*                    functionName() const;
+    void                           setFunctionName(const NS::String* functionName);
+    // nodes property: getter and setter.
+    NS::Array*                     nodes() const;
+    void                           setNodes(const NS::Array* nodes);
+    // outputNode property: getter and setter; the node type is declared earlier in this namespace.
+    FunctionStitchingFunctionNode* outputNode() const;
+    void                           setOutputNode(const FunctionStitchingFunctionNode* outputNode);
+    // attributes property: getter and setter.
+    NS::Array*                     attributes() const;
+    void                           setAttributes(const NS::Array* attributes);
+    // init overload bound to the initWithFunctionName:nodes:outputNode:attributes: selector.
+    FunctionStitchingGraph*        init(const NS::String* functionName, const NS::Array* nodes, const FunctionStitchingFunctionNode* outputNode, const NS::Array* attributes);
+};
+
+class StitchedLibraryDescriptor : public NS::Copying<StitchedLibraryDescriptor>
+{
+public:
+    // Allocation and default initialisation.
+    static StitchedLibraryDescriptor* alloc();
+    StitchedLibraryDescriptor*        init();
+    // functionGraphs property: getter and setter.
+    NS::Array*                        functionGraphs() const;
+    void                              setFunctionGraphs(const NS::Array* functionGraphs);
+    // functions property: getter and setter.
+    NS::Array*                        functions() const;
+    void                              setFunctions(const NS::Array* functions);
+};
+
+}
+
+// static alloc: delegates to NS::Object::alloc with the MTLFunctionStitchingAttributeAlwaysInline class handle.
+_MTL_INLINE MTL::FunctionStitchingAttributeAlwaysInline* MTL::FunctionStitchingAttributeAlwaysInline::alloc()
+{
+    return NS::Object::alloc<FunctionStitchingAttributeAlwaysInline>(_MTL_PRIVATE_CLS(MTLFunctionStitchingAttributeAlwaysInline));
+}
+
+// init: delegates to NS::Object::init, typed to return this wrapper class.
+_MTL_INLINE MTL::FunctionStitchingAttributeAlwaysInline* MTL::FunctionStitchingAttributeAlwaysInline::init()
+{
+    return NS::Object::init<FunctionStitchingAttributeAlwaysInline>();
+}
+
+// static alloc: delegates to NS::Object::alloc with the MTLFunctionStitchingInputNode class handle.
+_MTL_INLINE MTL::FunctionStitchingInputNode* MTL::FunctionStitchingInputNode::alloc()
+{
+    return NS::Object::alloc<FunctionStitchingInputNode>(_MTL_PRIVATE_CLS(MTLFunctionStitchingInputNode));
+}
+
+// init: delegates to NS::Object::init, typed to return this wrapper class.
+_MTL_INLINE MTL::FunctionStitchingInputNode* MTL::FunctionStitchingInputNode::init()
+{
+    return NS::Object::init<FunctionStitchingInputNode>();
+}
+
+// argumentIndex property: the getter comes first, then the setter (selector token setArgumentIndex_).
+_MTL_INLINE NS::UInteger MTL::FunctionStitchingInputNode::argumentIndex() const
+{
+    return NS::Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(argumentIndex));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingInputNode::setArgumentIndex(NS::UInteger argumentIndex)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setArgumentIndex_), argumentIndex);
+}
+
+// initWithArgumentIndex: — forwards `argument` with that selector and returns the reply as this wrapper type.
+_MTL_INLINE MTL::FunctionStitchingInputNode* MTL::FunctionStitchingInputNode::init(NS::UInteger argument)
+{
+    return NS::Object::sendMessage<MTL::FunctionStitchingInputNode*>(this, _MTL_PRIVATE_SEL(initWithArgumentIndex_), argument);
+}
+
+// static alloc: delegates to NS::Object::alloc with the MTLFunctionStitchingFunctionNode class handle.
+_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingFunctionNode::alloc()
+{
+    return NS::Object::alloc<FunctionStitchingFunctionNode>(_MTL_PRIVATE_CLS(MTLFunctionStitchingFunctionNode));
+}
+
+// init: delegates to NS::Object::init, typed to return this wrapper class.
+_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingFunctionNode::init()
+{
+    return NS::Object::init<FunctionStitchingFunctionNode>();
+}
+
+// name property: the getter comes first, then the setter (selector token setName_).
+_MTL_INLINE NS::String* MTL::FunctionStitchingFunctionNode::name() const
+{
+    return NS::Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingFunctionNode::setName(const NS::String* name)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setName_), name);
+}
+
+// arguments property: the getter comes first, then the setter (selector token setArguments_).
+_MTL_INLINE NS::Array* MTL::FunctionStitchingFunctionNode::arguments() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(arguments));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingFunctionNode::setArguments(const NS::Array* arguments)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setArguments_), arguments);
+}
+
+// controlDependencies property: the getter comes first, then the setter (selector token setControlDependencies_).
+_MTL_INLINE NS::Array* MTL::FunctionStitchingFunctionNode::controlDependencies() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(controlDependencies));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingFunctionNode::setControlDependencies(const NS::Array* controlDependencies)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setControlDependencies_), controlDependencies);
+}
+
+// initWithName:arguments:controlDependencies: — forwards the three arguments, in order, with that selector.
+_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingFunctionNode::init(const NS::String* name, const NS::Array* arguments, const NS::Array* controlDependencies)
+{
+    return NS::Object::sendMessage<MTL::FunctionStitchingFunctionNode*>(this, _MTL_PRIVATE_SEL(initWithName_arguments_controlDependencies_), name, arguments, controlDependencies);
+}
+
+// static alloc: delegates to NS::Object::alloc with the MTLFunctionStitchingGraph class handle.
+_MTL_INLINE MTL::FunctionStitchingGraph* MTL::FunctionStitchingGraph::alloc()
+{
+    return NS::Object::alloc<FunctionStitchingGraph>(_MTL_PRIVATE_CLS(MTLFunctionStitchingGraph));
+}
+
+// init: delegates to NS::Object::init, typed to return this wrapper class.
+_MTL_INLINE MTL::FunctionStitchingGraph* MTL::FunctionStitchingGraph::init()
+{
+    return NS::Object::init<FunctionStitchingGraph>();
+}
+
+// functionName property: the getter comes first, then the setter (selector token setFunctionName_).
+_MTL_INLINE NS::String* MTL::FunctionStitchingGraph::functionName() const
+{
+    return NS::Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(functionName));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingGraph::setFunctionName(const NS::String* functionName)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctionName_), functionName);
+}
+
+// nodes property: the getter comes first, then the setter (selector token setNodes_).
+_MTL_INLINE NS::Array* MTL::FunctionStitchingGraph::nodes() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(nodes));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingGraph::setNodes(const NS::Array* nodes)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setNodes_), nodes);
+}
+
+// outputNode property: the getter comes first, then the setter (selector token setOutputNode_).
+_MTL_INLINE MTL::FunctionStitchingFunctionNode* MTL::FunctionStitchingGraph::outputNode() const
+{
+    return NS::Object::sendMessage<MTL::FunctionStitchingFunctionNode*>(this, _MTL_PRIVATE_SEL(outputNode));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingGraph::setOutputNode(const MTL::FunctionStitchingFunctionNode* outputNode)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOutputNode_), outputNode);
+}
+
+// attributes property: the getter comes first, then the setter (selector token setAttributes_).
+_MTL_INLINE NS::Array* MTL::FunctionStitchingGraph::attributes() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(attributes));
+}
+
+_MTL_INLINE void MTL::FunctionStitchingGraph::setAttributes(const NS::Array* attributes)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAttributes_), attributes);
+}
+
+// initWithFunctionName:nodes:outputNode:attributes: — forwards the four arguments, in order, with that selector.
+_MTL_INLINE MTL::FunctionStitchingGraph* MTL::FunctionStitchingGraph::init(const NS::String* functionName, const NS::Array* nodes, const MTL::FunctionStitchingFunctionNode* outputNode, const NS::Array* attributes)
+{
+    return NS::Object::sendMessage<MTL::FunctionStitchingGraph*>(this, _MTL_PRIVATE_SEL(initWithFunctionName_nodes_outputNode_attributes_), functionName, nodes, outputNode, attributes);
+}
+
+// static alloc: delegates to NS::Object::alloc with the MTLStitchedLibraryDescriptor class handle.
+_MTL_INLINE MTL::StitchedLibraryDescriptor* MTL::StitchedLibraryDescriptor::alloc()
+{
+    return NS::Object::alloc<StitchedLibraryDescriptor>(_MTL_PRIVATE_CLS(MTLStitchedLibraryDescriptor));
+}
+
+// init: delegates to NS::Object::init, typed to return this wrapper class.
+_MTL_INLINE MTL::StitchedLibraryDescriptor* MTL::StitchedLibraryDescriptor::init()
+{
+    return NS::Object::init<StitchedLibraryDescriptor>();
+}
+
+// functionGraphs property: the getter comes first, then the setter (selector token setFunctionGraphs_).
+_MTL_INLINE NS::Array* MTL::StitchedLibraryDescriptor::functionGraphs() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(functionGraphs));
+}
+
+_MTL_INLINE void MTL::StitchedLibraryDescriptor::setFunctionGraphs(const NS::Array* functionGraphs)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctionGraphs_), functionGraphs);
+}
+
+// functions property: the getter comes first, then the setter (selector token setFunctions_).
+_MTL_INLINE NS::Array* MTL::StitchedLibraryDescriptor::functions() const
+{
+    return NS::Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(functions));
+}
+
+_MTL_INLINE void MTL::StitchedLibraryDescriptor::setFunctions(const NS::Array* functions)
+{
+    NS::Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctions_), functions);
+}
diff --git a/metal-cpp/Metal/MTLHeaderBridge.hpp b/metal-cpp/Metal/MTLHeaderBridge.hpp
new file mode 100644
index 0000000..810085d
--- /dev/null
+++ b/metal-cpp/Metal/MTLHeaderBridge.hpp
@@ -0,0 +1,2047 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLHeaderBridge.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+#include "MTLPrivate.hpp"
+
+namespace MTL::Private::Class
+{
+// Alphabetical list of _MTL_PRIVATE_DEF_CLS entries; wrappers name an entry with _MTL_PRIVATE_CLS(...), e.g. MTLFunctionStitchingGraph in MTLFunctionStitching.hpp's alloc().
+_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureBoundingBoxGeometryDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureGeometryDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureMotionTriangleGeometryDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLAccelerationStructureTriangleGeometryDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLArgument);
+_MTL_PRIVATE_DEF_CLS(MTLArgumentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLArrayType);
+_MTL_PRIVATE_DEF_CLS(MTLAttribute);
+_MTL_PRIVATE_DEF_CLS(MTLAttributeDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLAttributeDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLBinaryArchiveDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLBlitPassDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLBlitPassSampleBufferAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLBlitPassSampleBufferAttachmentDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLBufferLayoutDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLBufferLayoutDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLCaptureDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLCaptureManager);
+_MTL_PRIVATE_DEF_CLS(MTLCommandBufferDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLCompileOptions);
+_MTL_PRIVATE_DEF_CLS(MTLComputePassDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLComputePassSampleBufferAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLComputePassSampleBufferAttachmentDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLComputePipelineDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLComputePipelineReflection);
+_MTL_PRIVATE_DEF_CLS(MTLCounterSampleBufferDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLDepthStencilDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLFunctionConstant);
+_MTL_PRIVATE_DEF_CLS(MTLFunctionConstantValues);
+_MTL_PRIVATE_DEF_CLS(MTLFunctionDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingAttributeAlwaysInline);
+_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingFunctionNode);
+_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingGraph);
+_MTL_PRIVATE_DEF_CLS(MTLFunctionStitchingInputNode);
+_MTL_PRIVATE_DEF_CLS(MTLHeapDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLIndirectCommandBufferDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLInstanceAccelerationStructureDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLIntersectionFunctionDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLIntersectionFunctionTableDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLLinkedFunctions);
+_MTL_PRIVATE_DEF_CLS(MTLMotionKeyframeData);
+_MTL_PRIVATE_DEF_CLS(MTLPipelineBufferDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLPipelineBufferDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLPointerType);
+_MTL_PRIVATE_DEF_CLS(MTLPrimitiveAccelerationStructureDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateLayerArray);
+_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateLayerDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateMapDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRasterizationRateSampleArray);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassColorAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassColorAttachmentDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassDepthAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassSampleBufferAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassSampleBufferAttachmentDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPassStencilAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineColorAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineColorAttachmentDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineFunctionsDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLRenderPipelineReflection);
+_MTL_PRIVATE_DEF_CLS(MTLResourceStatePassDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLSamplerDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLSharedEventHandle);
+_MTL_PRIVATE_DEF_CLS(MTLSharedEventListener);
+_MTL_PRIVATE_DEF_CLS(MTLSharedTextureHandle);
+_MTL_PRIVATE_DEF_CLS(MTLStageInputOutputDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLStencilDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLStitchedLibraryDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLStructMember);
+_MTL_PRIVATE_DEF_CLS(MTLStructType);
+_MTL_PRIVATE_DEF_CLS(MTLTextureDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLTextureReferenceType);
+_MTL_PRIVATE_DEF_CLS(MTLTileRenderPipelineColorAttachmentDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLTileRenderPipelineColorAttachmentDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLTileRenderPipelineDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLType);
+_MTL_PRIVATE_DEF_CLS(MTLVertexAttribute);
+_MTL_PRIVATE_DEF_CLS(MTLVertexAttributeDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLVertexAttributeDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLVertexBufferLayoutDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLVertexBufferLayoutDescriptorArray);
+_MTL_PRIVATE_DEF_CLS(MTLVertexDescriptor);
+_MTL_PRIVATE_DEF_CLS(MTLVisibleFunctionTableDescriptor);
+// NOTE(review): _MTL_PRIVATE_DEF_CLS comes from MTLPrivate.hpp (included above); its expansion is not visible here — confirm before adding or reordering entries.
+}
+
+namespace MTL::Private::Protocol
+{
+// Alphabetical list of _MTL_PRIVATE_DEF_PRO entries; it includes names whose wrappers declare no alloc(), such as MTLFunctionLog and MTLFunctionStitchingNode.
+_MTL_PRIVATE_DEF_PRO(MTLAccelerationStructure);
+_MTL_PRIVATE_DEF_PRO(MTLAccelerationStructureCommandEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLArgumentEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLBinaryArchive);
+_MTL_PRIVATE_DEF_PRO(MTLBlitCommandEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLBuffer);
+_MTL_PRIVATE_DEF_PRO(MTLCommandBuffer);
+_MTL_PRIVATE_DEF_PRO(MTLCommandBufferEncoderInfo);
+_MTL_PRIVATE_DEF_PRO(MTLCommandEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLCommandQueue);
+_MTL_PRIVATE_DEF_PRO(MTLComputeCommandEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLComputePipelineState);
+_MTL_PRIVATE_DEF_PRO(MTLCounter);
+_MTL_PRIVATE_DEF_PRO(MTLCounterSampleBuffer);
+_MTL_PRIVATE_DEF_PRO(MTLCounterSet);
+_MTL_PRIVATE_DEF_PRO(MTLDepthStencilState);
+_MTL_PRIVATE_DEF_PRO(MTLDevice);
+_MTL_PRIVATE_DEF_PRO(MTLDrawable);
+_MTL_PRIVATE_DEF_PRO(MTLDynamicLibrary);
+_MTL_PRIVATE_DEF_PRO(MTLEvent);
+_MTL_PRIVATE_DEF_PRO(MTLFence);
+_MTL_PRIVATE_DEF_PRO(MTLFunction);
+_MTL_PRIVATE_DEF_PRO(MTLFunctionHandle);
+_MTL_PRIVATE_DEF_PRO(MTLFunctionLog);
+_MTL_PRIVATE_DEF_PRO(MTLFunctionLogDebugLocation);
+_MTL_PRIVATE_DEF_PRO(MTLFunctionStitchingAttribute);
+_MTL_PRIVATE_DEF_PRO(MTLFunctionStitchingNode);
+_MTL_PRIVATE_DEF_PRO(MTLHeap);
+_MTL_PRIVATE_DEF_PRO(MTLIndirectCommandBuffer);
+_MTL_PRIVATE_DEF_PRO(MTLIndirectComputeCommand);
+_MTL_PRIVATE_DEF_PRO(MTLIndirectRenderCommand);
+_MTL_PRIVATE_DEF_PRO(MTLIntersectionFunctionTable);
+_MTL_PRIVATE_DEF_PRO(MTLLibrary);
+_MTL_PRIVATE_DEF_PRO(MTLLogContainer);
+_MTL_PRIVATE_DEF_PRO(MTLParallelRenderCommandEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLRasterizationRateMap);
+_MTL_PRIVATE_DEF_PRO(MTLRenderCommandEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLRenderPipelineState);
+_MTL_PRIVATE_DEF_PRO(MTLResource);
+_MTL_PRIVATE_DEF_PRO(MTLResourceStateCommandEncoder);
+_MTL_PRIVATE_DEF_PRO(MTLSamplerState);
+_MTL_PRIVATE_DEF_PRO(MTLSharedEvent);
+_MTL_PRIVATE_DEF_PRO(MTLTexture);
+_MTL_PRIVATE_DEF_PRO(MTLVisibleFunctionTable);
+// NOTE(review): _MTL_PRIVATE_DEF_PRO presumably registers Objective-C protocols ("PRO"); the macro is defined in MTLPrivate.hpp, not here — confirm.
+}
+
+namespace MTL::Private::Selector
+{
+
+_MTL_PRIVATE_DEF_SEL(GPUEndTime,
+    "GPUEndTime");
+_MTL_PRIVATE_DEF_SEL(GPUStartTime,
+    "GPUStartTime");
+_MTL_PRIVATE_DEF_SEL(URL,
+    "URL");
+_MTL_PRIVATE_DEF_SEL(accelerationStructureCommandEncoder,
+    "accelerationStructureCommandEncoder");
+_MTL_PRIVATE_DEF_SEL(accelerationStructureSizesWithDescriptor_,
+    "accelerationStructureSizesWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(access,
+    "access");
+_MTL_PRIVATE_DEF_SEL(addCompletedHandler_,
+    "addCompletedHandler:");
+_MTL_PRIVATE_DEF_SEL(addComputePipelineFunctionsWithDescriptor_error_,
+    "addComputePipelineFunctionsWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(addDebugMarker_range_,
+    "addDebugMarker:range:");
+_MTL_PRIVATE_DEF_SEL(addFunctionWithDescriptor_library_error_,
+    "addFunctionWithDescriptor:library:error:");
+_MTL_PRIVATE_DEF_SEL(addPresentedHandler_,
+    "addPresentedHandler:");
+_MTL_PRIVATE_DEF_SEL(addRenderPipelineFunctionsWithDescriptor_error_,
+    "addRenderPipelineFunctionsWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(addScheduledHandler_,
+    "addScheduledHandler:");
+_MTL_PRIVATE_DEF_SEL(addTileRenderPipelineFunctionsWithDescriptor_error_,
+    "addTileRenderPipelineFunctionsWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(alignment,
+    "alignment");
+_MTL_PRIVATE_DEF_SEL(allocatedSize,
+    "allocatedSize");
+_MTL_PRIVATE_DEF_SEL(allowDuplicateIntersectionFunctionInvocation,
+    "allowDuplicateIntersectionFunctionInvocation");
+_MTL_PRIVATE_DEF_SEL(allowGPUOptimizedContents,
+    "allowGPUOptimizedContents");
+_MTL_PRIVATE_DEF_SEL(alphaBlendOperation,
+    "alphaBlendOperation");
+_MTL_PRIVATE_DEF_SEL(areBarycentricCoordsSupported,
+    "areBarycentricCoordsSupported");
+_MTL_PRIVATE_DEF_SEL(areProgrammableSamplePositionsSupported,
+    "areProgrammableSamplePositionsSupported");
+_MTL_PRIVATE_DEF_SEL(areRasterOrderGroupsSupported,
+    "areRasterOrderGroupsSupported");
+_MTL_PRIVATE_DEF_SEL(argumentBuffersSupport,
+    "argumentBuffersSupport");
+_MTL_PRIVATE_DEF_SEL(argumentDescriptor,
+    "argumentDescriptor");
+_MTL_PRIVATE_DEF_SEL(argumentIndex,
+    "argumentIndex");
+_MTL_PRIVATE_DEF_SEL(argumentIndexStride,
+    "argumentIndexStride");
+_MTL_PRIVATE_DEF_SEL(arguments,
+    "arguments");
+_MTL_PRIVATE_DEF_SEL(arrayLength,
+    "arrayLength");
+_MTL_PRIVATE_DEF_SEL(arrayType,
+    "arrayType");
+_MTL_PRIVATE_DEF_SEL(attributeIndex,
+    "attributeIndex");
+_MTL_PRIVATE_DEF_SEL(attributeType,
+    "attributeType");
+_MTL_PRIVATE_DEF_SEL(attributes,
+    "attributes");
+_MTL_PRIVATE_DEF_SEL(backFaceStencil,
+    "backFaceStencil");
+_MTL_PRIVATE_DEF_SEL(binaryArchives,
+    "binaryArchives");
+_MTL_PRIVATE_DEF_SEL(binaryFunctions,
+    "binaryFunctions");
+_MTL_PRIVATE_DEF_SEL(blitCommandEncoder,
+    "blitCommandEncoder");
+_MTL_PRIVATE_DEF_SEL(blitCommandEncoderWithDescriptor_,
+    "blitCommandEncoderWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(blitPassDescriptor,
+    "blitPassDescriptor");
+_MTL_PRIVATE_DEF_SEL(borderColor,
+    "borderColor");
+_MTL_PRIVATE_DEF_SEL(boundingBoxBuffer,
+    "boundingBoxBuffer");
+_MTL_PRIVATE_DEF_SEL(boundingBoxBufferOffset,
+    "boundingBoxBufferOffset");
+_MTL_PRIVATE_DEF_SEL(boundingBoxBuffers,
+    "boundingBoxBuffers");
+_MTL_PRIVATE_DEF_SEL(boundingBoxCount,
+    "boundingBoxCount");
+_MTL_PRIVATE_DEF_SEL(boundingBoxStride,
+    "boundingBoxStride");
+_MTL_PRIVATE_DEF_SEL(buffer,
+    "buffer");
+_MTL_PRIVATE_DEF_SEL(bufferAlignment,
+    "bufferAlignment");
+_MTL_PRIVATE_DEF_SEL(bufferBytesPerRow,
+    "bufferBytesPerRow");
+_MTL_PRIVATE_DEF_SEL(bufferDataSize,
+    "bufferDataSize");
+_MTL_PRIVATE_DEF_SEL(bufferDataType,
+    "bufferDataType");
+_MTL_PRIVATE_DEF_SEL(bufferIndex,
+    "bufferIndex");
+_MTL_PRIVATE_DEF_SEL(bufferOffset,
+    "bufferOffset");
+_MTL_PRIVATE_DEF_SEL(bufferPointerType,
+    "bufferPointerType");
+_MTL_PRIVATE_DEF_SEL(bufferStructType,
+    "bufferStructType");
+_MTL_PRIVATE_DEF_SEL(buffers,
+    "buffers");
+_MTL_PRIVATE_DEF_SEL(buildAccelerationStructure_descriptor_scratchBuffer_scratchBufferOffset_,
+    "buildAccelerationStructure:descriptor:scratchBuffer:scratchBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(captureObject,
+    "captureObject");
+_MTL_PRIVATE_DEF_SEL(clearBarrier,
+    "clearBarrier");
+_MTL_PRIVATE_DEF_SEL(clearColor,
+    "clearColor");
+_MTL_PRIVATE_DEF_SEL(clearDepth,
+    "clearDepth");
+_MTL_PRIVATE_DEF_SEL(clearStencil,
+    "clearStencil");
+_MTL_PRIVATE_DEF_SEL(colorAttachments,
+    "colorAttachments");
+_MTL_PRIVATE_DEF_SEL(column,
+    "column");
+_MTL_PRIVATE_DEF_SEL(commandBuffer,
+    "commandBuffer");
+_MTL_PRIVATE_DEF_SEL(commandBufferWithDescriptor_,
+    "commandBufferWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(commandBufferWithUnretainedReferences,
+    "commandBufferWithUnretainedReferences");
+_MTL_PRIVATE_DEF_SEL(commandQueue,
+    "commandQueue");
+_MTL_PRIVATE_DEF_SEL(commandTypes,
+    "commandTypes");
+_MTL_PRIVATE_DEF_SEL(commit,
+    "commit");
+_MTL_PRIVATE_DEF_SEL(compareFunction,
+    "compareFunction");
+_MTL_PRIVATE_DEF_SEL(computeCommandEncoder,
+    "computeCommandEncoder");
+_MTL_PRIVATE_DEF_SEL(computeCommandEncoderWithDescriptor_,
+    "computeCommandEncoderWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(computeCommandEncoderWithDispatchType_,
+    "computeCommandEncoderWithDispatchType:");
+_MTL_PRIVATE_DEF_SEL(computeFunction,
+    "computeFunction");
+_MTL_PRIVATE_DEF_SEL(computePassDescriptor,
+    "computePassDescriptor");
+_MTL_PRIVATE_DEF_SEL(concurrentDispatchThreadgroups_threadsPerThreadgroup_,
+    "concurrentDispatchThreadgroups:threadsPerThreadgroup:");
+_MTL_PRIVATE_DEF_SEL(concurrentDispatchThreads_threadsPerThreadgroup_,
+    "concurrentDispatchThreads:threadsPerThreadgroup:");
+_MTL_PRIVATE_DEF_SEL(constantBlockAlignment,
+    "constantBlockAlignment");
+_MTL_PRIVATE_DEF_SEL(constantDataAtIndex_,
+    "constantDataAtIndex:");
+_MTL_PRIVATE_DEF_SEL(constantValues,
+    "constantValues");
+_MTL_PRIVATE_DEF_SEL(contents,
+    "contents");
+_MTL_PRIVATE_DEF_SEL(controlDependencies,
+    "controlDependencies");
+_MTL_PRIVATE_DEF_SEL(convertSparsePixelRegions_toTileRegions_withTileSize_alignmentMode_numRegions_,
+    "convertSparsePixelRegions:toTileRegions:withTileSize:alignmentMode:numRegions:");
+_MTL_PRIVATE_DEF_SEL(convertSparseTileRegions_toPixelRegions_withTileSize_numRegions_,
+    "convertSparseTileRegions:toPixelRegions:withTileSize:numRegions:");
+_MTL_PRIVATE_DEF_SEL(copyAccelerationStructure_toAccelerationStructure_,
+    "copyAccelerationStructure:toAccelerationStructure:");
+_MTL_PRIVATE_DEF_SEL(copyAndCompactAccelerationStructure_toAccelerationStructure_,
+    "copyAndCompactAccelerationStructure:toAccelerationStructure:");
+_MTL_PRIVATE_DEF_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_,
+    "copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:");
+_MTL_PRIVATE_DEF_SEL(copyFromBuffer_sourceOffset_sourceBytesPerRow_sourceBytesPerImage_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_options_,
+    "copyFromBuffer:sourceOffset:sourceBytesPerRow:sourceBytesPerImage:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:options:");
+_MTL_PRIVATE_DEF_SEL(copyFromBuffer_sourceOffset_toBuffer_destinationOffset_size_,
+    "copyFromBuffer:sourceOffset:toBuffer:destinationOffset:size:");
+_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_,
+    "copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage:");
+_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toBuffer_destinationOffset_destinationBytesPerRow_destinationBytesPerImage_options_,
+    "copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toBuffer:destinationOffset:destinationBytesPerRow:destinationBytesPerImage:options:");
+_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_sourceOrigin_sourceSize_toTexture_destinationSlice_destinationLevel_destinationOrigin_,
+    "copyFromTexture:sourceSlice:sourceLevel:sourceOrigin:sourceSize:toTexture:destinationSlice:destinationLevel:destinationOrigin:");
+_MTL_PRIVATE_DEF_SEL(copyFromTexture_sourceSlice_sourceLevel_toTexture_destinationSlice_destinationLevel_sliceCount_levelCount_,
+    "copyFromTexture:sourceSlice:sourceLevel:toTexture:destinationSlice:destinationLevel:sliceCount:levelCount:");
+_MTL_PRIVATE_DEF_SEL(copyFromTexture_toTexture_,
+    "copyFromTexture:toTexture:");
+_MTL_PRIVATE_DEF_SEL(copyIndirectCommandBuffer_sourceRange_destination_destinationIndex_,
+    "copyIndirectCommandBuffer:sourceRange:destination:destinationIndex:");
+_MTL_PRIVATE_DEF_SEL(copyParameterDataToBuffer_offset_,
+    "copyParameterDataToBuffer:offset:");
+_MTL_PRIVATE_DEF_SEL(counterSet,
+    "counterSet");
+_MTL_PRIVATE_DEF_SEL(counterSets,
+    "counterSets");
+_MTL_PRIVATE_DEF_SEL(counters,
+    "counters");
+_MTL_PRIVATE_DEF_SEL(cpuCacheMode,
+    "cpuCacheMode");
+_MTL_PRIVATE_DEF_SEL(currentAllocatedSize,
+    "currentAllocatedSize");
+_MTL_PRIVATE_DEF_SEL(data,
+    "data");
+_MTL_PRIVATE_DEF_SEL(dataSize,
+    "dataSize");
+_MTL_PRIVATE_DEF_SEL(dataType,
+    "dataType");
+_MTL_PRIVATE_DEF_SEL(dealloc,
+    "dealloc");
+_MTL_PRIVATE_DEF_SEL(debugLocation,
+    "debugLocation");
+_MTL_PRIVATE_DEF_SEL(debugSignposts,
+    "debugSignposts");
+_MTL_PRIVATE_DEF_SEL(defaultCaptureScope,
+    "defaultCaptureScope");
+_MTL_PRIVATE_DEF_SEL(defaultRasterSampleCount,
+    "defaultRasterSampleCount");
+_MTL_PRIVATE_DEF_SEL(depth,
+    "depth");
+_MTL_PRIVATE_DEF_SEL(depthAttachment,
+    "depthAttachment");
+_MTL_PRIVATE_DEF_SEL(depthAttachmentPixelFormat,
+    "depthAttachmentPixelFormat");
+_MTL_PRIVATE_DEF_SEL(depthCompareFunction,
+    "depthCompareFunction");
+_MTL_PRIVATE_DEF_SEL(depthFailureOperation,
+    "depthFailureOperation");
+_MTL_PRIVATE_DEF_SEL(depthPlane,
+    "depthPlane");
+_MTL_PRIVATE_DEF_SEL(depthResolveFilter,
+    "depthResolveFilter");
+_MTL_PRIVATE_DEF_SEL(depthStencilPassOperation,
+    "depthStencilPassOperation");
+_MTL_PRIVATE_DEF_SEL(descriptor,
+    "descriptor");
+_MTL_PRIVATE_DEF_SEL(destination,
+    "destination");
+_MTL_PRIVATE_DEF_SEL(destinationAlphaBlendFactor,
+    "destinationAlphaBlendFactor");
+_MTL_PRIVATE_DEF_SEL(destinationRGBBlendFactor,
+    "destinationRGBBlendFactor");
+_MTL_PRIVATE_DEF_SEL(device,
+    "device");
+_MTL_PRIVATE_DEF_SEL(didModifyRange_,
+    "didModifyRange:");
+_MTL_PRIVATE_DEF_SEL(dispatchQueue,
+    "dispatchQueue");
+_MTL_PRIVATE_DEF_SEL(dispatchThreadgroups_threadsPerThreadgroup_,
+    "dispatchThreadgroups:threadsPerThreadgroup:");
+_MTL_PRIVATE_DEF_SEL(dispatchThreadgroupsWithIndirectBuffer_indirectBufferOffset_threadsPerThreadgroup_,
+    "dispatchThreadgroupsWithIndirectBuffer:indirectBufferOffset:threadsPerThreadgroup:");
+_MTL_PRIVATE_DEF_SEL(dispatchThreads_threadsPerThreadgroup_,
+    "dispatchThreads:threadsPerThreadgroup:");
+_MTL_PRIVATE_DEF_SEL(dispatchThreadsPerTile_,
+    "dispatchThreadsPerTile:");
+_MTL_PRIVATE_DEF_SEL(dispatchType,
+    "dispatchType");
+_MTL_PRIVATE_DEF_SEL(drawIndexedPatches_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_indirectBuffer_indirectBufferOffset_,
+    "drawIndexedPatches:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:indirectBuffer:indirectBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_,
+    "drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance:");
+_MTL_PRIVATE_DEF_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_,
+    "drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride:");
+_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_,
+    "drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_,
+    "drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:");
+_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance_,
+    "drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:baseVertex:baseInstance:");
+_MTL_PRIVATE_DEF_SEL(drawIndexedPrimitives_indexType_indexBuffer_indexBufferOffset_indirectBuffer_indirectBufferOffset_,
+    "drawIndexedPrimitives:indexType:indexBuffer:indexBufferOffset:indirectBuffer:indirectBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(drawPatches_patchIndexBuffer_patchIndexBufferOffset_indirectBuffer_indirectBufferOffset_,
+    "drawPatches:patchIndexBuffer:patchIndexBufferOffset:indirectBuffer:indirectBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_,
+    "drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:");
+_MTL_PRIVATE_DEF_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_,
+    "drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride:");
+_MTL_PRIVATE_DEF_SEL(drawPrimitives_indirectBuffer_indirectBufferOffset_,
+    "drawPrimitives:indirectBuffer:indirectBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(drawPrimitives_vertexStart_vertexCount_,
+    "drawPrimitives:vertexStart:vertexCount:");
+_MTL_PRIVATE_DEF_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_,
+    "drawPrimitives:vertexStart:vertexCount:instanceCount:");
+_MTL_PRIVATE_DEF_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_baseInstance_,
+    "drawPrimitives:vertexStart:vertexCount:instanceCount:baseInstance:");
+_MTL_PRIVATE_DEF_SEL(drawableID,
+    "drawableID");
+_MTL_PRIVATE_DEF_SEL(elementArrayType,
+    "elementArrayType");
+_MTL_PRIVATE_DEF_SEL(elementIsArgumentBuffer,
+    "elementIsArgumentBuffer");
+_MTL_PRIVATE_DEF_SEL(elementPointerType,
+    "elementPointerType");
+_MTL_PRIVATE_DEF_SEL(elementStructType,
+    "elementStructType");
+_MTL_PRIVATE_DEF_SEL(elementTextureReferenceType,
+    "elementTextureReferenceType");
+_MTL_PRIVATE_DEF_SEL(elementType,
+    "elementType");
+_MTL_PRIVATE_DEF_SEL(encodeSignalEvent_value_,
+    "encodeSignalEvent:value:");
+_MTL_PRIVATE_DEF_SEL(encodeWaitForEvent_value_,
+    "encodeWaitForEvent:value:");
+_MTL_PRIVATE_DEF_SEL(encodedLength,
+    "encodedLength");
+_MTL_PRIVATE_DEF_SEL(encoderLabel,
+    "encoderLabel");
+_MTL_PRIVATE_DEF_SEL(endEncoding,
+    "endEncoding");
+_MTL_PRIVATE_DEF_SEL(endOfEncoderSampleIndex,
+    "endOfEncoderSampleIndex");
+_MTL_PRIVATE_DEF_SEL(endOfFragmentSampleIndex,
+    "endOfFragmentSampleIndex");
+_MTL_PRIVATE_DEF_SEL(endOfVertexSampleIndex,
+    "endOfVertexSampleIndex");
+_MTL_PRIVATE_DEF_SEL(enqueue,
+    "enqueue");
+_MTL_PRIVATE_DEF_SEL(error,
+    "error");
+_MTL_PRIVATE_DEF_SEL(errorOptions,
+    "errorOptions");
+_MTL_PRIVATE_DEF_SEL(errorState,
+    "errorState");
+_MTL_PRIVATE_DEF_SEL(executeCommandsInBuffer_indirectBuffer_indirectBufferOffset_,
+    "executeCommandsInBuffer:indirectBuffer:indirectBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(executeCommandsInBuffer_withRange_,
+    "executeCommandsInBuffer:withRange:");
+_MTL_PRIVATE_DEF_SEL(fastMathEnabled,
+    "fastMathEnabled");
+_MTL_PRIVATE_DEF_SEL(fillBuffer_range_value_,
+    "fillBuffer:range:value:");
+_MTL_PRIVATE_DEF_SEL(firstMipmapInTail,
+    "firstMipmapInTail");
+_MTL_PRIVATE_DEF_SEL(format,
+    "format");
+_MTL_PRIVATE_DEF_SEL(fragmentAdditionalBinaryFunctions,
+    "fragmentAdditionalBinaryFunctions");
+_MTL_PRIVATE_DEF_SEL(fragmentArguments,
+    "fragmentArguments");
+_MTL_PRIVATE_DEF_SEL(fragmentBuffers,
+    "fragmentBuffers");
+_MTL_PRIVATE_DEF_SEL(fragmentFunction,
+    "fragmentFunction");
+_MTL_PRIVATE_DEF_SEL(fragmentLinkedFunctions,
+    "fragmentLinkedFunctions");
+_MTL_PRIVATE_DEF_SEL(fragmentPreloadedLibraries,
+    "fragmentPreloadedLibraries");
+_MTL_PRIVATE_DEF_SEL(frontFaceStencil,
+    "frontFaceStencil");
+_MTL_PRIVATE_DEF_SEL(function,
+    "function");
+_MTL_PRIVATE_DEF_SEL(functionConstantsDictionary,
+    "functionConstantsDictionary");
+_MTL_PRIVATE_DEF_SEL(functionCount,
+    "functionCount");
+_MTL_PRIVATE_DEF_SEL(functionDescriptor,
+    "functionDescriptor");
+_MTL_PRIVATE_DEF_SEL(functionGraphs,
+    "functionGraphs");
+_MTL_PRIVATE_DEF_SEL(functionHandleWithFunction_,
+    "functionHandleWithFunction:");
+_MTL_PRIVATE_DEF_SEL(functionHandleWithFunction_stage_,
+    "functionHandleWithFunction:stage:");
+_MTL_PRIVATE_DEF_SEL(functionName,
+    "functionName");
+_MTL_PRIVATE_DEF_SEL(functionNames,
+    "functionNames");
+_MTL_PRIVATE_DEF_SEL(functionType,
+    "functionType");
+_MTL_PRIVATE_DEF_SEL(functions,
+    "functions");
+_MTL_PRIVATE_DEF_SEL(generateMipmapsForTexture_,
+    "generateMipmapsForTexture:");
+_MTL_PRIVATE_DEF_SEL(geometryDescriptors,
+    "geometryDescriptors");
+_MTL_PRIVATE_DEF_SEL(getBytes_bytesPerRow_bytesPerImage_fromRegion_mipmapLevel_slice_,
+    "getBytes:bytesPerRow:bytesPerImage:fromRegion:mipmapLevel:slice:");
+_MTL_PRIVATE_DEF_SEL(getBytes_bytesPerRow_fromRegion_mipmapLevel_,
+    "getBytes:bytesPerRow:fromRegion:mipmapLevel:");
+_MTL_PRIVATE_DEF_SEL(getDefaultSamplePositions_count_,
+    "getDefaultSamplePositions:count:");
+_MTL_PRIVATE_DEF_SEL(getSamplePositions_count_,
+    "getSamplePositions:count:");
+_MTL_PRIVATE_DEF_SEL(getTextureAccessCounters_region_mipLevel_slice_resetCounters_countersBuffer_countersBufferOffset_,
+    "getTextureAccessCounters:region:mipLevel:slice:resetCounters:countersBuffer:countersBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(groups,
+    "groups");
+_MTL_PRIVATE_DEF_SEL(hasUnifiedMemory,
+    "hasUnifiedMemory");
+_MTL_PRIVATE_DEF_SEL(hazardTrackingMode,
+    "hazardTrackingMode");
+_MTL_PRIVATE_DEF_SEL(heap,
+    "heap");
+_MTL_PRIVATE_DEF_SEL(heapBufferSizeAndAlignWithLength_options_,
+    "heapBufferSizeAndAlignWithLength:options:");
+_MTL_PRIVATE_DEF_SEL(heapOffset,
+    "heapOffset");
+_MTL_PRIVATE_DEF_SEL(heapTextureSizeAndAlignWithDescriptor_,
+    "heapTextureSizeAndAlignWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(height,
+    "height");
+_MTL_PRIVATE_DEF_SEL(horizontal,
+    "horizontal");
+_MTL_PRIVATE_DEF_SEL(horizontalSampleStorage,
+    "horizontalSampleStorage");
+_MTL_PRIVATE_DEF_SEL(imageblockMemoryLengthForDimensions_,
+    "imageblockMemoryLengthForDimensions:");
+_MTL_PRIVATE_DEF_SEL(imageblockSampleLength,
+    "imageblockSampleLength");
+_MTL_PRIVATE_DEF_SEL(index,
+    "index");
+_MTL_PRIVATE_DEF_SEL(indexBuffer,
+    "indexBuffer");
+_MTL_PRIVATE_DEF_SEL(indexBufferIndex,
+    "indexBufferIndex");
+_MTL_PRIVATE_DEF_SEL(indexBufferOffset,
+    "indexBufferOffset");
+_MTL_PRIVATE_DEF_SEL(indexType,
+    "indexType");
+_MTL_PRIVATE_DEF_SEL(indirectComputeCommandAtIndex_,
+    "indirectComputeCommandAtIndex:");
+_MTL_PRIVATE_DEF_SEL(indirectRenderCommandAtIndex_,
+    "indirectRenderCommandAtIndex:");
+_MTL_PRIVATE_DEF_SEL(inheritBuffers,
+    "inheritBuffers");
+_MTL_PRIVATE_DEF_SEL(inheritPipelineState,
+    "inheritPipelineState");
+_MTL_PRIVATE_DEF_SEL(init,
+    "init");
+_MTL_PRIVATE_DEF_SEL(initWithArgumentIndex_,
+    "initWithArgumentIndex:");
+_MTL_PRIVATE_DEF_SEL(initWithDispatchQueue_,
+    "initWithDispatchQueue:");
+_MTL_PRIVATE_DEF_SEL(initWithFunctionName_nodes_outputNode_attributes_,
+    "initWithFunctionName:nodes:outputNode:attributes:");
+_MTL_PRIVATE_DEF_SEL(initWithName_arguments_controlDependencies_,
+    "initWithName:arguments:controlDependencies:");
+_MTL_PRIVATE_DEF_SEL(initWithSampleCount_,
+    "initWithSampleCount:");
+_MTL_PRIVATE_DEF_SEL(initWithSampleCount_horizontal_vertical_,
+    "initWithSampleCount:horizontal:vertical:");
+_MTL_PRIVATE_DEF_SEL(inputPrimitiveTopology,
+    "inputPrimitiveTopology");
+_MTL_PRIVATE_DEF_SEL(insertDebugCaptureBoundary,
+    "insertDebugCaptureBoundary");
+_MTL_PRIVATE_DEF_SEL(insertDebugSignpost_,
+    "insertDebugSignpost:");
+_MTL_PRIVATE_DEF_SEL(insertLibraries,
+    "insertLibraries");
+_MTL_PRIVATE_DEF_SEL(installName,
+    "installName");
+_MTL_PRIVATE_DEF_SEL(instanceCount,
+    "instanceCount");
+_MTL_PRIVATE_DEF_SEL(instanceDescriptorBuffer,
+    "instanceDescriptorBuffer");
+_MTL_PRIVATE_DEF_SEL(instanceDescriptorBufferOffset,
+    "instanceDescriptorBufferOffset");
+_MTL_PRIVATE_DEF_SEL(instanceDescriptorStride,
+    "instanceDescriptorStride");
+_MTL_PRIVATE_DEF_SEL(instanceDescriptorType,
+    "instanceDescriptorType");
+_MTL_PRIVATE_DEF_SEL(instancedAccelerationStructures,
+    "instancedAccelerationStructures");
+_MTL_PRIVATE_DEF_SEL(intersectionFunctionTableDescriptor,
+    "intersectionFunctionTableDescriptor");
+_MTL_PRIVATE_DEF_SEL(intersectionFunctionTableOffset,
+    "intersectionFunctionTableOffset");
+_MTL_PRIVATE_DEF_SEL(iosurface,
+    "iosurface");
+_MTL_PRIVATE_DEF_SEL(iosurfacePlane,
+    "iosurfacePlane");
+_MTL_PRIVATE_DEF_SEL(isActive,
+    "isActive");
+_MTL_PRIVATE_DEF_SEL(isAliasable,
+    "isAliasable");
+_MTL_PRIVATE_DEF_SEL(isAlphaToCoverageEnabled,
+    "isAlphaToCoverageEnabled");
+_MTL_PRIVATE_DEF_SEL(isAlphaToOneEnabled,
+    "isAlphaToOneEnabled");
+_MTL_PRIVATE_DEF_SEL(isBlendingEnabled,
+    "isBlendingEnabled");
+_MTL_PRIVATE_DEF_SEL(isCapturing,
+    "isCapturing");
+_MTL_PRIVATE_DEF_SEL(isDepth24Stencil8PixelFormatSupported,
+    "isDepth24Stencil8PixelFormatSupported");
+_MTL_PRIVATE_DEF_SEL(isDepthTexture,
+    "isDepthTexture");
+_MTL_PRIVATE_DEF_SEL(isDepthWriteEnabled,
+    "isDepthWriteEnabled");
+_MTL_PRIVATE_DEF_SEL(isFramebufferOnly,
+    "isFramebufferOnly");
+_MTL_PRIVATE_DEF_SEL(isHeadless,
+    "isHeadless");
+_MTL_PRIVATE_DEF_SEL(isLowPower,
+    "isLowPower");
+_MTL_PRIVATE_DEF_SEL(isPatchControlPointData,
+    "isPatchControlPointData");
+_MTL_PRIVATE_DEF_SEL(isPatchData,
+    "isPatchData");
+_MTL_PRIVATE_DEF_SEL(isRasterizationEnabled,
+    "isRasterizationEnabled");
+_MTL_PRIVATE_DEF_SEL(isRemovable,
+    "isRemovable");
+_MTL_PRIVATE_DEF_SEL(isShareable,
+    "isShareable");
+_MTL_PRIVATE_DEF_SEL(isSparse,
+    "isSparse");
+_MTL_PRIVATE_DEF_SEL(isTessellationFactorScaleEnabled,
+    "isTessellationFactorScaleEnabled");
+_MTL_PRIVATE_DEF_SEL(kernelEndTime,
+    "kernelEndTime");
+_MTL_PRIVATE_DEF_SEL(kernelStartTime,
+    "kernelStartTime");
+_MTL_PRIVATE_DEF_SEL(label,
+    "label");
+_MTL_PRIVATE_DEF_SEL(languageVersion,
+    "languageVersion");
+_MTL_PRIVATE_DEF_SEL(layerAtIndex_,
+    "layerAtIndex:");
+_MTL_PRIVATE_DEF_SEL(layerCount,
+    "layerCount");
+_MTL_PRIVATE_DEF_SEL(layers,
+    "layers");
+_MTL_PRIVATE_DEF_SEL(layouts,
+    "layouts");
+_MTL_PRIVATE_DEF_SEL(length,
+    "length");
+_MTL_PRIVATE_DEF_SEL(level,
+    "level");
+_MTL_PRIVATE_DEF_SEL(libraries,
+    "libraries");
+_MTL_PRIVATE_DEF_SEL(libraryType,
+    "libraryType");
+_MTL_PRIVATE_DEF_SEL(line,
+    "line");
+_MTL_PRIVATE_DEF_SEL(linkedFunctions,
+    "linkedFunctions");
+_MTL_PRIVATE_DEF_SEL(loadAction,
+    "loadAction");
+_MTL_PRIVATE_DEF_SEL(location,
+    "location");
+_MTL_PRIVATE_DEF_SEL(locationNumber,
+    "locationNumber");
+_MTL_PRIVATE_DEF_SEL(lodAverage,
+    "lodAverage");
+_MTL_PRIVATE_DEF_SEL(lodMaxClamp,
+    "lodMaxClamp");
+_MTL_PRIVATE_DEF_SEL(lodMinClamp,
+    "lodMinClamp");
+_MTL_PRIVATE_DEF_SEL(logs,
+    "logs");
+_MTL_PRIVATE_DEF_SEL(magFilter,
+    "magFilter");
+_MTL_PRIVATE_DEF_SEL(makeAliasable,
+    "makeAliasable");
+_MTL_PRIVATE_DEF_SEL(mapPhysicalToScreenCoordinates_forLayer_,
+    "mapPhysicalToScreenCoordinates:forLayer:");
+_MTL_PRIVATE_DEF_SEL(mapScreenToPhysicalCoordinates_forLayer_,
+    "mapScreenToPhysicalCoordinates:forLayer:");
+_MTL_PRIVATE_DEF_SEL(maxAnisotropy,
+    "maxAnisotropy");
+_MTL_PRIVATE_DEF_SEL(maxArgumentBufferSamplerCount,
+    "maxArgumentBufferSamplerCount");
+_MTL_PRIVATE_DEF_SEL(maxAvailableSizeWithAlignment_,
+    "maxAvailableSizeWithAlignment:");
+_MTL_PRIVATE_DEF_SEL(maxBufferLength,
+    "maxBufferLength");
+_MTL_PRIVATE_DEF_SEL(maxCallStackDepth,
+    "maxCallStackDepth");
+_MTL_PRIVATE_DEF_SEL(maxFragmentBufferBindCount,
+    "maxFragmentBufferBindCount");
+_MTL_PRIVATE_DEF_SEL(maxFragmentCallStackDepth,
+    "maxFragmentCallStackDepth");
+_MTL_PRIVATE_DEF_SEL(maxKernelBufferBindCount,
+    "maxKernelBufferBindCount");
+_MTL_PRIVATE_DEF_SEL(maxSampleCount,
+    "maxSampleCount");
+_MTL_PRIVATE_DEF_SEL(maxTessellationFactor,
+    "maxTessellationFactor");
+_MTL_PRIVATE_DEF_SEL(maxThreadgroupMemoryLength,
+    "maxThreadgroupMemoryLength");
+_MTL_PRIVATE_DEF_SEL(maxThreadsPerThreadgroup,
+    "maxThreadsPerThreadgroup");
+_MTL_PRIVATE_DEF_SEL(maxTotalThreadsPerThreadgroup,
+    "maxTotalThreadsPerThreadgroup");
+_MTL_PRIVATE_DEF_SEL(maxTransferRate,
+    "maxTransferRate");
+_MTL_PRIVATE_DEF_SEL(maxVertexAmplificationCount,
+    "maxVertexAmplificationCount");
+_MTL_PRIVATE_DEF_SEL(maxVertexBufferBindCount,
+    "maxVertexBufferBindCount");
+_MTL_PRIVATE_DEF_SEL(maxVertexCallStackDepth,
+    "maxVertexCallStackDepth");
+_MTL_PRIVATE_DEF_SEL(memberByName_,
+    "memberByName:");
+_MTL_PRIVATE_DEF_SEL(members,
+    "members");
+_MTL_PRIVATE_DEF_SEL(memoryBarrierWithResources_count_,
+    "memoryBarrierWithResources:count:");
+_MTL_PRIVATE_DEF_SEL(memoryBarrierWithResources_count_afterStages_beforeStages_,
+    "memoryBarrierWithResources:count:afterStages:beforeStages:");
+_MTL_PRIVATE_DEF_SEL(memoryBarrierWithScope_,
+    "memoryBarrierWithScope:");
+_MTL_PRIVATE_DEF_SEL(memoryBarrierWithScope_afterStages_beforeStages_,
+    "memoryBarrierWithScope:afterStages:beforeStages:");
+_MTL_PRIVATE_DEF_SEL(minFilter,
+    "minFilter");
+_MTL_PRIVATE_DEF_SEL(minimumLinearTextureAlignmentForPixelFormat_,
+    "minimumLinearTextureAlignmentForPixelFormat:");
+_MTL_PRIVATE_DEF_SEL(minimumTextureBufferAlignmentForPixelFormat_,
+    "minimumTextureBufferAlignmentForPixelFormat:");
+_MTL_PRIVATE_DEF_SEL(mipFilter,
+    "mipFilter");
+_MTL_PRIVATE_DEF_SEL(mipmapLevelCount,
+    "mipmapLevelCount");
+_MTL_PRIVATE_DEF_SEL(motionEndBorderMode,
+    "motionEndBorderMode");
+_MTL_PRIVATE_DEF_SEL(motionEndTime,
+    "motionEndTime");
+_MTL_PRIVATE_DEF_SEL(motionKeyframeCount,
+    "motionKeyframeCount");
+_MTL_PRIVATE_DEF_SEL(motionStartBorderMode,
+    "motionStartBorderMode");
+_MTL_PRIVATE_DEF_SEL(motionStartTime,
+    "motionStartTime");
+_MTL_PRIVATE_DEF_SEL(motionTransformBuffer,
+    "motionTransformBuffer");
+_MTL_PRIVATE_DEF_SEL(motionTransformBufferOffset,
+    "motionTransformBufferOffset");
+_MTL_PRIVATE_DEF_SEL(motionTransformCount,
+    "motionTransformCount");
+_MTL_PRIVATE_DEF_SEL(mutability,
+    "mutability");
+_MTL_PRIVATE_DEF_SEL(name,
+    "name");
+_MTL_PRIVATE_DEF_SEL(newAccelerationStructureWithDescriptor_,
+    "newAccelerationStructureWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newAccelerationStructureWithSize_,
+    "newAccelerationStructureWithSize:");
+_MTL_PRIVATE_DEF_SEL(newArgumentEncoderForBufferAtIndex_,
+    "newArgumentEncoderForBufferAtIndex:");
+_MTL_PRIVATE_DEF_SEL(newArgumentEncoderWithArguments_,
+    "newArgumentEncoderWithArguments:");
+_MTL_PRIVATE_DEF_SEL(newArgumentEncoderWithBufferIndex_,
+    "newArgumentEncoderWithBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(newArgumentEncoderWithBufferIndex_reflection_,
+    "newArgumentEncoderWithBufferIndex:reflection:");
+_MTL_PRIVATE_DEF_SEL(newBinaryArchiveWithDescriptor_error_,
+    "newBinaryArchiveWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(newBufferWithBytes_length_options_,
+    "newBufferWithBytes:length:options:");
+_MTL_PRIVATE_DEF_SEL(newBufferWithBytesNoCopy_length_options_deallocator_,
+    "newBufferWithBytesNoCopy:length:options:deallocator:");
+_MTL_PRIVATE_DEF_SEL(newBufferWithLength_options_,
+    "newBufferWithLength:options:");
+_MTL_PRIVATE_DEF_SEL(newBufferWithLength_options_offset_,
+    "newBufferWithLength:options:offset:");
+_MTL_PRIVATE_DEF_SEL(newCaptureScopeWithCommandQueue_,
+    "newCaptureScopeWithCommandQueue:");
+_MTL_PRIVATE_DEF_SEL(newCaptureScopeWithDevice_,
+    "newCaptureScopeWithDevice:");
+_MTL_PRIVATE_DEF_SEL(newCommandQueue,
+    "newCommandQueue");
+_MTL_PRIVATE_DEF_SEL(newCommandQueueWithMaxCommandBufferCount_,
+    "newCommandQueueWithMaxCommandBufferCount:");
+_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithAdditionalBinaryFunctions_error_,
+    "newComputePipelineStateWithAdditionalBinaryFunctions:error:");
+_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithDescriptor_options_completionHandler_,
+    "newComputePipelineStateWithDescriptor:options:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithDescriptor_options_reflection_error_,
+    "newComputePipelineStateWithDescriptor:options:reflection:error:");
+_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_completionHandler_,
+    "newComputePipelineStateWithFunction:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_error_,
+    "newComputePipelineStateWithFunction:error:");
+_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_options_completionHandler_,
+    "newComputePipelineStateWithFunction:options:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newComputePipelineStateWithFunction_options_reflection_error_,
+    "newComputePipelineStateWithFunction:options:reflection:error:");
+_MTL_PRIVATE_DEF_SEL(newCounterSampleBufferWithDescriptor_error_,
+    "newCounterSampleBufferWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(newDefaultLibrary,
+    "newDefaultLibrary");
+_MTL_PRIVATE_DEF_SEL(newDefaultLibraryWithBundle_error_,
+    "newDefaultLibraryWithBundle:error:");
+_MTL_PRIVATE_DEF_SEL(newDepthStencilStateWithDescriptor_,
+    "newDepthStencilStateWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newDynamicLibrary_error_,
+    "newDynamicLibrary:error:");
+_MTL_PRIVATE_DEF_SEL(newDynamicLibraryWithURL_error_,
+    "newDynamicLibraryWithURL:error:");
+_MTL_PRIVATE_DEF_SEL(newEvent,
+    "newEvent");
+_MTL_PRIVATE_DEF_SEL(newFence,
+    "newFence");
+_MTL_PRIVATE_DEF_SEL(newFunctionWithDescriptor_completionHandler_,
+    "newFunctionWithDescriptor:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newFunctionWithDescriptor_error_,
+    "newFunctionWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(newFunctionWithName_,
+    "newFunctionWithName:");
+_MTL_PRIVATE_DEF_SEL(newFunctionWithName_constantValues_completionHandler_,
+    "newFunctionWithName:constantValues:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newFunctionWithName_constantValues_error_,
+    "newFunctionWithName:constantValues:error:");
+_MTL_PRIVATE_DEF_SEL(newHeapWithDescriptor_,
+    "newHeapWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newIndirectCommandBufferWithDescriptor_maxCommandCount_options_,
+    "newIndirectCommandBufferWithDescriptor:maxCommandCount:options:");
+_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionTableWithDescriptor_,
+    "newIntersectionFunctionTableWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionTableWithDescriptor_stage_,
+    "newIntersectionFunctionTableWithDescriptor:stage:");
+_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionWithDescriptor_completionHandler_,
+    "newIntersectionFunctionWithDescriptor:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newIntersectionFunctionWithDescriptor_error_,
+    "newIntersectionFunctionWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(newLibraryWithData_error_,
+    "newLibraryWithData:error:");
+_MTL_PRIVATE_DEF_SEL(newLibraryWithFile_error_,
+    "newLibraryWithFile:error:");
+_MTL_PRIVATE_DEF_SEL(newLibraryWithSource_options_completionHandler_,
+    "newLibraryWithSource:options:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newLibraryWithSource_options_error_,
+    "newLibraryWithSource:options:error:");
+_MTL_PRIVATE_DEF_SEL(newLibraryWithStitchedDescriptor_completionHandler_,
+    "newLibraryWithStitchedDescriptor:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newLibraryWithStitchedDescriptor_error_,
+    "newLibraryWithStitchedDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(newLibraryWithURL_error_,
+    "newLibraryWithURL:error:");
+_MTL_PRIVATE_DEF_SEL(newRasterizationRateMapWithDescriptor_,
+    "newRasterizationRateMapWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newRemoteBufferViewForDevice_,
+    "newRemoteBufferViewForDevice:");
+_MTL_PRIVATE_DEF_SEL(newRemoteTextureViewForDevice_,
+    "newRemoteTextureViewForDevice:");
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithAdditionalBinaryFunctions_error_,
+    "newRenderPipelineStateWithAdditionalBinaryFunctions:error:");
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_completionHandler_,
+    "newRenderPipelineStateWithDescriptor:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_error_,
+    "newRenderPipelineStateWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_options_completionHandler_,
+    "newRenderPipelineStateWithDescriptor:options:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithDescriptor_options_reflection_error_,
+    "newRenderPipelineStateWithDescriptor:options:reflection:error:");
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithTileDescriptor_options_completionHandler_,
+    "newRenderPipelineStateWithTileDescriptor:options:completionHandler:");
+_MTL_PRIVATE_DEF_SEL(newRenderPipelineStateWithTileDescriptor_options_reflection_error_,
+    "newRenderPipelineStateWithTileDescriptor:options:reflection:error:");
+_MTL_PRIVATE_DEF_SEL(newSamplerStateWithDescriptor_,
+    "newSamplerStateWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newSharedEvent,
+    "newSharedEvent");
+_MTL_PRIVATE_DEF_SEL(newSharedEventHandle,
+    "newSharedEventHandle");
+_MTL_PRIVATE_DEF_SEL(newSharedEventWithHandle_,
+    "newSharedEventWithHandle:");
+_MTL_PRIVATE_DEF_SEL(newSharedTextureHandle,
+    "newSharedTextureHandle");
+_MTL_PRIVATE_DEF_SEL(newSharedTextureWithDescriptor_,
+    "newSharedTextureWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newSharedTextureWithHandle_,
+    "newSharedTextureWithHandle:");
+_MTL_PRIVATE_DEF_SEL(newTextureViewWithPixelFormat_,
+    "newTextureViewWithPixelFormat:");
+_MTL_PRIVATE_DEF_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_,
+    "newTextureViewWithPixelFormat:textureType:levels:slices:");
+_MTL_PRIVATE_DEF_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_swizzle_,
+    "newTextureViewWithPixelFormat:textureType:levels:slices:swizzle:");
+_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_,
+    "newTextureWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_iosurface_plane_,
+    "newTextureWithDescriptor:iosurface:plane:");
+_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_offset_,
+    "newTextureWithDescriptor:offset:");
+_MTL_PRIVATE_DEF_SEL(newTextureWithDescriptor_offset_bytesPerRow_,
+    "newTextureWithDescriptor:offset:bytesPerRow:");
+_MTL_PRIVATE_DEF_SEL(newVisibleFunctionTableWithDescriptor_,
+    "newVisibleFunctionTableWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(newVisibleFunctionTableWithDescriptor_stage_,
+    "newVisibleFunctionTableWithDescriptor:stage:");
+_MTL_PRIVATE_DEF_SEL(nodes,
+    "nodes");
+_MTL_PRIVATE_DEF_SEL(normalizedCoordinates,
+    "normalizedCoordinates");
+_MTL_PRIVATE_DEF_SEL(notifyListener_atValue_block_,
+    "notifyListener:atValue:block:");
+_MTL_PRIVATE_DEF_SEL(objectAtIndexedSubscript_,
+    "objectAtIndexedSubscript:");
+_MTL_PRIVATE_DEF_SEL(offset,
+    "offset");
+_MTL_PRIVATE_DEF_SEL(opaque,
+    "opaque");
+_MTL_PRIVATE_DEF_SEL(optimizeContentsForCPUAccess_,
+    "optimizeContentsForCPUAccess:");
+_MTL_PRIVATE_DEF_SEL(optimizeContentsForCPUAccess_slice_level_,
+    "optimizeContentsForCPUAccess:slice:level:");
+_MTL_PRIVATE_DEF_SEL(optimizeContentsForGPUAccess_,
+    "optimizeContentsForGPUAccess:");
+_MTL_PRIVATE_DEF_SEL(optimizeContentsForGPUAccess_slice_level_,
+    "optimizeContentsForGPUAccess:slice:level:");
+_MTL_PRIVATE_DEF_SEL(optimizeIndirectCommandBuffer_withRange_,
+    "optimizeIndirectCommandBuffer:withRange:");
+_MTL_PRIVATE_DEF_SEL(options,
+    "options");
+_MTL_PRIVATE_DEF_SEL(outputNode,
+    "outputNode");
+_MTL_PRIVATE_DEF_SEL(outputURL,
+    "outputURL");
+_MTL_PRIVATE_DEF_SEL(parallelRenderCommandEncoderWithDescriptor_,
+    "parallelRenderCommandEncoderWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(parameterBufferSizeAndAlign,
+    "parameterBufferSizeAndAlign");
+_MTL_PRIVATE_DEF_SEL(parentRelativeLevel,
+    "parentRelativeLevel");
+_MTL_PRIVATE_DEF_SEL(parentRelativeSlice,
+    "parentRelativeSlice");
+_MTL_PRIVATE_DEF_SEL(parentTexture,
+    "parentTexture");
+_MTL_PRIVATE_DEF_SEL(patchControlPointCount,
+    "patchControlPointCount");
+_MTL_PRIVATE_DEF_SEL(patchType,
+    "patchType");
+_MTL_PRIVATE_DEF_SEL(peerCount,
+    "peerCount");
+_MTL_PRIVATE_DEF_SEL(peerGroupID,
+    "peerGroupID");
+_MTL_PRIVATE_DEF_SEL(peerIndex,
+    "peerIndex");
+_MTL_PRIVATE_DEF_SEL(physicalGranularity,
+    "physicalGranularity");
+_MTL_PRIVATE_DEF_SEL(physicalSizeForLayer_,
+    "physicalSizeForLayer:");
+_MTL_PRIVATE_DEF_SEL(pixelFormat,
+    "pixelFormat");
+_MTL_PRIVATE_DEF_SEL(pointerType,
+    "pointerType");
+_MTL_PRIVATE_DEF_SEL(popDebugGroup,
+    "popDebugGroup");
+_MTL_PRIVATE_DEF_SEL(preloadedLibraries,
+    "preloadedLibraries");
+_MTL_PRIVATE_DEF_SEL(preprocessorMacros,
+    "preprocessorMacros");
+_MTL_PRIVATE_DEF_SEL(present,
+    "present");
+_MTL_PRIVATE_DEF_SEL(presentAfterMinimumDuration_,
+    "presentAfterMinimumDuration:");
+_MTL_PRIVATE_DEF_SEL(presentAtTime_,
+    "presentAtTime:");
+_MTL_PRIVATE_DEF_SEL(presentDrawable_,
+    "presentDrawable:");
+_MTL_PRIVATE_DEF_SEL(presentDrawable_afterMinimumDuration_,
+    "presentDrawable:afterMinimumDuration:");
+_MTL_PRIVATE_DEF_SEL(presentDrawable_atTime_,
+    "presentDrawable:atTime:");
+_MTL_PRIVATE_DEF_SEL(presentedTime,
+    "presentedTime");
+_MTL_PRIVATE_DEF_SEL(preserveInvariance,
+    "preserveInvariance");
+_MTL_PRIVATE_DEF_SEL(privateFunctions,
+    "privateFunctions");
+_MTL_PRIVATE_DEF_SEL(pushDebugGroup_,
+    "pushDebugGroup:");
+_MTL_PRIVATE_DEF_SEL(rAddressMode,
+    "rAddressMode");
+_MTL_PRIVATE_DEF_SEL(rasterSampleCount,
+    "rasterSampleCount");
+_MTL_PRIVATE_DEF_SEL(rasterizationRateMap,
+    "rasterizationRateMap");
+_MTL_PRIVATE_DEF_SEL(rasterizationRateMapDescriptorWithScreenSize_,
+    "rasterizationRateMapDescriptorWithScreenSize:");
+_MTL_PRIVATE_DEF_SEL(rasterizationRateMapDescriptorWithScreenSize_layer_,
+    "rasterizationRateMapDescriptorWithScreenSize:layer:");
+_MTL_PRIVATE_DEF_SEL(rasterizationRateMapDescriptorWithScreenSize_layerCount_layers_,
+    "rasterizationRateMapDescriptorWithScreenSize:layerCount:layers:");
+_MTL_PRIVATE_DEF_SEL(readMask,
+    "readMask");
+_MTL_PRIVATE_DEF_SEL(readWriteTextureSupport,
+    "readWriteTextureSupport");
+_MTL_PRIVATE_DEF_SEL(recommendedMaxWorkingSetSize,
+    "recommendedMaxWorkingSetSize");
+_MTL_PRIVATE_DEF_SEL(refitAccelerationStructure_descriptor_destination_scratchBuffer_scratchBufferOffset_,
+    "refitAccelerationStructure:descriptor:destination:scratchBuffer:scratchBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(registryID,
+    "registryID");
+_MTL_PRIVATE_DEF_SEL(remoteStorageBuffer,
+    "remoteStorageBuffer");
+_MTL_PRIVATE_DEF_SEL(remoteStorageTexture,
+    "remoteStorageTexture");
+_MTL_PRIVATE_DEF_SEL(removeAllDebugMarkers,
+    "removeAllDebugMarkers");
+_MTL_PRIVATE_DEF_SEL(renderCommandEncoder,
+    "renderCommandEncoder");
+_MTL_PRIVATE_DEF_SEL(renderCommandEncoderWithDescriptor_,
+    "renderCommandEncoderWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(renderPassDescriptor,
+    "renderPassDescriptor");
+_MTL_PRIVATE_DEF_SEL(renderTargetArrayLength,
+    "renderTargetArrayLength");
+_MTL_PRIVATE_DEF_SEL(renderTargetHeight,
+    "renderTargetHeight");
+_MTL_PRIVATE_DEF_SEL(renderTargetWidth,
+    "renderTargetWidth");
+_MTL_PRIVATE_DEF_SEL(replaceRegion_mipmapLevel_slice_withBytes_bytesPerRow_bytesPerImage_,
+    "replaceRegion:mipmapLevel:slice:withBytes:bytesPerRow:bytesPerImage:");
+_MTL_PRIVATE_DEF_SEL(replaceRegion_mipmapLevel_withBytes_bytesPerRow_,
+    "replaceRegion:mipmapLevel:withBytes:bytesPerRow:");
+_MTL_PRIVATE_DEF_SEL(required,
+    "required");
+_MTL_PRIVATE_DEF_SEL(reset,
+    "reset");
+_MTL_PRIVATE_DEF_SEL(resetCommandsInBuffer_withRange_,
+    "resetCommandsInBuffer:withRange:");
+_MTL_PRIVATE_DEF_SEL(resetTextureAccessCounters_region_mipLevel_slice_,
+    "resetTextureAccessCounters:region:mipLevel:slice:");
+_MTL_PRIVATE_DEF_SEL(resetWithRange_,
+    "resetWithRange:");
+_MTL_PRIVATE_DEF_SEL(resolveCounterRange_,
+    "resolveCounterRange:");
+_MTL_PRIVATE_DEF_SEL(resolveCounters_inRange_destinationBuffer_destinationOffset_,
+    "resolveCounters:inRange:destinationBuffer:destinationOffset:");
+_MTL_PRIVATE_DEF_SEL(resolveDepthPlane,
+    "resolveDepthPlane");
+_MTL_PRIVATE_DEF_SEL(resolveLevel,
+    "resolveLevel");
+_MTL_PRIVATE_DEF_SEL(resolveSlice,
+    "resolveSlice");
+_MTL_PRIVATE_DEF_SEL(resolveTexture,
+    "resolveTexture");
+_MTL_PRIVATE_DEF_SEL(resourceOptions,
+    "resourceOptions");
+_MTL_PRIVATE_DEF_SEL(resourceStateCommandEncoder,
+    "resourceStateCommandEncoder");
+_MTL_PRIVATE_DEF_SEL(resourceStateCommandEncoderWithDescriptor_,
+    "resourceStateCommandEncoderWithDescriptor:");
+_MTL_PRIVATE_DEF_SEL(resourceStatePassDescriptor,
+    "resourceStatePassDescriptor");
+_MTL_PRIVATE_DEF_SEL(retainedReferences,
+    "retainedReferences");
+_MTL_PRIVATE_DEF_SEL(rgbBlendOperation,
+    "rgbBlendOperation");
+_MTL_PRIVATE_DEF_SEL(rootResource,
+    "rootResource");
+_MTL_PRIVATE_DEF_SEL(sAddressMode,
+    "sAddressMode");
+_MTL_PRIVATE_DEF_SEL(sampleBuffer,
+    "sampleBuffer");
+_MTL_PRIVATE_DEF_SEL(sampleBufferAttachments,
+    "sampleBufferAttachments");
+_MTL_PRIVATE_DEF_SEL(sampleCount,
+    "sampleCount");
+_MTL_PRIVATE_DEF_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_,
+    "sampleCountersInBuffer:atSampleIndex:withBarrier:");
+_MTL_PRIVATE_DEF_SEL(sampleTimestamps_gpuTimestamp_,
+    "sampleTimestamps:gpuTimestamp:");
+_MTL_PRIVATE_DEF_SEL(screenSize,
+    "screenSize");
+_MTL_PRIVATE_DEF_SEL(serializeToURL_error_,
+    "serializeToURL:error:");
+_MTL_PRIVATE_DEF_SEL(setAccelerationStructure_atBufferIndex_,
+    "setAccelerationStructure:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setAccelerationStructure_atIndex_,
+    "setAccelerationStructure:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setAccess_,
+    "setAccess:");
+_MTL_PRIVATE_DEF_SEL(setAllowDuplicateIntersectionFunctionInvocation_,
+    "setAllowDuplicateIntersectionFunctionInvocation:");
+_MTL_PRIVATE_DEF_SEL(setAllowGPUOptimizedContents_,
+    "setAllowGPUOptimizedContents:");
+_MTL_PRIVATE_DEF_SEL(setAlphaBlendOperation_,
+    "setAlphaBlendOperation:");
+_MTL_PRIVATE_DEF_SEL(setAlphaToCoverageEnabled_,
+    "setAlphaToCoverageEnabled:");
+_MTL_PRIVATE_DEF_SEL(setAlphaToOneEnabled_,
+    "setAlphaToOneEnabled:");
+_MTL_PRIVATE_DEF_SEL(setArgumentBuffer_offset_,
+    "setArgumentBuffer:offset:");
+_MTL_PRIVATE_DEF_SEL(setArgumentBuffer_startOffset_arrayElement_,
+    "setArgumentBuffer:startOffset:arrayElement:");
+_MTL_PRIVATE_DEF_SEL(setArgumentIndex_,
+    "setArgumentIndex:");
+_MTL_PRIVATE_DEF_SEL(setArguments_,
+    "setArguments:");
+_MTL_PRIVATE_DEF_SEL(setArrayLength_,
+    "setArrayLength:");
+_MTL_PRIVATE_DEF_SEL(setAttributes_,
+    "setAttributes:");
+_MTL_PRIVATE_DEF_SEL(setBackFaceStencil_,
+    "setBackFaceStencil:");
+_MTL_PRIVATE_DEF_SEL(setBarrier,
+    "setBarrier");
+_MTL_PRIVATE_DEF_SEL(setBinaryArchives_,
+    "setBinaryArchives:");
+_MTL_PRIVATE_DEF_SEL(setBinaryFunctions_,
+    "setBinaryFunctions:");
+_MTL_PRIVATE_DEF_SEL(setBlendColorRed_green_blue_alpha_,
+    "setBlendColorRed:green:blue:alpha:");
+_MTL_PRIVATE_DEF_SEL(setBlendingEnabled_,
+    "setBlendingEnabled:");
+_MTL_PRIVATE_DEF_SEL(setBorderColor_,
+    "setBorderColor:");
+_MTL_PRIVATE_DEF_SEL(setBoundingBoxBuffer_,
+    "setBoundingBoxBuffer:");
+_MTL_PRIVATE_DEF_SEL(setBoundingBoxBufferOffset_,
+    "setBoundingBoxBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(setBoundingBoxBuffers_,
+    "setBoundingBoxBuffers:");
+_MTL_PRIVATE_DEF_SEL(setBoundingBoxCount_,
+    "setBoundingBoxCount:");
+_MTL_PRIVATE_DEF_SEL(setBoundingBoxStride_,
+    "setBoundingBoxStride:");
+_MTL_PRIVATE_DEF_SEL(setBuffer_,
+    "setBuffer:");
+_MTL_PRIVATE_DEF_SEL(setBuffer_offset_atIndex_,
+    "setBuffer:offset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setBufferIndex_,
+    "setBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setBufferOffset_atIndex_,
+    "setBufferOffset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setBuffers_offsets_withRange_,
+    "setBuffers:offsets:withRange:");
+_MTL_PRIVATE_DEF_SEL(setBytes_length_atIndex_,
+    "setBytes:length:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setCaptureObject_,
+    "setCaptureObject:");
+_MTL_PRIVATE_DEF_SEL(setClearColor_,
+    "setClearColor:");
+_MTL_PRIVATE_DEF_SEL(setClearDepth_,
+    "setClearDepth:");
+_MTL_PRIVATE_DEF_SEL(setClearStencil_,
+    "setClearStencil:");
+_MTL_PRIVATE_DEF_SEL(setColorStoreAction_atIndex_,
+    "setColorStoreAction:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setColorStoreActionOptions_atIndex_,
+    "setColorStoreActionOptions:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setCommandTypes_,
+    "setCommandTypes:");
+_MTL_PRIVATE_DEF_SEL(setCompareFunction_,
+    "setCompareFunction:");
+_MTL_PRIVATE_DEF_SEL(setComputeFunction_,
+    "setComputeFunction:");
+_MTL_PRIVATE_DEF_SEL(setComputePipelineState_,
+    "setComputePipelineState:");
+_MTL_PRIVATE_DEF_SEL(setComputePipelineState_atIndex_,
+    "setComputePipelineState:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setComputePipelineStates_withRange_,
+    "setComputePipelineStates:withRange:");
+_MTL_PRIVATE_DEF_SEL(setConstantBlockAlignment_,
+    "setConstantBlockAlignment:");
+_MTL_PRIVATE_DEF_SEL(setConstantValue_type_atIndex_,
+    "setConstantValue:type:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setConstantValue_type_withName_,
+    "setConstantValue:type:withName:");
+_MTL_PRIVATE_DEF_SEL(setConstantValues_,
+    "setConstantValues:");
+_MTL_PRIVATE_DEF_SEL(setConstantValues_type_withRange_,
+    "setConstantValues:type:withRange:");
+_MTL_PRIVATE_DEF_SEL(setControlDependencies_,
+    "setControlDependencies:");
+_MTL_PRIVATE_DEF_SEL(setCounterSet_,
+    "setCounterSet:");
+_MTL_PRIVATE_DEF_SEL(setCpuCacheMode_,
+    "setCpuCacheMode:");
+_MTL_PRIVATE_DEF_SEL(setCullMode_,
+    "setCullMode:");
+_MTL_PRIVATE_DEF_SEL(setDataType_,
+    "setDataType:");
+_MTL_PRIVATE_DEF_SEL(setDefaultCaptureScope_,
+    "setDefaultCaptureScope:");
+_MTL_PRIVATE_DEF_SEL(setDefaultRasterSampleCount_,
+    "setDefaultRasterSampleCount:");
+_MTL_PRIVATE_DEF_SEL(setDepth_,
+    "setDepth:");
+_MTL_PRIVATE_DEF_SEL(setDepthAttachment_,
+    "setDepthAttachment:");
+_MTL_PRIVATE_DEF_SEL(setDepthAttachmentPixelFormat_,
+    "setDepthAttachmentPixelFormat:");
+_MTL_PRIVATE_DEF_SEL(setDepthBias_slopeScale_clamp_,
+    "setDepthBias:slopeScale:clamp:");
+_MTL_PRIVATE_DEF_SEL(setDepthClipMode_,
+    "setDepthClipMode:");
+_MTL_PRIVATE_DEF_SEL(setDepthCompareFunction_,
+    "setDepthCompareFunction:");
+_MTL_PRIVATE_DEF_SEL(setDepthFailureOperation_,
+    "setDepthFailureOperation:");
+_MTL_PRIVATE_DEF_SEL(setDepthPlane_,
+    "setDepthPlane:");
+_MTL_PRIVATE_DEF_SEL(setDepthResolveFilter_,
+    "setDepthResolveFilter:");
+_MTL_PRIVATE_DEF_SEL(setDepthStencilPassOperation_,
+    "setDepthStencilPassOperation:");
+_MTL_PRIVATE_DEF_SEL(setDepthStencilState_,
+    "setDepthStencilState:");
+_MTL_PRIVATE_DEF_SEL(setDepthStoreAction_,
+    "setDepthStoreAction:");
+_MTL_PRIVATE_DEF_SEL(setDepthStoreActionOptions_,
+    "setDepthStoreActionOptions:");
+_MTL_PRIVATE_DEF_SEL(setDepthWriteEnabled_,
+    "setDepthWriteEnabled:");
+_MTL_PRIVATE_DEF_SEL(setDestination_,
+    "setDestination:");
+_MTL_PRIVATE_DEF_SEL(setDestinationAlphaBlendFactor_,
+    "setDestinationAlphaBlendFactor:");
+_MTL_PRIVATE_DEF_SEL(setDestinationRGBBlendFactor_,
+    "setDestinationRGBBlendFactor:");
+_MTL_PRIVATE_DEF_SEL(setDispatchType_,
+    "setDispatchType:");
+_MTL_PRIVATE_DEF_SEL(setEndOfEncoderSampleIndex_,
+    "setEndOfEncoderSampleIndex:");
+_MTL_PRIVATE_DEF_SEL(setEndOfFragmentSampleIndex_,
+    "setEndOfFragmentSampleIndex:");
+_MTL_PRIVATE_DEF_SEL(setEndOfVertexSampleIndex_,
+    "setEndOfVertexSampleIndex:");
+_MTL_PRIVATE_DEF_SEL(setErrorOptions_,
+    "setErrorOptions:");
+_MTL_PRIVATE_DEF_SEL(setFastMathEnabled_,
+    "setFastMathEnabled:");
+_MTL_PRIVATE_DEF_SEL(setFormat_,
+    "setFormat:");
+_MTL_PRIVATE_DEF_SEL(setFragmentAccelerationStructure_atBufferIndex_,
+    "setFragmentAccelerationStructure:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentAdditionalBinaryFunctions_,
+    "setFragmentAdditionalBinaryFunctions:");
+_MTL_PRIVATE_DEF_SEL(setFragmentBuffer_offset_atIndex_,
+    "setFragmentBuffer:offset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentBufferOffset_atIndex_,
+    "setFragmentBufferOffset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentBuffers_offsets_withRange_,
+    "setFragmentBuffers:offsets:withRange:");
+_MTL_PRIVATE_DEF_SEL(setFragmentBytes_length_atIndex_,
+    "setFragmentBytes:length:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentFunction_,
+    "setFragmentFunction:");
+_MTL_PRIVATE_DEF_SEL(setFragmentIntersectionFunctionTable_atBufferIndex_,
+    "setFragmentIntersectionFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentIntersectionFunctionTables_withBufferRange_,
+    "setFragmentIntersectionFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setFragmentLinkedFunctions_,
+    "setFragmentLinkedFunctions:");
+_MTL_PRIVATE_DEF_SEL(setFragmentPreloadedLibraries_,
+    "setFragmentPreloadedLibraries:");
+_MTL_PRIVATE_DEF_SEL(setFragmentSamplerState_atIndex_,
+    "setFragmentSamplerState:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentSamplerState_lodMinClamp_lodMaxClamp_atIndex_,
+    "setFragmentSamplerState:lodMinClamp:lodMaxClamp:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentSamplerStates_lodMinClamps_lodMaxClamps_withRange_,
+    "setFragmentSamplerStates:lodMinClamps:lodMaxClamps:withRange:");
+_MTL_PRIVATE_DEF_SEL(setFragmentSamplerStates_withRange_,
+    "setFragmentSamplerStates:withRange:");
+_MTL_PRIVATE_DEF_SEL(setFragmentTexture_atIndex_,
+    "setFragmentTexture:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentTextures_withRange_,
+    "setFragmentTextures:withRange:");
+_MTL_PRIVATE_DEF_SEL(setFragmentVisibleFunctionTable_atBufferIndex_,
+    "setFragmentVisibleFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setFragmentVisibleFunctionTables_withBufferRange_,
+    "setFragmentVisibleFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setFrontFaceStencil_,
+    "setFrontFaceStencil:");
+_MTL_PRIVATE_DEF_SEL(setFrontFacingWinding_,
+    "setFrontFacingWinding:");
+_MTL_PRIVATE_DEF_SEL(setFunction_atIndex_,
+    "setFunction:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setFunctionCount_,
+    "setFunctionCount:");
+_MTL_PRIVATE_DEF_SEL(setFunctionGraphs_,
+    "setFunctionGraphs:");
+_MTL_PRIVATE_DEF_SEL(setFunctionName_,
+    "setFunctionName:");
+_MTL_PRIVATE_DEF_SEL(setFunctions_,
+    "setFunctions:");
+_MTL_PRIVATE_DEF_SEL(setFunctions_withRange_,
+    "setFunctions:withRange:");
+_MTL_PRIVATE_DEF_SEL(setGeometryDescriptors_,
+    "setGeometryDescriptors:");
+_MTL_PRIVATE_DEF_SEL(setGroups_,
+    "setGroups:");
+_MTL_PRIVATE_DEF_SEL(setHazardTrackingMode_,
+    "setHazardTrackingMode:");
+_MTL_PRIVATE_DEF_SEL(setHeight_,
+    "setHeight:");
+_MTL_PRIVATE_DEF_SEL(setImageblockSampleLength_,
+    "setImageblockSampleLength:");
+_MTL_PRIVATE_DEF_SEL(setImageblockWidth_height_,
+    "setImageblockWidth:height:");
+_MTL_PRIVATE_DEF_SEL(setIndex_,
+    "setIndex:");
+_MTL_PRIVATE_DEF_SEL(setIndexBuffer_,
+    "setIndexBuffer:");
+_MTL_PRIVATE_DEF_SEL(setIndexBufferIndex_,
+    "setIndexBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setIndexBufferOffset_,
+    "setIndexBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(setIndexType_,
+    "setIndexType:");
+_MTL_PRIVATE_DEF_SEL(setIndirectCommandBuffer_atIndex_,
+    "setIndirectCommandBuffer:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setIndirectCommandBuffers_withRange_,
+    "setIndirectCommandBuffers:withRange:");
+_MTL_PRIVATE_DEF_SEL(setInheritBuffers_,
+    "setInheritBuffers:");
+_MTL_PRIVATE_DEF_SEL(setInheritPipelineState_,
+    "setInheritPipelineState:");
+_MTL_PRIVATE_DEF_SEL(setInputPrimitiveTopology_,
+    "setInputPrimitiveTopology:");
+_MTL_PRIVATE_DEF_SEL(setInsertLibraries_,
+    "setInsertLibraries:");
+_MTL_PRIVATE_DEF_SEL(setInstallName_,
+    "setInstallName:");
+_MTL_PRIVATE_DEF_SEL(setInstanceCount_,
+    "setInstanceCount:");
+_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorBuffer_,
+    "setInstanceDescriptorBuffer:");
+_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorBufferOffset_,
+    "setInstanceDescriptorBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorStride_,
+    "setInstanceDescriptorStride:");
+_MTL_PRIVATE_DEF_SEL(setInstanceDescriptorType_,
+    "setInstanceDescriptorType:");
+_MTL_PRIVATE_DEF_SEL(setInstancedAccelerationStructures_,
+    "setInstancedAccelerationStructures:");
+_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTable_atBufferIndex_,
+    "setIntersectionFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTable_atIndex_,
+    "setIntersectionFunctionTable:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTableOffset_,
+    "setIntersectionFunctionTableOffset:");
+_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTables_withBufferRange_,
+    "setIntersectionFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setIntersectionFunctionTables_withRange_,
+    "setIntersectionFunctionTables:withRange:");
+_MTL_PRIVATE_DEF_SEL(setKernelBuffer_offset_atIndex_,
+    "setKernelBuffer:offset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setLabel_,
+    "setLabel:");
+_MTL_PRIVATE_DEF_SEL(setLanguageVersion_,
+    "setLanguageVersion:");
+_MTL_PRIVATE_DEF_SEL(setLayer_atIndex_,
+    "setLayer:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setLevel_,
+    "setLevel:");
+_MTL_PRIVATE_DEF_SEL(setLibraries_,
+    "setLibraries:");
+_MTL_PRIVATE_DEF_SEL(setLibraryType_,
+    "setLibraryType:");
+_MTL_PRIVATE_DEF_SEL(setLinkedFunctions_,
+    "setLinkedFunctions:");
+_MTL_PRIVATE_DEF_SEL(setLoadAction_,
+    "setLoadAction:");
+_MTL_PRIVATE_DEF_SEL(setLodAverage_,
+    "setLodAverage:");
+_MTL_PRIVATE_DEF_SEL(setLodMaxClamp_,
+    "setLodMaxClamp:");
+_MTL_PRIVATE_DEF_SEL(setLodMinClamp_,
+    "setLodMinClamp:");
+_MTL_PRIVATE_DEF_SEL(setMagFilter_,
+    "setMagFilter:");
+_MTL_PRIVATE_DEF_SEL(setMaxAnisotropy_,
+    "setMaxAnisotropy:");
+_MTL_PRIVATE_DEF_SEL(setMaxCallStackDepth_,
+    "setMaxCallStackDepth:");
+_MTL_PRIVATE_DEF_SEL(setMaxFragmentBufferBindCount_,
+    "setMaxFragmentBufferBindCount:");
+_MTL_PRIVATE_DEF_SEL(setMaxFragmentCallStackDepth_,
+    "setMaxFragmentCallStackDepth:");
+_MTL_PRIVATE_DEF_SEL(setMaxKernelBufferBindCount_,
+    "setMaxKernelBufferBindCount:");
+_MTL_PRIVATE_DEF_SEL(setMaxTessellationFactor_,
+    "setMaxTessellationFactor:");
+_MTL_PRIVATE_DEF_SEL(setMaxTotalThreadsPerThreadgroup_,
+    "setMaxTotalThreadsPerThreadgroup:");
+_MTL_PRIVATE_DEF_SEL(setMaxVertexAmplificationCount_,
+    "setMaxVertexAmplificationCount:");
+_MTL_PRIVATE_DEF_SEL(setMaxVertexBufferBindCount_,
+    "setMaxVertexBufferBindCount:");
+_MTL_PRIVATE_DEF_SEL(setMaxVertexCallStackDepth_,
+    "setMaxVertexCallStackDepth:");
+_MTL_PRIVATE_DEF_SEL(setMinFilter_,
+    "setMinFilter:");
+_MTL_PRIVATE_DEF_SEL(setMipFilter_,
+    "setMipFilter:");
+_MTL_PRIVATE_DEF_SEL(setMipmapLevelCount_,
+    "setMipmapLevelCount:");
+_MTL_PRIVATE_DEF_SEL(setMotionEndBorderMode_,
+    "setMotionEndBorderMode:");
+_MTL_PRIVATE_DEF_SEL(setMotionEndTime_,
+    "setMotionEndTime:");
+_MTL_PRIVATE_DEF_SEL(setMotionKeyframeCount_,
+    "setMotionKeyframeCount:");
+_MTL_PRIVATE_DEF_SEL(setMotionStartBorderMode_,
+    "setMotionStartBorderMode:");
+_MTL_PRIVATE_DEF_SEL(setMotionStartTime_,
+    "setMotionStartTime:");
+_MTL_PRIVATE_DEF_SEL(setMotionTransformBuffer_,
+    "setMotionTransformBuffer:");
+_MTL_PRIVATE_DEF_SEL(setMotionTransformBufferOffset_,
+    "setMotionTransformBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(setMotionTransformCount_,
+    "setMotionTransformCount:");
+_MTL_PRIVATE_DEF_SEL(setMutability_,
+    "setMutability:");
+_MTL_PRIVATE_DEF_SEL(setName_,
+    "setName:");
+_MTL_PRIVATE_DEF_SEL(setNodes_,
+    "setNodes:");
+_MTL_PRIVATE_DEF_SEL(setNormalizedCoordinates_,
+    "setNormalizedCoordinates:");
+_MTL_PRIVATE_DEF_SEL(setObject_atIndexedSubscript_,
+    "setObject:atIndexedSubscript:");
+_MTL_PRIVATE_DEF_SEL(setOffset_,
+    "setOffset:");
+_MTL_PRIVATE_DEF_SEL(setOpaque_,
+    "setOpaque:");
+_MTL_PRIVATE_DEF_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_atIndex_,
+    "setOpaqueTriangleIntersectionFunctionWithSignature:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_withRange_,
+    "setOpaqueTriangleIntersectionFunctionWithSignature:withRange:");
+_MTL_PRIVATE_DEF_SEL(setOptions_,
+    "setOptions:");
+_MTL_PRIVATE_DEF_SEL(setOutputNode_,
+    "setOutputNode:");
+_MTL_PRIVATE_DEF_SEL(setOutputURL_,
+    "setOutputURL:");
+_MTL_PRIVATE_DEF_SEL(setPixelFormat_,
+    "setPixelFormat:");
+_MTL_PRIVATE_DEF_SEL(setPreloadedLibraries_,
+    "setPreloadedLibraries:");
+_MTL_PRIVATE_DEF_SEL(setPreprocessorMacros_,
+    "setPreprocessorMacros:");
+_MTL_PRIVATE_DEF_SEL(setPreserveInvariance_,
+    "setPreserveInvariance:");
+_MTL_PRIVATE_DEF_SEL(setPrivateFunctions_,
+    "setPrivateFunctions:");
+_MTL_PRIVATE_DEF_SEL(setPurgeableState_,
+    "setPurgeableState:");
+_MTL_PRIVATE_DEF_SEL(setRAddressMode_,
+    "setRAddressMode:");
+_MTL_PRIVATE_DEF_SEL(setRasterSampleCount_,
+    "setRasterSampleCount:");
+_MTL_PRIVATE_DEF_SEL(setRasterizationEnabled_,
+    "setRasterizationEnabled:");
+_MTL_PRIVATE_DEF_SEL(setRasterizationRateMap_,
+    "setRasterizationRateMap:");
+_MTL_PRIVATE_DEF_SEL(setReadMask_,
+    "setReadMask:");
+_MTL_PRIVATE_DEF_SEL(setRenderPipelineState_,
+    "setRenderPipelineState:");
+_MTL_PRIVATE_DEF_SEL(setRenderPipelineState_atIndex_,
+    "setRenderPipelineState:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setRenderPipelineStates_withRange_,
+    "setRenderPipelineStates:withRange:");
+_MTL_PRIVATE_DEF_SEL(setRenderTargetArrayLength_,
+    "setRenderTargetArrayLength:");
+_MTL_PRIVATE_DEF_SEL(setRenderTargetHeight_,
+    "setRenderTargetHeight:");
+_MTL_PRIVATE_DEF_SEL(setRenderTargetWidth_,
+    "setRenderTargetWidth:");
+_MTL_PRIVATE_DEF_SEL(setResolveDepthPlane_,
+    "setResolveDepthPlane:");
+_MTL_PRIVATE_DEF_SEL(setResolveLevel_,
+    "setResolveLevel:");
+_MTL_PRIVATE_DEF_SEL(setResolveSlice_,
+    "setResolveSlice:");
+_MTL_PRIVATE_DEF_SEL(setResolveTexture_,
+    "setResolveTexture:");
+_MTL_PRIVATE_DEF_SEL(setResourceOptions_,
+    "setResourceOptions:");
+_MTL_PRIVATE_DEF_SEL(setRetainedReferences_,
+    "setRetainedReferences:");
+_MTL_PRIVATE_DEF_SEL(setRgbBlendOperation_,
+    "setRgbBlendOperation:");
+_MTL_PRIVATE_DEF_SEL(setSAddressMode_,
+    "setSAddressMode:");
+_MTL_PRIVATE_DEF_SEL(setSampleBuffer_,
+    "setSampleBuffer:");
+_MTL_PRIVATE_DEF_SEL(setSampleCount_,
+    "setSampleCount:");
+_MTL_PRIVATE_DEF_SEL(setSamplePositions_count_,
+    "setSamplePositions:count:");
+_MTL_PRIVATE_DEF_SEL(setSamplerState_atIndex_,
+    "setSamplerState:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setSamplerState_lodMinClamp_lodMaxClamp_atIndex_,
+    "setSamplerState:lodMinClamp:lodMaxClamp:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setSamplerStates_lodMinClamps_lodMaxClamps_withRange_,
+    "setSamplerStates:lodMinClamps:lodMaxClamps:withRange:");
+_MTL_PRIVATE_DEF_SEL(setSamplerStates_withRange_,
+    "setSamplerStates:withRange:");
+_MTL_PRIVATE_DEF_SEL(setScissorRect_,
+    "setScissorRect:");
+_MTL_PRIVATE_DEF_SEL(setScissorRects_count_,
+    "setScissorRects:count:");
+_MTL_PRIVATE_DEF_SEL(setScreenSize_,
+    "setScreenSize:");
+_MTL_PRIVATE_DEF_SEL(setSignaledValue_,
+    "setSignaledValue:");
+_MTL_PRIVATE_DEF_SEL(setSize_,
+    "setSize:");
+_MTL_PRIVATE_DEF_SEL(setSlice_,
+    "setSlice:");
+_MTL_PRIVATE_DEF_SEL(setSourceAlphaBlendFactor_,
+    "setSourceAlphaBlendFactor:");
+_MTL_PRIVATE_DEF_SEL(setSourceRGBBlendFactor_,
+    "setSourceRGBBlendFactor:");
+_MTL_PRIVATE_DEF_SEL(setSpecializedName_,
+    "setSpecializedName:");
+_MTL_PRIVATE_DEF_SEL(setStageInRegion_,
+    "setStageInRegion:");
+_MTL_PRIVATE_DEF_SEL(setStageInRegionWithIndirectBuffer_indirectBufferOffset_,
+    "setStageInRegionWithIndirectBuffer:indirectBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(setStageInputDescriptor_,
+    "setStageInputDescriptor:");
+_MTL_PRIVATE_DEF_SEL(setStartOfEncoderSampleIndex_,
+    "setStartOfEncoderSampleIndex:");
+_MTL_PRIVATE_DEF_SEL(setStartOfFragmentSampleIndex_,
+    "setStartOfFragmentSampleIndex:");
+_MTL_PRIVATE_DEF_SEL(setStartOfVertexSampleIndex_,
+    "setStartOfVertexSampleIndex:");
+_MTL_PRIVATE_DEF_SEL(setStencilAttachment_,
+    "setStencilAttachment:");
+_MTL_PRIVATE_DEF_SEL(setStencilAttachmentPixelFormat_,
+    "setStencilAttachmentPixelFormat:");
+_MTL_PRIVATE_DEF_SEL(setStencilCompareFunction_,
+    "setStencilCompareFunction:");
+_MTL_PRIVATE_DEF_SEL(setStencilFailureOperation_,
+    "setStencilFailureOperation:");
+_MTL_PRIVATE_DEF_SEL(setStencilFrontReferenceValue_backReferenceValue_,
+    "setStencilFrontReferenceValue:backReferenceValue:");
+_MTL_PRIVATE_DEF_SEL(setStencilReferenceValue_,
+    "setStencilReferenceValue:");
+_MTL_PRIVATE_DEF_SEL(setStencilResolveFilter_,
+    "setStencilResolveFilter:");
+_MTL_PRIVATE_DEF_SEL(setStencilStoreAction_,
+    "setStencilStoreAction:");
+_MTL_PRIVATE_DEF_SEL(setStencilStoreActionOptions_,
+    "setStencilStoreActionOptions:");
+_MTL_PRIVATE_DEF_SEL(setStepFunction_,
+    "setStepFunction:");
+_MTL_PRIVATE_DEF_SEL(setStepRate_,
+    "setStepRate:");
+_MTL_PRIVATE_DEF_SEL(setStorageMode_,
+    "setStorageMode:");
+_MTL_PRIVATE_DEF_SEL(setStoreAction_,
+    "setStoreAction:");
+_MTL_PRIVATE_DEF_SEL(setStoreActionOptions_,
+    "setStoreActionOptions:");
+_MTL_PRIVATE_DEF_SEL(setStride_,
+    "setStride:");
+_MTL_PRIVATE_DEF_SEL(setSupportAddingBinaryFunctions_,
+    "setSupportAddingBinaryFunctions:");
+_MTL_PRIVATE_DEF_SEL(setSupportAddingFragmentBinaryFunctions_,
+    "setSupportAddingFragmentBinaryFunctions:");
+_MTL_PRIVATE_DEF_SEL(setSupportAddingVertexBinaryFunctions_,
+    "setSupportAddingVertexBinaryFunctions:");
+_MTL_PRIVATE_DEF_SEL(setSupportArgumentBuffers_,
+    "setSupportArgumentBuffers:");
+_MTL_PRIVATE_DEF_SEL(setSupportIndirectCommandBuffers_,
+    "setSupportIndirectCommandBuffers:");
+_MTL_PRIVATE_DEF_SEL(setSwizzle_,
+    "setSwizzle:");
+_MTL_PRIVATE_DEF_SEL(setTAddressMode_,
+    "setTAddressMode:");
+_MTL_PRIVATE_DEF_SEL(setTessellationControlPointIndexType_,
+    "setTessellationControlPointIndexType:");
+_MTL_PRIVATE_DEF_SEL(setTessellationFactorBuffer_offset_instanceStride_,
+    "setTessellationFactorBuffer:offset:instanceStride:");
+_MTL_PRIVATE_DEF_SEL(setTessellationFactorFormat_,
+    "setTessellationFactorFormat:");
+_MTL_PRIVATE_DEF_SEL(setTessellationFactorScale_,
+    "setTessellationFactorScale:");
+_MTL_PRIVATE_DEF_SEL(setTessellationFactorScaleEnabled_,
+    "setTessellationFactorScaleEnabled:");
+_MTL_PRIVATE_DEF_SEL(setTessellationFactorStepFunction_,
+    "setTessellationFactorStepFunction:");
+_MTL_PRIVATE_DEF_SEL(setTessellationOutputWindingOrder_,
+    "setTessellationOutputWindingOrder:");
+_MTL_PRIVATE_DEF_SEL(setTessellationPartitionMode_,
+    "setTessellationPartitionMode:");
+_MTL_PRIVATE_DEF_SEL(setTexture_,
+    "setTexture:");
+_MTL_PRIVATE_DEF_SEL(setTexture_atIndex_,
+    "setTexture:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setTextureType_,
+    "setTextureType:");
+_MTL_PRIVATE_DEF_SEL(setTextures_withRange_,
+    "setTextures:withRange:");
+_MTL_PRIVATE_DEF_SEL(setThreadGroupSizeIsMultipleOfThreadExecutionWidth_,
+    "setThreadGroupSizeIsMultipleOfThreadExecutionWidth:");
+_MTL_PRIVATE_DEF_SEL(setThreadgroupMemoryLength_,
+    "setThreadgroupMemoryLength:");
+_MTL_PRIVATE_DEF_SEL(setThreadgroupMemoryLength_atIndex_,
+    "setThreadgroupMemoryLength:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setThreadgroupMemoryLength_offset_atIndex_,
+    "setThreadgroupMemoryLength:offset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setThreadgroupSizeMatchesTileSize_,
+    "setThreadgroupSizeMatchesTileSize:");
+_MTL_PRIVATE_DEF_SEL(setTileAccelerationStructure_atBufferIndex_,
+    "setTileAccelerationStructure:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileAdditionalBinaryFunctions_,
+    "setTileAdditionalBinaryFunctions:");
+_MTL_PRIVATE_DEF_SEL(setTileBuffer_offset_atIndex_,
+    "setTileBuffer:offset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileBufferOffset_atIndex_,
+    "setTileBufferOffset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileBuffers_offsets_withRange_,
+    "setTileBuffers:offsets:withRange:");
+_MTL_PRIVATE_DEF_SEL(setTileBytes_length_atIndex_,
+    "setTileBytes:length:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileFunction_,
+    "setTileFunction:");
+_MTL_PRIVATE_DEF_SEL(setTileHeight_,
+    "setTileHeight:");
+_MTL_PRIVATE_DEF_SEL(setTileIntersectionFunctionTable_atBufferIndex_,
+    "setTileIntersectionFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileIntersectionFunctionTables_withBufferRange_,
+    "setTileIntersectionFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setTileSamplerState_atIndex_,
+    "setTileSamplerState:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileSamplerState_lodMinClamp_lodMaxClamp_atIndex_,
+    "setTileSamplerState:lodMinClamp:lodMaxClamp:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileSamplerStates_lodMinClamps_lodMaxClamps_withRange_,
+    "setTileSamplerStates:lodMinClamps:lodMaxClamps:withRange:");
+_MTL_PRIVATE_DEF_SEL(setTileSamplerStates_withRange_,
+    "setTileSamplerStates:withRange:");
+_MTL_PRIVATE_DEF_SEL(setTileTexture_atIndex_,
+    "setTileTexture:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileTextures_withRange_,
+    "setTileTextures:withRange:");
+_MTL_PRIVATE_DEF_SEL(setTileVisibleFunctionTable_atBufferIndex_,
+    "setTileVisibleFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setTileVisibleFunctionTables_withBufferRange_,
+    "setTileVisibleFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setTileWidth_,
+    "setTileWidth:");
+_MTL_PRIVATE_DEF_SEL(setTriangleCount_,
+    "setTriangleCount:");
+_MTL_PRIVATE_DEF_SEL(setTriangleFillMode_,
+    "setTriangleFillMode:");
+_MTL_PRIVATE_DEF_SEL(setType_,
+    "setType:");
+_MTL_PRIVATE_DEF_SEL(setUrl_,
+    "setUrl:");
+_MTL_PRIVATE_DEF_SEL(setUsage_,
+    "setUsage:");
+_MTL_PRIVATE_DEF_SEL(setVertexAccelerationStructure_atBufferIndex_,
+    "setVertexAccelerationStructure:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexAdditionalBinaryFunctions_,
+    "setVertexAdditionalBinaryFunctions:");
+_MTL_PRIVATE_DEF_SEL(setVertexAmplificationCount_viewMappings_,
+    "setVertexAmplificationCount:viewMappings:");
+_MTL_PRIVATE_DEF_SEL(setVertexBuffer_,
+    "setVertexBuffer:");
+_MTL_PRIVATE_DEF_SEL(setVertexBuffer_offset_atIndex_,
+    "setVertexBuffer:offset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexBufferOffset_,
+    "setVertexBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(setVertexBufferOffset_atIndex_,
+    "setVertexBufferOffset:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexBuffers_,
+    "setVertexBuffers:");
+_MTL_PRIVATE_DEF_SEL(setVertexBuffers_offsets_withRange_,
+    "setVertexBuffers:offsets:withRange:");
+_MTL_PRIVATE_DEF_SEL(setVertexBytes_length_atIndex_,
+    "setVertexBytes:length:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexDescriptor_,
+    "setVertexDescriptor:");
+_MTL_PRIVATE_DEF_SEL(setVertexFunction_,
+    "setVertexFunction:");
+_MTL_PRIVATE_DEF_SEL(setVertexIntersectionFunctionTable_atBufferIndex_,
+    "setVertexIntersectionFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexIntersectionFunctionTables_withBufferRange_,
+    "setVertexIntersectionFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setVertexLinkedFunctions_,
+    "setVertexLinkedFunctions:");
+_MTL_PRIVATE_DEF_SEL(setVertexPreloadedLibraries_,
+    "setVertexPreloadedLibraries:");
+_MTL_PRIVATE_DEF_SEL(setVertexSamplerState_atIndex_,
+    "setVertexSamplerState:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexSamplerState_lodMinClamp_lodMaxClamp_atIndex_,
+    "setVertexSamplerState:lodMinClamp:lodMaxClamp:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexSamplerStates_lodMinClamps_lodMaxClamps_withRange_,
+    "setVertexSamplerStates:lodMinClamps:lodMaxClamps:withRange:");
+_MTL_PRIVATE_DEF_SEL(setVertexSamplerStates_withRange_,
+    "setVertexSamplerStates:withRange:");
+_MTL_PRIVATE_DEF_SEL(setVertexStride_,
+    "setVertexStride:");
+_MTL_PRIVATE_DEF_SEL(setVertexTexture_atIndex_,
+    "setVertexTexture:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexTextures_withRange_,
+    "setVertexTextures:withRange:");
+_MTL_PRIVATE_DEF_SEL(setVertexVisibleFunctionTable_atBufferIndex_,
+    "setVertexVisibleFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setVertexVisibleFunctionTables_withBufferRange_,
+    "setVertexVisibleFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setViewport_,
+    "setViewport:");
+_MTL_PRIVATE_DEF_SEL(setViewports_count_,
+    "setViewports:count:");
+_MTL_PRIVATE_DEF_SEL(setVisibilityResultBuffer_,
+    "setVisibilityResultBuffer:");
+_MTL_PRIVATE_DEF_SEL(setVisibilityResultMode_offset_,
+    "setVisibilityResultMode:offset:");
+_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTable_atBufferIndex_,
+    "setVisibleFunctionTable:atBufferIndex:");
+_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTable_atIndex_,
+    "setVisibleFunctionTable:atIndex:");
+_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTables_withBufferRange_,
+    "setVisibleFunctionTables:withBufferRange:");
+_MTL_PRIVATE_DEF_SEL(setVisibleFunctionTables_withRange_,
+    "setVisibleFunctionTables:withRange:");
+_MTL_PRIVATE_DEF_SEL(setWidth_,
+    "setWidth:");
+_MTL_PRIVATE_DEF_SEL(setWriteMask_,
+    "setWriteMask:");
+_MTL_PRIVATE_DEF_SEL(sharedCaptureManager,
+    "sharedCaptureManager");
+_MTL_PRIVATE_DEF_SEL(signaledValue,
+    "signaledValue");
+_MTL_PRIVATE_DEF_SEL(size,
+    "size");
+_MTL_PRIVATE_DEF_SEL(slice,
+    "slice");
+_MTL_PRIVATE_DEF_SEL(sourceAlphaBlendFactor,
+    "sourceAlphaBlendFactor");
+_MTL_PRIVATE_DEF_SEL(sourceRGBBlendFactor,
+    "sourceRGBBlendFactor");
+_MTL_PRIVATE_DEF_SEL(sparseTileSizeInBytes,
+    "sparseTileSizeInBytes");
+_MTL_PRIVATE_DEF_SEL(sparseTileSizeWithTextureType_pixelFormat_sampleCount_,
+    "sparseTileSizeWithTextureType:pixelFormat:sampleCount:");
+_MTL_PRIVATE_DEF_SEL(specializedName,
+    "specializedName");
+_MTL_PRIVATE_DEF_SEL(stageInputAttributes,
+    "stageInputAttributes");
+_MTL_PRIVATE_DEF_SEL(stageInputDescriptor,
+    "stageInputDescriptor");
+_MTL_PRIVATE_DEF_SEL(stageInputOutputDescriptor,
+    "stageInputOutputDescriptor");
+_MTL_PRIVATE_DEF_SEL(startCaptureWithCommandQueue_,
+    "startCaptureWithCommandQueue:");
+_MTL_PRIVATE_DEF_SEL(startCaptureWithDescriptor_error_,
+    "startCaptureWithDescriptor:error:");
+_MTL_PRIVATE_DEF_SEL(startCaptureWithDevice_,
+    "startCaptureWithDevice:");
+_MTL_PRIVATE_DEF_SEL(startCaptureWithScope_,
+    "startCaptureWithScope:");
+_MTL_PRIVATE_DEF_SEL(startOfEncoderSampleIndex,
+    "startOfEncoderSampleIndex");
+_MTL_PRIVATE_DEF_SEL(startOfFragmentSampleIndex,
+    "startOfFragmentSampleIndex");
+_MTL_PRIVATE_DEF_SEL(startOfVertexSampleIndex,
+    "startOfVertexSampleIndex");
+_MTL_PRIVATE_DEF_SEL(staticThreadgroupMemoryLength,
+    "staticThreadgroupMemoryLength");
+_MTL_PRIVATE_DEF_SEL(status,
+    "status");
+_MTL_PRIVATE_DEF_SEL(stencilAttachment,
+    "stencilAttachment");
+_MTL_PRIVATE_DEF_SEL(stencilAttachmentPixelFormat,
+    "stencilAttachmentPixelFormat");
+_MTL_PRIVATE_DEF_SEL(stencilCompareFunction,
+    "stencilCompareFunction");
+_MTL_PRIVATE_DEF_SEL(stencilFailureOperation,
+    "stencilFailureOperation");
+_MTL_PRIVATE_DEF_SEL(stencilResolveFilter,
+    "stencilResolveFilter");
+_MTL_PRIVATE_DEF_SEL(stepFunction,
+    "stepFunction");
+_MTL_PRIVATE_DEF_SEL(stepRate,
+    "stepRate");
+_MTL_PRIVATE_DEF_SEL(stopCapture,
+    "stopCapture");
+_MTL_PRIVATE_DEF_SEL(storageMode,
+    "storageMode");
+_MTL_PRIVATE_DEF_SEL(storeAction,
+    "storeAction");
+_MTL_PRIVATE_DEF_SEL(storeActionOptions,
+    "storeActionOptions");
+_MTL_PRIVATE_DEF_SEL(stride,
+    "stride");
+_MTL_PRIVATE_DEF_SEL(structType,
+    "structType");
+_MTL_PRIVATE_DEF_SEL(supportAddingBinaryFunctions,
+    "supportAddingBinaryFunctions");
+_MTL_PRIVATE_DEF_SEL(supportAddingFragmentBinaryFunctions,
+    "supportAddingFragmentBinaryFunctions");
+_MTL_PRIVATE_DEF_SEL(supportAddingVertexBinaryFunctions,
+    "supportAddingVertexBinaryFunctions");
+_MTL_PRIVATE_DEF_SEL(supportArgumentBuffers,
+    "supportArgumentBuffers");
+_MTL_PRIVATE_DEF_SEL(supportIndirectCommandBuffers,
+    "supportIndirectCommandBuffers");
+_MTL_PRIVATE_DEF_SEL(supports32BitFloatFiltering,
+    "supports32BitFloatFiltering");
+_MTL_PRIVATE_DEF_SEL(supports32BitMSAA,
+    "supports32BitMSAA");
+_MTL_PRIVATE_DEF_SEL(supportsBCTextureCompression,
+    "supportsBCTextureCompression");
+_MTL_PRIVATE_DEF_SEL(supportsCounterSampling_,
+    "supportsCounterSampling:");
+_MTL_PRIVATE_DEF_SEL(supportsDestination_,
+    "supportsDestination:");
+_MTL_PRIVATE_DEF_SEL(supportsDynamicLibraries,
+    "supportsDynamicLibraries");
+_MTL_PRIVATE_DEF_SEL(supportsFamily_,
+    "supportsFamily:");
+_MTL_PRIVATE_DEF_SEL(supportsFeatureSet_,
+    "supportsFeatureSet:");
+_MTL_PRIVATE_DEF_SEL(supportsFunctionPointers,
+    "supportsFunctionPointers");
+_MTL_PRIVATE_DEF_SEL(supportsFunctionPointersFromRender,
+    "supportsFunctionPointersFromRender");
+_MTL_PRIVATE_DEF_SEL(supportsPrimitiveMotionBlur,
+    "supportsPrimitiveMotionBlur");
+_MTL_PRIVATE_DEF_SEL(supportsPullModelInterpolation,
+    "supportsPullModelInterpolation");
+_MTL_PRIVATE_DEF_SEL(supportsQueryTextureLOD,
+    "supportsQueryTextureLOD");
+_MTL_PRIVATE_DEF_SEL(supportsRasterizationRateMapWithLayerCount_,
+    "supportsRasterizationRateMapWithLayerCount:");
+_MTL_PRIVATE_DEF_SEL(supportsRaytracing,
+    "supportsRaytracing");
+_MTL_PRIVATE_DEF_SEL(supportsRaytracingFromRender,
+    "supportsRaytracingFromRender");
+_MTL_PRIVATE_DEF_SEL(supportsRenderDynamicLibraries,
+    "supportsRenderDynamicLibraries");
+_MTL_PRIVATE_DEF_SEL(supportsShaderBarycentricCoordinates,
+    "supportsShaderBarycentricCoordinates");
+_MTL_PRIVATE_DEF_SEL(supportsTextureSampleCount_,
+    "supportsTextureSampleCount:");
+_MTL_PRIVATE_DEF_SEL(supportsVertexAmplificationCount_,
+    "supportsVertexAmplificationCount:");
+_MTL_PRIVATE_DEF_SEL(swizzle,
+    "swizzle");
+_MTL_PRIVATE_DEF_SEL(synchronizeResource_,
+    "synchronizeResource:");
+_MTL_PRIVATE_DEF_SEL(synchronizeTexture_slice_level_,
+    "synchronizeTexture:slice:level:");
+_MTL_PRIVATE_DEF_SEL(tAddressMode,
+    "tAddressMode");
+_MTL_PRIVATE_DEF_SEL(tailSizeInBytes,
+    "tailSizeInBytes");
+_MTL_PRIVATE_DEF_SEL(tessellationControlPointIndexType,
+    "tessellationControlPointIndexType");
+_MTL_PRIVATE_DEF_SEL(tessellationFactorFormat,
+    "tessellationFactorFormat");
+_MTL_PRIVATE_DEF_SEL(tessellationFactorStepFunction,
+    "tessellationFactorStepFunction");
+_MTL_PRIVATE_DEF_SEL(tessellationOutputWindingOrder,
+    "tessellationOutputWindingOrder");
+_MTL_PRIVATE_DEF_SEL(tessellationPartitionMode,
+    "tessellationPartitionMode");
+_MTL_PRIVATE_DEF_SEL(texture,
+    "texture");
+_MTL_PRIVATE_DEF_SEL(texture2DDescriptorWithPixelFormat_width_height_mipmapped_,
+    "texture2DDescriptorWithPixelFormat:width:height:mipmapped:");
+_MTL_PRIVATE_DEF_SEL(textureBarrier,
+    "textureBarrier");
+_MTL_PRIVATE_DEF_SEL(textureBufferDescriptorWithPixelFormat_width_resourceOptions_usage_,
+    "textureBufferDescriptorWithPixelFormat:width:resourceOptions:usage:");
+_MTL_PRIVATE_DEF_SEL(textureCubeDescriptorWithPixelFormat_size_mipmapped_,
+    "textureCubeDescriptorWithPixelFormat:size:mipmapped:");
+_MTL_PRIVATE_DEF_SEL(textureDataType,
+    "textureDataType");
+_MTL_PRIVATE_DEF_SEL(textureReferenceType,
+    "textureReferenceType");
+_MTL_PRIVATE_DEF_SEL(textureType,
+    "textureType");
+_MTL_PRIVATE_DEF_SEL(threadExecutionWidth,
+    "threadExecutionWidth");
+_MTL_PRIVATE_DEF_SEL(threadGroupSizeIsMultipleOfThreadExecutionWidth,
+    "threadGroupSizeIsMultipleOfThreadExecutionWidth");
+_MTL_PRIVATE_DEF_SEL(threadgroupMemoryAlignment,
+    "threadgroupMemoryAlignment");
+_MTL_PRIVATE_DEF_SEL(threadgroupMemoryDataSize,
+    "threadgroupMemoryDataSize");
+_MTL_PRIVATE_DEF_SEL(threadgroupMemoryLength,
+    "threadgroupMemoryLength");
+_MTL_PRIVATE_DEF_SEL(threadgroupSizeMatchesTileSize,
+    "threadgroupSizeMatchesTileSize");
+_MTL_PRIVATE_DEF_SEL(tileAdditionalBinaryFunctions,
+    "tileAdditionalBinaryFunctions");
+_MTL_PRIVATE_DEF_SEL(tileArguments,
+    "tileArguments");
+_MTL_PRIVATE_DEF_SEL(tileBuffers,
+    "tileBuffers");
+_MTL_PRIVATE_DEF_SEL(tileFunction,
+    "tileFunction");
+_MTL_PRIVATE_DEF_SEL(tileHeight,
+    "tileHeight");
+_MTL_PRIVATE_DEF_SEL(tileWidth,
+    "tileWidth");
+_MTL_PRIVATE_DEF_SEL(triangleCount,
+    "triangleCount");
+_MTL_PRIVATE_DEF_SEL(type,
+    "type");
+_MTL_PRIVATE_DEF_SEL(updateFence_,
+    "updateFence:");
+_MTL_PRIVATE_DEF_SEL(updateFence_afterStages_,
+    "updateFence:afterStages:");
+_MTL_PRIVATE_DEF_SEL(updateTextureMapping_mode_indirectBuffer_indirectBufferOffset_,
+    "updateTextureMapping:mode:indirectBuffer:indirectBufferOffset:");
+_MTL_PRIVATE_DEF_SEL(updateTextureMapping_mode_region_mipLevel_slice_,
+    "updateTextureMapping:mode:region:mipLevel:slice:");
+_MTL_PRIVATE_DEF_SEL(updateTextureMappings_mode_regions_mipLevels_slices_numRegions_,
+    "updateTextureMappings:mode:regions:mipLevels:slices:numRegions:");
+_MTL_PRIVATE_DEF_SEL(url,
+    "url");
+_MTL_PRIVATE_DEF_SEL(usage,
+    "usage");
+_MTL_PRIVATE_DEF_SEL(useHeap_,
+    "useHeap:");
+_MTL_PRIVATE_DEF_SEL(useHeap_stages_,
+    "useHeap:stages:");
+_MTL_PRIVATE_DEF_SEL(useHeaps_count_,
+    "useHeaps:count:");
+_MTL_PRIVATE_DEF_SEL(useHeaps_count_stages_,
+    "useHeaps:count:stages:");
+_MTL_PRIVATE_DEF_SEL(useResource_usage_,
+    "useResource:usage:");
+_MTL_PRIVATE_DEF_SEL(useResource_usage_stages_,
+    "useResource:usage:stages:");
+_MTL_PRIVATE_DEF_SEL(useResources_count_usage_,
+    "useResources:count:usage:");
+_MTL_PRIVATE_DEF_SEL(useResources_count_usage_stages_,
+    "useResources:count:usage:stages:");
+_MTL_PRIVATE_DEF_SEL(usedSize,
+    "usedSize");
+_MTL_PRIVATE_DEF_SEL(vertexAdditionalBinaryFunctions,
+    "vertexAdditionalBinaryFunctions");
+_MTL_PRIVATE_DEF_SEL(vertexArguments,
+    "vertexArguments");
+_MTL_PRIVATE_DEF_SEL(vertexAttributes,
+    "vertexAttributes");
+_MTL_PRIVATE_DEF_SEL(vertexBuffer,
+    "vertexBuffer");
+_MTL_PRIVATE_DEF_SEL(vertexBufferOffset,
+    "vertexBufferOffset");
+_MTL_PRIVATE_DEF_SEL(vertexBuffers,
+    "vertexBuffers");
+_MTL_PRIVATE_DEF_SEL(vertexDescriptor,
+    "vertexDescriptor");
+_MTL_PRIVATE_DEF_SEL(vertexFunction,
+    "vertexFunction");
+_MTL_PRIVATE_DEF_SEL(vertexLinkedFunctions,
+    "vertexLinkedFunctions");
+_MTL_PRIVATE_DEF_SEL(vertexPreloadedLibraries,
+    "vertexPreloadedLibraries");
+_MTL_PRIVATE_DEF_SEL(vertexStride,
+    "vertexStride");
+_MTL_PRIVATE_DEF_SEL(vertical,
+    "vertical");
+_MTL_PRIVATE_DEF_SEL(verticalSampleStorage,
+    "verticalSampleStorage");
+_MTL_PRIVATE_DEF_SEL(visibilityResultBuffer,
+    "visibilityResultBuffer");
+_MTL_PRIVATE_DEF_SEL(visibleFunctionTableDescriptor,
+    "visibleFunctionTableDescriptor");
+_MTL_PRIVATE_DEF_SEL(waitForFence_,
+    "waitForFence:");
+_MTL_PRIVATE_DEF_SEL(waitForFence_beforeStages_,
+    "waitForFence:beforeStages:");
+_MTL_PRIVATE_DEF_SEL(waitUntilCompleted,
+    "waitUntilCompleted");
+_MTL_PRIVATE_DEF_SEL(waitUntilScheduled,
+    "waitUntilScheduled");
+_MTL_PRIVATE_DEF_SEL(width,
+    "width");
+_MTL_PRIVATE_DEF_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_,
+    "writeCompactedAccelerationStructureSize:toBuffer:offset:");
+_MTL_PRIVATE_DEF_SEL(writeCompactedAccelerationStructureSize_toBuffer_offset_sizeDataType_,
+    "writeCompactedAccelerationStructureSize:toBuffer:offset:sizeDataType:");
+_MTL_PRIVATE_DEF_SEL(writeMask,
+    "writeMask");
+
+}
diff --git a/metal-cpp/Metal/MTLHeap.hpp b/metal-cpp/Metal/MTLHeap.hpp
new file mode 100644
index 0000000..eb2c3e1
--- /dev/null
+++ b/metal-cpp/Metal/MTLHeap.hpp
@@ -0,0 +1,282 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLHeap.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLHeap.hpp"
+#include "MTLResource.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::Integer, HeapType) { // heap type codes 0..2, declared through _MTL_ENUM with NS::Integer
+    HeapTypeAutomatic = 0,
+    HeapTypePlacement = 1,
+    HeapTypeSparse = 2,
+};
+// HeapDescriptor: alloc()/init() plus one getter/setter pair per setting; bodies are the inline definitions after this namespace.
+class HeapDescriptor : public NS::Copying<HeapDescriptor>
+{
+public:
+    static class HeapDescriptor* alloc();
+
+    class HeapDescriptor*        init();
+
+    NS::UInteger                 size() const;
+    void                         setSize(NS::UInteger size);
+
+    MTL::StorageMode             storageMode() const;
+    void                         setStorageMode(MTL::StorageMode storageMode);
+
+    MTL::CPUCacheMode            cpuCacheMode() const;
+    void                         setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode);
+
+    MTL::HazardTrackingMode      hazardTrackingMode() const;
+    void                         setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode);
+
+    MTL::ResourceOptions         resourceOptions() const;
+    void                         setResourceOptions(MTL::ResourceOptions resourceOptions);
+
+    MTL::HeapType                type() const;
+    void                         setType(MTL::HeapType type);
+};
+// Heap: const accessors (only `label` has a setter) plus newBuffer/newTexture overloads with and without an explicit offset.
+class Heap : public NS::Referencing<Heap>
+{
+public:
+    NS::String*             label() const;
+    void                    setLabel(const NS::String* label);
+
+    class Device*           device() const;
+
+    MTL::StorageMode        storageMode() const;
+
+    MTL::CPUCacheMode       cpuCacheMode() const;
+
+    MTL::HazardTrackingMode hazardTrackingMode() const;
+
+    MTL::ResourceOptions    resourceOptions() const;
+
+    NS::UInteger            size() const;
+
+    NS::UInteger            usedSize() const;
+
+    NS::UInteger            currentAllocatedSize() const;
+    // maxAvailableSize is declared non-const, unlike the size queries above.
+    NS::UInteger            maxAvailableSize(NS::UInteger alignment);
+
+    class Buffer*           newBuffer(NS::UInteger length, MTL::ResourceOptions options);
+
+    class Texture*          newTexture(const class TextureDescriptor* desc);
+    // Takes a PurgeableState and also returns one (NOTE(review): presumably the previous state -- confirm against the Metal docs).
+    MTL::PurgeableState     setPurgeableState(MTL::PurgeableState state);
+
+    MTL::HeapType           type() const;
+    // Overloads below add a trailing `offset` argument to the two factories above.
+    class Buffer*           newBuffer(NS::UInteger length, MTL::ResourceOptions options, NS::UInteger offset);
+
+    class Texture*          newTexture(const class TextureDescriptor* descriptor, NS::UInteger offset);
+};
+
+}
+
+// static method: alloc -- returns NS::Object::alloc<MTL::HeapDescriptor>() called with _MTL_PRIVATE_CLS(MTLHeapDescriptor).
+_MTL_INLINE MTL::HeapDescriptor* MTL::HeapDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::HeapDescriptor>(_MTL_PRIVATE_CLS(MTLHeapDescriptor));
+}
+
+// method: init -- returns the result of NS::Object::init<MTL::HeapDescriptor>(); no arguments are passed.
+_MTL_INLINE MTL::HeapDescriptor* MTL::HeapDescriptor::init()
+{
+    return NS::Object::init<MTL::HeapDescriptor>();
+}
+
+// property: size -- getter returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(size) message sent to this object.
+_MTL_INLINE NS::UInteger MTL::HeapDescriptor::size() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(size));
+}
+// setter: sends _MTL_PRIVATE_SEL(setSize_) with `size` as its only argument; returns nothing.
+_MTL_INLINE void MTL::HeapDescriptor::setSize(NS::UInteger size)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSize_), size);
+}
+
+// property: storageMode -- getter returns the MTL::StorageMode reply to the _MTL_PRIVATE_SEL(storageMode) message.
+_MTL_INLINE MTL::StorageMode MTL::HeapDescriptor::storageMode() const
+{
+    return Object::sendMessage<MTL::StorageMode>(this, _MTL_PRIVATE_SEL(storageMode));
+}
+// setter: sends _MTL_PRIVATE_SEL(setStorageMode_) with `storageMode` as its only argument.
+_MTL_INLINE void MTL::HeapDescriptor::setStorageMode(MTL::StorageMode storageMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStorageMode_), storageMode);
+}
+
+// property: cpuCacheMode -- getter returns the MTL::CPUCacheMode reply to the _MTL_PRIVATE_SEL(cpuCacheMode) message.
+_MTL_INLINE MTL::CPUCacheMode MTL::HeapDescriptor::cpuCacheMode() const
+{
+    return Object::sendMessage<MTL::CPUCacheMode>(this, _MTL_PRIVATE_SEL(cpuCacheMode));
+}
+// setter: sends _MTL_PRIVATE_SEL(setCpuCacheMode_) with `cpuCacheMode` as its only argument.
+_MTL_INLINE void MTL::HeapDescriptor::setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCpuCacheMode_), cpuCacheMode);
+}
+
+// property: hazardTrackingMode -- getter returns the MTL::HazardTrackingMode reply to the _MTL_PRIVATE_SEL(hazardTrackingMode) message.
+_MTL_INLINE MTL::HazardTrackingMode MTL::HeapDescriptor::hazardTrackingMode() const
+{
+    return Object::sendMessage<MTL::HazardTrackingMode>(this, _MTL_PRIVATE_SEL(hazardTrackingMode));
+}
+// setter: sends _MTL_PRIVATE_SEL(setHazardTrackingMode_) with `hazardTrackingMode` as its only argument.
+_MTL_INLINE void MTL::HeapDescriptor::setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setHazardTrackingMode_), hazardTrackingMode);
+}
+
+// property: resourceOptions -- getter returns the MTL::ResourceOptions reply to the _MTL_PRIVATE_SEL(resourceOptions) message.
+_MTL_INLINE MTL::ResourceOptions MTL::HeapDescriptor::resourceOptions() const
+{
+    return Object::sendMessage<MTL::ResourceOptions>(this, _MTL_PRIVATE_SEL(resourceOptions));
+}
+// setter: sends _MTL_PRIVATE_SEL(setResourceOptions_) with `resourceOptions` as its only argument.
+_MTL_INLINE void MTL::HeapDescriptor::setResourceOptions(MTL::ResourceOptions resourceOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResourceOptions_), resourceOptions);
+}
+
+// property: type -- getter returns the MTL::HeapType reply to the _MTL_PRIVATE_SEL(type) message.
+_MTL_INLINE MTL::HeapType MTL::HeapDescriptor::type() const
+{
+    return Object::sendMessage<MTL::HeapType>(this, _MTL_PRIVATE_SEL(type));
+}
+// setter: sends _MTL_PRIVATE_SEL(setType_) with `type` as its only argument.
+_MTL_INLINE void MTL::HeapDescriptor::setType(MTL::HeapType type)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setType_), type);
+}
+
+// property: label -- getter returns the NS::String* reply to the _MTL_PRIVATE_SEL(label) message.
+_MTL_INLINE NS::String* MTL::Heap::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+// setter: sends _MTL_PRIVATE_SEL(setLabel_) with the `label` pointer as its only argument.
+_MTL_INLINE void MTL::Heap::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: device -- read-only here; returns the MTL::Device* reply to the _MTL_PRIVATE_SEL(device) message.
+_MTL_INLINE MTL::Device* MTL::Heap::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: storageMode -- read-only here; returns the MTL::StorageMode reply to the _MTL_PRIVATE_SEL(storageMode) message.
+_MTL_INLINE MTL::StorageMode MTL::Heap::storageMode() const
+{
+    return Object::sendMessage<MTL::StorageMode>(this, _MTL_PRIVATE_SEL(storageMode));
+}
+
+// property: cpuCacheMode -- read-only here; returns the MTL::CPUCacheMode reply to the _MTL_PRIVATE_SEL(cpuCacheMode) message.
+_MTL_INLINE MTL::CPUCacheMode MTL::Heap::cpuCacheMode() const
+{
+    return Object::sendMessage<MTL::CPUCacheMode>(this, _MTL_PRIVATE_SEL(cpuCacheMode));
+}
+
+// property: hazardTrackingMode -- read-only here; returns the MTL::HazardTrackingMode reply to the _MTL_PRIVATE_SEL(hazardTrackingMode) message.
+_MTL_INLINE MTL::HazardTrackingMode MTL::Heap::hazardTrackingMode() const
+{
+    return Object::sendMessage<MTL::HazardTrackingMode>(this, _MTL_PRIVATE_SEL(hazardTrackingMode));
+}
+
+// property: resourceOptions -- read-only here; returns the MTL::ResourceOptions reply to the _MTL_PRIVATE_SEL(resourceOptions) message.
+_MTL_INLINE MTL::ResourceOptions MTL::Heap::resourceOptions() const
+{
+    return Object::sendMessage<MTL::ResourceOptions>(this, _MTL_PRIVATE_SEL(resourceOptions));
+}
+
+// property: size -- read-only here; returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(size) message.
+_MTL_INLINE NS::UInteger MTL::Heap::size() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(size));
+}
+
+// property: usedSize -- read-only here; returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(usedSize) message.
+_MTL_INLINE NS::UInteger MTL::Heap::usedSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(usedSize));
+}
+
+// property: currentAllocatedSize -- read-only here; returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(currentAllocatedSize) message.
+_MTL_INLINE NS::UInteger MTL::Heap::currentAllocatedSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(currentAllocatedSize));
+}
+
+// method: maxAvailableSizeWithAlignment: -- passes `alignment` as the only argument and returns the NS::UInteger reply (non-const member).
+_MTL_INLINE NS::UInteger MTL::Heap::maxAvailableSize(NS::UInteger alignment)
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxAvailableSizeWithAlignment_), alignment);
+}
+
+// method: newBufferWithLength:options: -- forwards `length`, then `options`; returns the MTL::Buffer* reply unchanged (NOTE(review): ownership of the result is not visible here -- confirm).
+_MTL_INLINE MTL::Buffer* MTL::Heap::newBuffer(NS::UInteger length, MTL::ResourceOptions options)
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(newBufferWithLength_options_), length, options);
+}
+
+// method: newTextureWithDescriptor: -- forwards the `desc` pointer; returns the MTL::Texture* reply unchanged (NOTE(review): ownership of the result is not visible here -- confirm).
+_MTL_INLINE MTL::Texture* MTL::Heap::newTexture(const MTL::TextureDescriptor* desc)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_), desc);
+}
+
+// method: setPurgeableState: -- unlike the void setters, this returns the MTL::PurgeableState reply (NOTE(review): presumably the prior state -- confirm).
+_MTL_INLINE MTL::PurgeableState MTL::Heap::setPurgeableState(MTL::PurgeableState state)
+{
+    return Object::sendMessage<MTL::PurgeableState>(this, _MTL_PRIVATE_SEL(setPurgeableState_), state);
+}
+
+// property: type -- read-only here; returns the MTL::HeapType reply to the _MTL_PRIVATE_SEL(type) message.
+_MTL_INLINE MTL::HeapType MTL::Heap::type() const
+{
+    return Object::sendMessage<MTL::HeapType>(this, _MTL_PRIVATE_SEL(type));
+}
+
+// method: newBufferWithLength:options:offset: -- three-argument overload; forwards `length`, `options`, `offset` in that order and returns the MTL::Buffer* reply.
+_MTL_INLINE MTL::Buffer* MTL::Heap::newBuffer(NS::UInteger length, MTL::ResourceOptions options, NS::UInteger offset)
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(newBufferWithLength_options_offset_), length, options, offset);
+}
+
+// method: newTextureWithDescriptor:offset: -- two-argument overload; forwards `descriptor`, then `offset`, and returns the MTL::Texture* reply.
+_MTL_INLINE MTL::Texture* MTL::Heap::newTexture(const MTL::TextureDescriptor* descriptor, NS::UInteger offset)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureWithDescriptor_offset_), descriptor, offset);
+}
diff --git a/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp b/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp
new file mode 100644
index 0000000..4aa3a35
--- /dev/null
+++ b/metal-cpp/Metal/MTLIndirectCommandBuffer.hpp
@@ -0,0 +1,189 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLIndirectCommandBuffer.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLIndirectCommandBuffer.hpp"
+#include "MTLResource.hpp"
+
+namespace MTL
+{
+_MTL_OPTIONS(NS::UInteger, IndirectCommandType) { // each value is a distinct power of two; 16 is not assigned in this list
+    IndirectCommandTypeDraw = 1,
+    IndirectCommandTypeDrawIndexed = 2,
+    IndirectCommandTypeDrawPatches = 4,
+    IndirectCommandTypeDrawIndexedPatches = 8,
+    IndirectCommandTypeConcurrentDispatch = 32,
+    IndirectCommandTypeConcurrentDispatchThreads = 64,
+};
+// Plain struct of two uint32_t fields, tagged with _MTL_PACKED.
+struct IndirectCommandBufferExecutionRange
+{
+    uint32_t location;
+    uint32_t length;
+} _MTL_PACKED;
+// IndirectCommandBufferDescriptor: alloc()/init() plus getter/setter pairs; bodies are the inline definitions after this namespace.
+class IndirectCommandBufferDescriptor : public NS::Copying<IndirectCommandBufferDescriptor>
+{
+public:
+    static class IndirectCommandBufferDescriptor* alloc();
+
+    class IndirectCommandBufferDescriptor*        init();
+
+    MTL::IndirectCommandType                      commandTypes() const;
+    void                                          setCommandTypes(MTL::IndirectCommandType commandTypes);
+
+    bool                                          inheritPipelineState() const;
+    void                                          setInheritPipelineState(bool inheritPipelineState);
+
+    bool                                          inheritBuffers() const;
+    void                                          setInheritBuffers(bool inheritBuffers);
+
+    NS::UInteger                                  maxVertexBufferBindCount() const;
+    void                                          setMaxVertexBufferBindCount(NS::UInteger maxVertexBufferBindCount);
+
+    NS::UInteger                                  maxFragmentBufferBindCount() const;
+    void                                          setMaxFragmentBufferBindCount(NS::UInteger maxFragmentBufferBindCount);
+
+    NS::UInteger                                  maxKernelBufferBindCount() const;
+    void                                          setMaxKernelBufferBindCount(NS::UInteger maxKernelBufferBindCount);
+};
+// IndirectCommandBuffer: NS::Referencing is instantiated with Resource as its second argument; command lookups take a command index.
+class IndirectCommandBuffer : public NS::Referencing<IndirectCommandBuffer, Resource>
+{
+public:
+    NS::UInteger                  size() const;
+
+    void                          reset(NS::Range range);
+
+    class IndirectRenderCommand*  indirectRenderCommand(NS::UInteger commandIndex);
+
+    class IndirectComputeCommand* indirectComputeCommand(NS::UInteger commandIndex);
+};
+
+}
+
+// static method: alloc -- returns NS::Object::alloc<MTL::IndirectCommandBufferDescriptor>() called with _MTL_PRIVATE_CLS(MTLIndirectCommandBufferDescriptor).
+_MTL_INLINE MTL::IndirectCommandBufferDescriptor* MTL::IndirectCommandBufferDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::IndirectCommandBufferDescriptor>(_MTL_PRIVATE_CLS(MTLIndirectCommandBufferDescriptor));
+}
+
+// method: init -- returns the result of NS::Object::init<MTL::IndirectCommandBufferDescriptor>(); no arguments are passed.
+_MTL_INLINE MTL::IndirectCommandBufferDescriptor* MTL::IndirectCommandBufferDescriptor::init()
+{
+    return NS::Object::init<MTL::IndirectCommandBufferDescriptor>();
+}
+
+// property: commandTypes -- getter returns the MTL::IndirectCommandType reply to the _MTL_PRIVATE_SEL(commandTypes) message.
+_MTL_INLINE MTL::IndirectCommandType MTL::IndirectCommandBufferDescriptor::commandTypes() const
+{
+    return Object::sendMessage<MTL::IndirectCommandType>(this, _MTL_PRIVATE_SEL(commandTypes));
+}
+// setter: sends _MTL_PRIVATE_SEL(setCommandTypes_) with `commandTypes` as its only argument.
+_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setCommandTypes(MTL::IndirectCommandType commandTypes)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCommandTypes_), commandTypes);
+}
+
+// property: inheritPipelineState -- getter returns the bool reply to the _MTL_PRIVATE_SEL(inheritPipelineState) message.
+_MTL_INLINE bool MTL::IndirectCommandBufferDescriptor::inheritPipelineState() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(inheritPipelineState));
+}
+// setter: sends _MTL_PRIVATE_SEL(setInheritPipelineState_) with the bool `inheritPipelineState` as its only argument.
+_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setInheritPipelineState(bool inheritPipelineState)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInheritPipelineState_), inheritPipelineState);
+}
+
+// property: inheritBuffers -- getter returns the bool reply to the _MTL_PRIVATE_SEL(inheritBuffers) message.
+_MTL_INLINE bool MTL::IndirectCommandBufferDescriptor::inheritBuffers() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(inheritBuffers));
+}
+// setter: sends _MTL_PRIVATE_SEL(setInheritBuffers_) with the bool `inheritBuffers` as its only argument.
+_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setInheritBuffers(bool inheritBuffers)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInheritBuffers_), inheritBuffers);
+}
+
+// property: maxVertexBufferBindCount -- getter returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(maxVertexBufferBindCount) message.
+_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxVertexBufferBindCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxVertexBufferBindCount));
+}
+// setter: sends _MTL_PRIVATE_SEL(setMaxVertexBufferBindCount_) with `maxVertexBufferBindCount` as its only argument.
+_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxVertexBufferBindCount(NS::UInteger maxVertexBufferBindCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxVertexBufferBindCount_), maxVertexBufferBindCount);
+}
+
+// property: maxFragmentBufferBindCount -- getter returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(maxFragmentBufferBindCount) message.
+_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxFragmentBufferBindCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxFragmentBufferBindCount));
+}
+// setter: sends _MTL_PRIVATE_SEL(setMaxFragmentBufferBindCount_) with `maxFragmentBufferBindCount` as its only argument.
+_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxFragmentBufferBindCount(NS::UInteger maxFragmentBufferBindCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxFragmentBufferBindCount_), maxFragmentBufferBindCount);
+}
+
+// property: maxKernelBufferBindCount -- getter returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(maxKernelBufferBindCount) message.
+_MTL_INLINE NS::UInteger MTL::IndirectCommandBufferDescriptor::maxKernelBufferBindCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxKernelBufferBindCount));
+}
+// setter: sends _MTL_PRIVATE_SEL(setMaxKernelBufferBindCount_) with `maxKernelBufferBindCount` as its only argument.
+_MTL_INLINE void MTL::IndirectCommandBufferDescriptor::setMaxKernelBufferBindCount(NS::UInteger maxKernelBufferBindCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxKernelBufferBindCount_), maxKernelBufferBindCount);
+}
+
+// property: size -- read-only here; returns the NS::UInteger reply to the _MTL_PRIVATE_SEL(size) message.
+_MTL_INLINE NS::UInteger MTL::IndirectCommandBuffer::size() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(size));
+}
+
+// method: resetWithRange: -- passes the NS::Range `range` by value as the only argument; returns nothing.
+_MTL_INLINE void MTL::IndirectCommandBuffer::reset(NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(resetWithRange_), range);
+}
+
+// method: indirectRenderCommandAtIndex: -- passes `commandIndex` and returns the MTL::IndirectRenderCommand* reply unchanged.
+_MTL_INLINE MTL::IndirectRenderCommand* MTL::IndirectCommandBuffer::indirectRenderCommand(NS::UInteger commandIndex)
+{
+    return Object::sendMessage<MTL::IndirectRenderCommand*>(this, _MTL_PRIVATE_SEL(indirectRenderCommandAtIndex_), commandIndex);
+}
+
+// method: indirectComputeCommandAtIndex: -- passes `commandIndex` and returns the MTL::IndirectComputeCommand* reply unchanged.
+_MTL_INLINE MTL::IndirectComputeCommand* MTL::IndirectCommandBuffer::indirectComputeCommand(NS::UInteger commandIndex)
+{
+    return Object::sendMessage<MTL::IndirectComputeCommand*>(this, _MTL_PRIVATE_SEL(indirectComputeCommandAtIndex_), commandIndex);
+}
diff --git a/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp b/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp
new file mode 100644
index 0000000..06d52fd
--- /dev/null
+++ b/metal-cpp/Metal/MTLIndirectCommandEncoder.hpp
@@ -0,0 +1,187 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLIndirectCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLRenderCommandEncoder.hpp"
+#include "MTLStageInputOutputDescriptor.hpp"
+#include "MTLTypes.hpp"
+
+namespace MTL
+{
+class IndirectRenderCommand : public NS::Referencing<IndirectRenderCommand> // all members return void; bodies are the inline definitions after this namespace
+{
+public:
+    void setRenderPipelineState(const class RenderPipelineState* pipelineState);
+
+    void setVertexBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void setFragmentBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+    // In both patch draws the trailing `buffer`, `offset`, `instanceStride` fill the tessellationFactorBuffer* parts of the selector.
+    void drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride);
+
+    void drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride);
+
+    void drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance);
+    // baseVertex is the only signed (NS::Integer) parameter in this declaration.
+    void drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance);
+
+    void reset();
+};
+// IndirectComputeCommand: all members return void; bodies are the inline definitions after this namespace.
+class IndirectComputeCommand : public NS::Referencing<IndirectComputeCommand>
+{
+public:
+    void setComputePipelineState(const class ComputePipelineState* pipelineState);
+
+    void setKernelBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void concurrentDispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup);
+
+    void concurrentDispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup);
+
+    void setBarrier();
+
+    void clearBarrier();
+    // Despite the name, this takes both width and height.
+    void setImageblockWidth(NS::UInteger width, NS::UInteger height);
+
+    void reset();
+
+    void setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index);
+
+    void setStageInRegion(MTL::Region region);
+};
+
+}
+
+// method: setRenderPipelineState: -- forwards the `pipelineState` pointer as the only argument; returns nothing.
+_MTL_INLINE void MTL::IndirectRenderCommand::setRenderPipelineState(const MTL::RenderPipelineState* pipelineState)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRenderPipelineState_), pipelineState);
+}
+
+// method: setVertexBuffer:offset:atIndex: -- forwards `buffer`, `offset`, `index` in that order; returns nothing.
+_MTL_INLINE void MTL::IndirectRenderCommand::setVertexBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setFragmentBuffer:offset:atIndex: -- forwards `buffer`, `offset`, `index` in that order; returns nothing.
+_MTL_INLINE void MTL::IndirectRenderCommand::setFragmentBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride: -- all ten arguments are forwarded in declaration order; the last three (`buffer`, `offset`, `instanceStride`) land in the tessellationFactorBuffer* slots.
+_MTL_INLINE void MTL::IndirectRenderCommand::drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, instanceCount, baseInstance, buffer, offset, instanceStride);
+}
+
+// method: drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance:tessellationFactorBuffer:tessellationFactorBufferOffset:tessellationFactorBufferInstanceStride: -- all twelve arguments are forwarded in declaration order; the last three (`buffer`, `offset`, `instanceStride`) land in the tessellationFactorBuffer* slots.
+_MTL_INLINE void MTL::IndirectRenderCommand::drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance, const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_tessellationFactorBuffer_tessellationFactorBufferOffset_tessellationFactorBufferInstanceStride_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, controlPointIndexBuffer, controlPointIndexBufferOffset, instanceCount, baseInstance, buffer, offset, instanceStride);
+}
+
+// method: drawPrimitives:vertexStart:vertexCount:instanceCount:baseInstance: -- forwards the five arguments in declaration order; returns nothing.
+_MTL_INLINE void MTL::IndirectRenderCommand::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_baseInstance_), primitiveType, vertexStart, vertexCount, instanceCount, baseInstance);
+}
+
+// method: drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:baseVertex:baseInstance: -- forwards the eight arguments in declaration order; `baseVertex` is the only signed (NS::Integer) one.
+_MTL_INLINE void MTL::IndirectRenderCommand::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
+}
+
+// method: reset -- sends the argument-less _MTL_PRIVATE_SEL(reset) message; returns nothing.
+_MTL_INLINE void MTL::IndirectRenderCommand::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
+
+// method: setComputePipelineState: -- forwards the `pipelineState` pointer as the only argument; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::setComputePipelineState(const MTL::ComputePipelineState* pipelineState)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setComputePipelineState_), pipelineState);
+}
+
+// method: setKernelBuffer:offset:atIndex: -- forwards `buffer`, `offset`, `index` in that order; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::setKernelBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setKernelBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: concurrentDispatchThreadgroups:threadsPerThreadgroup: -- forwards both MTL::Size values by value, grid size first; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::concurrentDispatchThreadgroups(MTL::Size threadgroupsPerGrid, MTL::Size threadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(concurrentDispatchThreadgroups_threadsPerThreadgroup_), threadgroupsPerGrid, threadsPerThreadgroup);
+}
+
+// method: concurrentDispatchThreads:threadsPerThreadgroup: -- forwards both MTL::Size values by value, grid size first; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::concurrentDispatchThreads(MTL::Size threadsPerGrid, MTL::Size threadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(concurrentDispatchThreads_threadsPerThreadgroup_), threadsPerGrid, threadsPerThreadgroup);
+}
+
+// method: setBarrier -- sends the argument-less _MTL_PRIVATE_SEL(setBarrier) message; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::setBarrier()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBarrier));
+}
+
+// method: clearBarrier -- sends the argument-less _MTL_PRIVATE_SEL(clearBarrier) message; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::clearBarrier()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(clearBarrier));
+}
+
+// method: setImageblockWidth:height: -- forwards `width`, then `height`; the C++ name mentions only the width but both are sent.
+_MTL_INLINE void MTL::IndirectComputeCommand::setImageblockWidth(NS::UInteger width, NS::UInteger height)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setImageblockWidth_height_), width, height);
+}
+
+// method: reset -- sends the argument-less _MTL_PRIVATE_SEL(reset) message; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
+
+// method: setThreadgroupMemoryLength:atIndex: -- forwards `length`, then `index`; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_atIndex_), length, index);
+}
+
+// method: setStageInRegion: -- passes the MTL::Region `region` by value as the only argument; returns nothing.
+_MTL_INLINE void MTL::IndirectComputeCommand::setStageInRegion(MTL::Region region)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStageInRegion_), region);
+}
diff --git a/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp b/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp
new file mode 100644
index 0000000..e23835d
--- /dev/null
+++ b/metal-cpp/Metal/MTLIntersectionFunctionTable.hpp
@@ -0,0 +1,157 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLIntersectionFunctionTable.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLBuffer.hpp"
+#include "MTLFunctionHandle.hpp"
+#include "MTLIntersectionFunctionTable.hpp"
+#include "MTLResource.hpp"
+#include "MTLVisibleFunctionTable.hpp"
+
+namespace MTL
+{
+_MTL_OPTIONS(NS::UInteger, IntersectionFunctionSignature) { // bit-flag layout: None is 0 and every other enumerator is a distinct power of two
+    IntersectionFunctionSignatureNone = 0,
+    IntersectionFunctionSignatureInstancing = 1,
+    IntersectionFunctionSignatureTriangleData = 2,
+    IntersectionFunctionSignatureWorldSpaceData = 4,
+    IntersectionFunctionSignatureInstanceMotion = 8,
+    IntersectionFunctionSignaturePrimitiveMotion = 16,
+    IntersectionFunctionSignatureExtendedLimits = 32,
+};
+
+class IntersectionFunctionTableDescriptor : public NS::Copying<IntersectionFunctionTableDescriptor> // declaration only; every member is defined inline further down this header
+{
+public:
+    static class IntersectionFunctionTableDescriptor* alloc();
+
+    class IntersectionFunctionTableDescriptor*        init();
+
+    static class IntersectionFunctionTableDescriptor* intersectionFunctionTableDescriptor(); // class-level factory: its definition messages the class, not an instance
+
+    NS::UInteger                                      functionCount() const; // getter half of the functionCount property
+    void                                              setFunctionCount(NS::UInteger functionCount); // setter half of the functionCount property
+};
+
+class IntersectionFunctionTable : public NS::Referencing<IntersectionFunctionTable, Resource> // only setter members are declared here; all are defined inline below
+{
+public:
+    void setBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void setBuffers(const class Buffer* buffers[], const NS::UInteger offsets[], NS::Range range);
+
+    void setFunction(const class FunctionHandle* function, NS::UInteger index);
+
+    void setFunctions(const class FunctionHandle* functions[], NS::Range range);
+
+    void setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::UInteger index); // index overload: maps to the ...WithSignature:atIndex: selector
+
+    void setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::Range range); // range overload: maps to the ...WithSignature:withRange: selector
+
+    void setVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex);
+
+    void setVisibleFunctionTables(const class VisibleFunctionTable* functionTables[], NS::Range bufferRange);
+};
+
+}
+
+// static method: alloc -- calls NS::Object::alloc<> with the MTLIntersectionFunctionTableDescriptor class reference and returns the typed pointer it yields.
+_MTL_INLINE MTL::IntersectionFunctionTableDescriptor* MTL::IntersectionFunctionTableDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::IntersectionFunctionTableDescriptor>(_MTL_PRIVATE_CLS(MTLIntersectionFunctionTableDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init<> and returns its result typed as IntersectionFunctionTableDescriptor*.
+_MTL_INLINE MTL::IntersectionFunctionTableDescriptor* MTL::IntersectionFunctionTableDescriptor::init()
+{
+    return NS::Object::init<MTL::IntersectionFunctionTableDescriptor>();
+}
+
+// static method: intersectionFunctionTableDescriptor -- the message receiver is the MTLIntersectionFunctionTableDescriptor class itself (no `this`); returns the pointer that message yields.
+_MTL_INLINE MTL::IntersectionFunctionTableDescriptor* MTL::IntersectionFunctionTableDescriptor::intersectionFunctionTableDescriptor()
+{
+    return Object::sendMessage<MTL::IntersectionFunctionTableDescriptor*>(_MTL_PRIVATE_CLS(MTLIntersectionFunctionTableDescriptor), _MTL_PRIVATE_SEL(intersectionFunctionTableDescriptor));
+}
+
+// property: functionCount -- the getter below sends functionCount and returns an NS::UInteger; the setter after it sends setFunctionCount: with the new value.
+_MTL_INLINE NS::UInteger MTL::IntersectionFunctionTableDescriptor::functionCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(functionCount));
+}
+
+_MTL_INLINE void MTL::IntersectionFunctionTableDescriptor::setFunctionCount(NS::UInteger functionCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctionCount_), functionCount);
+}
+
+// method: setBuffer:offset:atIndex: -- forwards buffer, offset and index in that order; returns nothing.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setBuffers:offsets:withRange: -- forwards the buffers array, the offsets array and the NS::Range in that order; this wrapper performs no length check on either array.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setBuffers(const MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setFunction:atIndex: -- forwards the FunctionHandle pointer then the index; returns nothing.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setFunction(const MTL::FunctionHandle* function, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunction_atIndex_), function, index);
+}
+
+// method: setFunctions:withRange: -- forwards the FunctionHandle pointer array then the NS::Range; this wrapper performs no length check on the array.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setFunctions(const MTL::FunctionHandle* functions[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctions_withRange_), functions, range);
+}
+
+// method: setOpaqueTriangleIntersectionFunctionWithSignature:atIndex: -- the overload taking a single NS::UInteger index; forwards signature then index.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_atIndex_), signature, index);
+}
+
+// method: setOpaqueTriangleIntersectionFunctionWithSignature:withRange: -- the overload taking an NS::Range; forwards signature then range.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setOpaqueTriangleIntersectionFunction(MTL::IntersectionFunctionSignature signature, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOpaqueTriangleIntersectionFunctionWithSignature_withRange_), signature, range);
+}
+
+// method: setVisibleFunctionTable:atBufferIndex: -- forwards the VisibleFunctionTable pointer then bufferIndex; returns nothing.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex);
+}
+
+// method: setVisibleFunctionTables:withBufferRange: -- forwards the VisibleFunctionTable pointer array then bufferRange; this wrapper performs no length check on the array.
+_MTL_INLINE void MTL::IntersectionFunctionTable::setVisibleFunctionTables(const MTL::VisibleFunctionTable* functionTables[], NS::Range bufferRange)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibleFunctionTables_withBufferRange_), functionTables, bufferRange);
+}
diff --git a/metal-cpp/Metal/MTLLibrary.hpp b/metal-cpp/Metal/MTLLibrary.hpp
new file mode 100644
index 0000000..1a5267a
--- /dev/null
+++ b/metal-cpp/Metal/MTLLibrary.hpp
@@ -0,0 +1,621 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLLibrary.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLArgument.hpp"
+#include "MTLFunctionDescriptor.hpp"
+#include "MTLLibrary.hpp"
+#include <functional>
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, PatchType) { // result type of Function::patchType()
+    PatchTypeNone = 0,
+    PatchTypeTriangle = 1,
+    PatchTypeQuad = 2,
+};
+
+class VertexAttribute : public NS::Referencing<VertexAttribute> // declares alloc/init plus six getters; no setters are declared
+{
+public:
+    static class VertexAttribute* alloc();
+
+    class VertexAttribute*        init();
+
+    NS::String*                   name() const;
+
+    NS::UInteger                  attributeIndex() const;
+
+    MTL::DataType                 attributeType() const;
+
+    bool                          active() const; // defined below with the isActive selector
+
+    bool                          patchData() const; // defined below with the isPatchData selector
+
+    bool                          patchControlPointData() const; // defined below with the isPatchControlPointData selector
+};
+
+class Attribute : public NS::Referencing<Attribute> // declares the same member set as VertexAttribute: alloc/init plus six getters
+{
+public:
+    static class Attribute* alloc();
+
+    class Attribute*        init();
+
+    NS::String*             name() const;
+
+    NS::UInteger            attributeIndex() const;
+
+    MTL::DataType           attributeType() const;
+
+    bool                    active() const; // defined below with the isActive selector
+
+    bool                    patchData() const; // defined below with the isPatchData selector
+
+    bool                    patchControlPointData() const; // defined below with the isPatchControlPointData selector
+};
+
+_MTL_ENUM(NS::UInteger, FunctionType) { // result type of Function::functionType(); values run 1-3 and 5-6, no enumerator is declared for 4
+    FunctionTypeVertex = 1,
+    FunctionTypeFragment = 2,
+    FunctionTypeKernel = 3,
+    FunctionTypeVisible = 5,
+    FunctionTypeIntersection = 6,
+};
+
+class FunctionConstant : public NS::Referencing<FunctionConstant> // declares alloc/init plus four getters; no setters are declared
+{
+public:
+    static class FunctionConstant* alloc();
+
+    class FunctionConstant*        init();
+
+    NS::String*                    name() const;
+
+    MTL::DataType                  type() const;
+
+    NS::UInteger                   index() const;
+
+    bool                           required() const; // defined below with the plain `required` selector (no is- prefix)
+};
+
+using AutoreleasedArgument = class Argument*; // plain pointer alias; it is the pointee type of the `reflection` parameter of Function::newArgumentEncoder
+
+class Function : public NS::Referencing<Function> // no alloc/init is declared for this type; label is the only member with a setter
+{
+public:
+    NS::String*            label() const;
+    void                   setLabel(const NS::String* label);
+
+    class Device*          device() const;
+
+    MTL::FunctionType      functionType() const;
+
+    MTL::PatchType         patchType() const;
+
+    NS::Integer            patchControlPointCount() const; // signed NS::Integer, unlike the NS::UInteger index getters elsewhere in this header
+
+    NS::Array*             vertexAttributes() const;
+
+    NS::Array*             stageInputAttributes() const;
+
+    NS::String*            name() const;
+
+    NS::Dictionary*        functionConstantsDictionary() const;
+
+    class ArgumentEncoder* newArgumentEncoder(NS::UInteger bufferIndex); // maps to newArgumentEncoderWithBufferIndex:
+
+    class ArgumentEncoder* newArgumentEncoder(NS::UInteger bufferIndex, const MTL::AutoreleasedArgument* reflection); // maps to newArgumentEncoderWithBufferIndex:reflection:
+
+    MTL::FunctionOptions   options() const;
+};
+
+_MTL_ENUM(NS::UInteger, LanguageVersion) { // each value equals (major << 16) + minor, e.g. 2_4 = 131076 = 0x20004
+    LanguageVersion1_0 = 65536,
+    LanguageVersion1_1 = 65537,
+    LanguageVersion1_2 = 65538,
+    LanguageVersion2_0 = 131072,
+    LanguageVersion2_1 = 131073,
+    LanguageVersion2_2 = 131074,
+    LanguageVersion2_3 = 131075,
+    LanguageVersion2_4 = 131076,
+};
+
+_MTL_ENUM(NS::Integer, LibraryType) { // signed underlying type; used by CompileOptions::libraryType()/setLibraryType() and returned by Library::type()
+    LibraryTypeExecutable = 0,
+    LibraryTypeDynamic = 1,
+};
+
+class CompileOptions : public NS::Copying<CompileOptions> // alloc/init plus seven getter/setter pairs, all defined inline further down this header
+{
+public:
+    static class CompileOptions* alloc();
+
+    class CompileOptions*        init();
+
+    NS::Dictionary*              preprocessorMacros() const;
+    void                         setPreprocessorMacros(const NS::Dictionary* preprocessorMacros);
+
+    bool                         fastMathEnabled() const; // defined below with the fastMathEnabled selector (no is- prefix)
+    void                         setFastMathEnabled(bool fastMathEnabled);
+
+    MTL::LanguageVersion         languageVersion() const;
+    void                         setLanguageVersion(MTL::LanguageVersion languageVersion);
+
+    MTL::LibraryType             libraryType() const;
+    void                         setLibraryType(MTL::LibraryType libraryType);
+
+    NS::String*                  installName() const;
+    void                         setInstallName(const NS::String* installName);
+
+    NS::Array*                   libraries() const;
+    void                         setLibraries(const NS::Array* libraries);
+
+    bool                         preserveInvariance() const;
+    void                         setPreserveInvariance(bool preserveInvariance);
+};
+
+_MTL_ENUM(NS::UInteger, LibraryError) { // NOTE(review): no enumerator is declared for value 2 -- confirm against the Metal framework's MTLLibraryError
+    LibraryErrorUnsupported = 1,
+    LibraryErrorCompileFailure = 3,
+    LibraryErrorCompileWarning = 4,
+    LibraryErrorFunctionNotFound = 5,
+    LibraryErrorFileNotFound = 6,
+};
+
+class Library : public NS::Referencing<Library> // no alloc/init is declared for this type; label is the only member with a setter
+{
+public:
+    void             newFunction(const NS::String* pFunctionName, const class FunctionConstantValues* pConstantValues, const std::function<void(Function* pFunction, NS::Error* pError)>& completionHandler); // std::function overload; defined below as an adapter onto the block-taking overload
+
+    void             newFunction(const class FunctionDescriptor* pDescriptor, const std::function<void(Function* pFunction, NS::Error* pError)>& completionHandler); // std::function overload; defined below as an adapter onto the block-taking overload
+
+    void             newIntersectionFunction(const class IntersectionFunctionDescriptor* pDescriptor, const std::function<void(Function* pFunction, NS::Error* pError)>& completionHandler); // std::function overload; defined below as an adapter onto the block-taking overload
+
+    NS::String*      label() const;
+    void             setLabel(const NS::String* label);
+
+    class Device*    device() const;
+
+    class Function*  newFunction(const NS::String* functionName);
+
+    class Function*  newFunction(const NS::String* name, const class FunctionConstantValues* constantValues, NS::Error** error);
+
+    void             newFunction(const NS::String* name, const class FunctionConstantValues* constantValues, void (^completionHandler)(MTL::Function*, NS::Error*)); // block-taking overload
+
+    void             newFunction(const class FunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*)); // block-taking overload
+
+    class Function*  newFunction(const class FunctionDescriptor* descriptor, NS::Error** error);
+
+    void             newIntersectionFunction(const class IntersectionFunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*)); // block-taking overload
+
+    class Function*  newIntersectionFunction(const class IntersectionFunctionDescriptor* descriptor, NS::Error** error);
+
+    NS::Array*       functionNames() const;
+
+    MTL::LibraryType type() const;
+
+    NS::String*      installName() const;
+};
+
+}
+
+// static method: alloc -- calls NS::Object::alloc<> with the MTLVertexAttribute class reference and returns the typed pointer it yields.
+_MTL_INLINE MTL::VertexAttribute* MTL::VertexAttribute::alloc()
+{
+    return NS::Object::alloc<MTL::VertexAttribute>(_MTL_PRIVATE_CLS(MTLVertexAttribute));
+}
+
+// method: init -- delegates to NS::Object::init<> and returns its result typed as VertexAttribute*.
+_MTL_INLINE MTL::VertexAttribute* MTL::VertexAttribute::init()
+{
+    return NS::Object::init<MTL::VertexAttribute>();
+}
+
+// property: name -- getter only; returns the NS::String* produced by sending the name selector.
+_MTL_INLINE NS::String* MTL::VertexAttribute::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: attributeIndex -- getter only; returns the NS::UInteger produced by sending the attributeIndex selector.
+_MTL_INLINE NS::UInteger MTL::VertexAttribute::attributeIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(attributeIndex));
+}
+
+// property: attributeType -- getter only; returns the MTL::DataType produced by sending the attributeType selector.
+_MTL_INLINE MTL::DataType MTL::VertexAttribute::attributeType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(attributeType));
+}
+
+// property: active -- getter only; note the selector sent is isActive, not active.
+_MTL_INLINE bool MTL::VertexAttribute::active() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isActive));
+}
+
+// property: patchData -- getter only; note the selector sent is isPatchData, not patchData.
+_MTL_INLINE bool MTL::VertexAttribute::patchData() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isPatchData));
+}
+
+// property: patchControlPointData -- getter only; note the selector sent is isPatchControlPointData.
+_MTL_INLINE bool MTL::VertexAttribute::patchControlPointData() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isPatchControlPointData));
+}
+
+// static method: alloc -- calls NS::Object::alloc<> with the MTLAttribute class reference and returns the typed pointer it yields.
+_MTL_INLINE MTL::Attribute* MTL::Attribute::alloc()
+{
+    return NS::Object::alloc<MTL::Attribute>(_MTL_PRIVATE_CLS(MTLAttribute));
+}
+
+// method: init -- delegates to NS::Object::init<> and returns its result typed as Attribute*.
+_MTL_INLINE MTL::Attribute* MTL::Attribute::init()
+{
+    return NS::Object::init<MTL::Attribute>();
+}
+
+// property: name -- getter only; returns the NS::String* produced by sending the name selector.
+_MTL_INLINE NS::String* MTL::Attribute::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: attributeIndex -- getter only; returns the NS::UInteger produced by sending the attributeIndex selector.
+_MTL_INLINE NS::UInteger MTL::Attribute::attributeIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(attributeIndex));
+}
+
+// property: attributeType -- getter only; returns the MTL::DataType produced by sending the attributeType selector.
+_MTL_INLINE MTL::DataType MTL::Attribute::attributeType() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(attributeType));
+}
+
+// property: active -- getter only; note the selector sent is isActive, not active.
+_MTL_INLINE bool MTL::Attribute::active() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isActive));
+}
+
+// property: patchData -- getter only; note the selector sent is isPatchData, not patchData.
+_MTL_INLINE bool MTL::Attribute::patchData() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isPatchData));
+}
+
+// property: patchControlPointData -- getter only; note the selector sent is isPatchControlPointData.
+_MTL_INLINE bool MTL::Attribute::patchControlPointData() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isPatchControlPointData));
+}
+
+// static method: alloc -- calls NS::Object::alloc<> with the MTLFunctionConstant class reference and returns the typed pointer it yields.
+_MTL_INLINE MTL::FunctionConstant* MTL::FunctionConstant::alloc()
+{
+    return NS::Object::alloc<MTL::FunctionConstant>(_MTL_PRIVATE_CLS(MTLFunctionConstant));
+}
+
+// method: init -- delegates to NS::Object::init<> and returns its result typed as FunctionConstant*.
+_MTL_INLINE MTL::FunctionConstant* MTL::FunctionConstant::init()
+{
+    return NS::Object::init<MTL::FunctionConstant>();
+}
+
+// property: name -- getter only; returns the NS::String* produced by sending the name selector.
+_MTL_INLINE NS::String* MTL::FunctionConstant::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: type -- getter only; returns the MTL::DataType produced by sending the type selector.
+_MTL_INLINE MTL::DataType MTL::FunctionConstant::type() const
+{
+    return Object::sendMessage<MTL::DataType>(this, _MTL_PRIVATE_SEL(type));
+}
+
+// property: index -- getter only; returns the NS::UInteger produced by sending the index selector.
+_MTL_INLINE NS::UInteger MTL::FunctionConstant::index() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(index));
+}
+
+// property: required -- getter only; the selector sent is plain `required` (no is- prefix, unlike the isActive-style bool getters in this header).
+_MTL_INLINE bool MTL::FunctionConstant::required() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(required));
+}
+
+// property: label -- the getter below sends label and returns an NS::String*; the setter after it sends setLabel: with the given string pointer.
+_MTL_INLINE NS::String* MTL::Function::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::Function::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: device -- getter only; returns the MTL::Device* produced by sending the device selector.
+_MTL_INLINE MTL::Device* MTL::Function::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: functionType -- getter only; returns the MTL::FunctionType produced by sending the functionType selector.
+_MTL_INLINE MTL::FunctionType MTL::Function::functionType() const
+{
+    return Object::sendMessage<MTL::FunctionType>(this, _MTL_PRIVATE_SEL(functionType));
+}
+
+// property: patchType -- getter only; returns the MTL::PatchType produced by sending the patchType selector.
+_MTL_INLINE MTL::PatchType MTL::Function::patchType() const
+{
+    return Object::sendMessage<MTL::PatchType>(this, _MTL_PRIVATE_SEL(patchType));
+}
+
+// property: patchControlPointCount -- getter only; the result is a signed NS::Integer, not NS::UInteger.
+_MTL_INLINE NS::Integer MTL::Function::patchControlPointCount() const
+{
+    return Object::sendMessage<NS::Integer>(this, _MTL_PRIVATE_SEL(patchControlPointCount));
+}
+
+// property: vertexAttributes -- getter only; returns an NS::Array* (element type is not expressed in this signature -- presumably VertexAttribute objects, TODO confirm).
+_MTL_INLINE NS::Array* MTL::Function::vertexAttributes() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(vertexAttributes));
+}
+
+// property: stageInputAttributes -- getter only; returns an NS::Array* (element type is not expressed in this signature -- presumably Attribute objects, TODO confirm).
+_MTL_INLINE NS::Array* MTL::Function::stageInputAttributes() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(stageInputAttributes));
+}
+
+// property: name -- getter only; returns the NS::String* produced by sending the name selector.
+_MTL_INLINE NS::String* MTL::Function::name() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(name));
+}
+
+// property: functionConstantsDictionary -- getter only; returns an NS::Dictionary* (key and value types are not expressed in this signature).
+_MTL_INLINE NS::Dictionary* MTL::Function::functionConstantsDictionary() const
+{
+    return Object::sendMessage<NS::Dictionary*>(this, _MTL_PRIVATE_SEL(functionConstantsDictionary));
+}
+
+// method: newArgumentEncoderWithBufferIndex: -- single-argument overload; forwards bufferIndex and returns the MTL::ArgumentEncoder* the message yields.
+_MTL_INLINE MTL::ArgumentEncoder* MTL::Function::newArgumentEncoder(NS::UInteger bufferIndex)
+{
+    return Object::sendMessage<MTL::ArgumentEncoder*>(this, _MTL_PRIVATE_SEL(newArgumentEncoderWithBufferIndex_), bufferIndex);
+}
+
+// method: newArgumentEncoderWithBufferIndex:reflection: -- two-argument overload; forwards bufferIndex then the reflection pointer and returns the MTL::ArgumentEncoder* the message yields.
+_MTL_INLINE MTL::ArgumentEncoder* MTL::Function::newArgumentEncoder(NS::UInteger bufferIndex, const MTL::AutoreleasedArgument* reflection)
+{
+    return Object::sendMessage<MTL::ArgumentEncoder*>(this, _MTL_PRIVATE_SEL(newArgumentEncoderWithBufferIndex_reflection_), bufferIndex, reflection);
+}
+
+// property: options -- getter only; returns the MTL::FunctionOptions produced by sending the options selector.
+_MTL_INLINE MTL::FunctionOptions MTL::Function::options() const
+{
+    return Object::sendMessage<MTL::FunctionOptions>(this, _MTL_PRIVATE_SEL(options));
+}
+
+// static method: alloc -- calls NS::Object::alloc<> with the MTLCompileOptions class reference and returns the typed pointer it yields.
+_MTL_INLINE MTL::CompileOptions* MTL::CompileOptions::alloc()
+{
+    return NS::Object::alloc<MTL::CompileOptions>(_MTL_PRIVATE_CLS(MTLCompileOptions));
+}
+
+// method: init -- delegates to NS::Object::init<> and returns its result typed as CompileOptions*.
+_MTL_INLINE MTL::CompileOptions* MTL::CompileOptions::init()
+{
+    return NS::Object::init<MTL::CompileOptions>();
+}
+
+// property: preprocessorMacros -- the getter below returns an NS::Dictionary*; the setter after it sends setPreprocessorMacros: with the given dictionary pointer.
+_MTL_INLINE NS::Dictionary* MTL::CompileOptions::preprocessorMacros() const
+{
+    return Object::sendMessage<NS::Dictionary*>(this, _MTL_PRIVATE_SEL(preprocessorMacros));
+}
+
+_MTL_INLINE void MTL::CompileOptions::setPreprocessorMacros(const NS::Dictionary* preprocessorMacros)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPreprocessorMacros_), preprocessorMacros);
+}
+
+// property: fastMathEnabled -- the getter below sends fastMathEnabled (no is- prefix) and returns bool; the setter after it sends setFastMathEnabled: with the flag.
+_MTL_INLINE bool MTL::CompileOptions::fastMathEnabled() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(fastMathEnabled));
+}
+
+_MTL_INLINE void MTL::CompileOptions::setFastMathEnabled(bool fastMathEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFastMathEnabled_), fastMathEnabled);
+}
+
+// property: languageVersion -- the getter below returns an MTL::LanguageVersion; the setter after it sends setLanguageVersion: with the given enumerator.
+_MTL_INLINE MTL::LanguageVersion MTL::CompileOptions::languageVersion() const
+{
+    return Object::sendMessage<MTL::LanguageVersion>(this, _MTL_PRIVATE_SEL(languageVersion));
+}
+
+_MTL_INLINE void MTL::CompileOptions::setLanguageVersion(MTL::LanguageVersion languageVersion)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLanguageVersion_), languageVersion);
+}
+
+// property: libraryType -- the getter below returns an MTL::LibraryType; the setter after it sends setLibraryType: with the given enumerator.
+_MTL_INLINE MTL::LibraryType MTL::CompileOptions::libraryType() const
+{
+    return Object::sendMessage<MTL::LibraryType>(this, _MTL_PRIVATE_SEL(libraryType));
+}
+
+_MTL_INLINE void MTL::CompileOptions::setLibraryType(MTL::LibraryType libraryType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLibraryType_), libraryType);
+}
+
+// property: installName -- the getter below returns an NS::String*; the setter after it sends setInstallName: with the given string pointer.
+_MTL_INLINE NS::String* MTL::CompileOptions::installName() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(installName));
+}
+
+_MTL_INLINE void MTL::CompileOptions::setInstallName(const NS::String* installName)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInstallName_), installName);
+}
+
+// property: libraries -- the getter below returns an NS::Array*; the setter after it sends setLibraries: with the given array pointer.
+_MTL_INLINE NS::Array* MTL::CompileOptions::libraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(libraries));
+}
+
+_MTL_INLINE void MTL::CompileOptions::setLibraries(const NS::Array* libraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLibraries_), libraries);
+}
+
+// property: preserveInvariance -- the getter below returns bool; the setter after it sends setPreserveInvariance: with the flag.
+_MTL_INLINE bool MTL::CompileOptions::preserveInvariance() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(preserveInvariance));
+}
+
+_MTL_INLINE void MTL::CompileOptions::setPreserveInvariance(bool preserveInvariance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPreserveInvariance_), preserveInvariance);
+}
+
+_MTL_INLINE void MTL::Library::newFunction(const NS::String* pFunctionName, const FunctionConstantValues* pConstantValues, const std::function<void(Function* pFunction, NS::Error* pError)>& completionHandler)
+{ // std::function adapter: wraps the handler in a block and delegates to the block-taking newFunction overload
+    __block std::function<void(Function * pFunction, NS::Error * pError)> blockCompletionHandler = completionHandler; // copy of the caller's handler, held in a __block variable so the block below can call it
+
+    newFunction(pFunctionName, pConstantValues, ^(Function* pFunction, NS::Error* pError) { blockCompletionHandler(pFunction, pError); }); // the block passes both of its arguments through to the copied handler
+}
+
+_MTL_INLINE void MTL::Library::newFunction(const FunctionDescriptor* pDescriptor, const std::function<void(Function* pFunction, NS::Error* pError)>& completionHandler)
+{ // std::function adapter: wraps the handler in a block and delegates to the block-taking newFunction overload
+    __block std::function<void(Function * pFunction, NS::Error * pError)> blockCompletionHandler = completionHandler; // copy of the caller's handler, held in a __block variable so the block below can call it
+
+    newFunction(pDescriptor, ^(Function* pFunction, NS::Error* pError) { blockCompletionHandler(pFunction, pError); }); // the block passes both of its arguments through to the copied handler
+}
+
+_MTL_INLINE void MTL::Library::newIntersectionFunction(const IntersectionFunctionDescriptor* pDescriptor, const std::function<void(Function* pFunction, NS::Error* pError)>& completionHandler)
+{ // std::function adapter: wraps the handler in a block and delegates to the block-taking newIntersectionFunction overload
+    __block std::function<void(Function * pFunction, NS::Error * pError)> blockCompletionHandler = completionHandler; // copy of the caller's handler, held in a __block variable so the block below can call it
+
+    newIntersectionFunction(pDescriptor, ^(Function* pFunction, NS::Error* pError) { blockCompletionHandler(pFunction, pError); }); // the block passes both of its arguments through to the copied handler
+}
+
+// property: label -- the getter below sends label and returns an NS::String*; the setter after it sends setLabel: with the given string pointer.
+_MTL_INLINE NS::String* MTL::Library::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::Library::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: device -- getter only; returns the MTL::Device* produced by sending the device selector.
+_MTL_INLINE MTL::Device* MTL::Library::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// method: newFunctionWithName: -- forwards functionName and returns the MTL::Function* the message yields; this overload has no error out-parameter.
+_MTL_INLINE MTL::Function* MTL::Library::newFunction(const NS::String* functionName)
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(newFunctionWithName_), functionName);
+}
+
+// method: newFunctionWithName:constantValues:error: -- forwards name, constantValues and the NS::Error** in that order; returns the MTL::Function* the message yields.
+_MTL_INLINE MTL::Function* MTL::Library::newFunction(const NS::String* name, const MTL::FunctionConstantValues* constantValues, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(newFunctionWithName_constantValues_error_), name, constantValues, error);
+}
+
+// method: newFunctionWithName:constantValues:completionHandler: -- block-taking overload; forwards name, constantValues and the block unchanged and returns nothing itself.
+_MTL_INLINE void MTL::Library::newFunction(const NS::String* name, const MTL::FunctionConstantValues* constantValues, void (^completionHandler)(MTL::Function*, NS::Error*))
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newFunctionWithName_constantValues_completionHandler_), name, constantValues, completionHandler);
+}
+
+// method: newFunctionWithDescriptor:completionHandler: -- block-taking overload; forwards descriptor and the block unchanged and returns nothing itself.
+_MTL_INLINE void MTL::Library::newFunction(const MTL::FunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*))
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newFunctionWithDescriptor_completionHandler_), descriptor, completionHandler);
+}
+
+// method: newFunctionWithDescriptor:error: -- forwards descriptor then the NS::Error**; returns the MTL::Function* the message yields.
+_MTL_INLINE MTL::Function* MTL::Library::newFunction(const MTL::FunctionDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(newFunctionWithDescriptor_error_), descriptor, error);
+}
+
+// method: newIntersectionFunctionWithDescriptor:completionHandler: -- block-taking overload; forwards descriptor and the block unchanged and returns nothing itself.
+_MTL_INLINE void MTL::Library::newIntersectionFunction(const MTL::IntersectionFunctionDescriptor* descriptor, void (^completionHandler)(MTL::Function*, NS::Error*))
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(newIntersectionFunctionWithDescriptor_completionHandler_), descriptor, completionHandler);
+}
+
+// method: newIntersectionFunctionWithDescriptor:error: -- forwards descriptor then the NS::Error**; returns the MTL::Function* the message yields.
+_MTL_INLINE MTL::Function* MTL::Library::newIntersectionFunction(const MTL::IntersectionFunctionDescriptor* descriptor, NS::Error** error)
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(newIntersectionFunctionWithDescriptor_error_), descriptor, error);
+}
+
+// property: functionNames -- getter only; returns an NS::Array* (element type is not expressed in this signature).
+_MTL_INLINE NS::Array* MTL::Library::functionNames() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(functionNames));
+}
+
+// property: type -- getter only; returns the MTL::LibraryType produced by sending the type selector.
+_MTL_INLINE MTL::LibraryType MTL::Library::type() const
+{
+    return Object::sendMessage<MTL::LibraryType>(this, _MTL_PRIVATE_SEL(type));
+}
+
+// property: installName -- getter only on Library (no setter is declared for it here); returns the NS::String* produced by sending installName.
+_MTL_INLINE NS::String* MTL::Library::installName() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(installName));
+}
diff --git a/metal-cpp/Metal/MTLLinkedFunctions.hpp b/metal-cpp/Metal/MTLLinkedFunctions.hpp
new file mode 100644
index 0000000..76319a5
--- /dev/null
+++ b/metal-cpp/Metal/MTLLinkedFunctions.hpp
@@ -0,0 +1,115 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLLinkedFunctions.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{
+class LinkedFunctions : public NS::Copying<LinkedFunctions> // alloc/init, one class-level factory and four NS::Array* getter/setter pairs, all defined inline below
+{
+public:
+    static class LinkedFunctions* alloc();
+
+    class LinkedFunctions*        init();
+
+    static class LinkedFunctions* linkedFunctions(); // class-level factory: its definition messages the class, not an instance
+
+    NS::Array*                    functions() const;
+    void                          setFunctions(const NS::Array* functions);
+
+    NS::Array*                    binaryFunctions() const;
+    void                          setBinaryFunctions(const NS::Array* binaryFunctions);
+
+    NS::Array*                    groups() const;
+    void                          setGroups(const NS::Array* groups);
+
+    NS::Array*                    privateFunctions() const;
+    void                          setPrivateFunctions(const NS::Array* privateFunctions);
+};
+
+}
+
+// static method: alloc -- calls NS::Object::alloc<> with the MTLLinkedFunctions class reference and returns the typed pointer it yields.
+_MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::alloc()
+{
+    return NS::Object::alloc<MTL::LinkedFunctions>(_MTL_PRIVATE_CLS(MTLLinkedFunctions));
+}
+
+// method: init -- delegates to NS::Object::init<> and returns its result typed as LinkedFunctions*.
+_MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::init()
+{
+    return NS::Object::init<MTL::LinkedFunctions>();
+}
+
+// static method: linkedFunctions -- the message receiver is the MTLLinkedFunctions class itself (no `this`); returns the pointer that message yields.
+_MTL_INLINE MTL::LinkedFunctions* MTL::LinkedFunctions::linkedFunctions()
+{
+    return Object::sendMessage<MTL::LinkedFunctions*>(_MTL_PRIVATE_CLS(MTLLinkedFunctions), _MTL_PRIVATE_SEL(linkedFunctions));
+}
+
+// property: functions -- the getter below returns an NS::Array*; the setter after it sends setFunctions: with the given array pointer.
+_MTL_INLINE NS::Array* MTL::LinkedFunctions::functions() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(functions));
+}
+
+_MTL_INLINE void MTL::LinkedFunctions::setFunctions(const NS::Array* functions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctions_), functions);
+}
+
+// property: binaryFunctions -- the getter below returns an NS::Array*; the setter after it sends setBinaryFunctions: with the given array pointer.
+_MTL_INLINE NS::Array* MTL::LinkedFunctions::binaryFunctions() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(binaryFunctions));
+}
+
+_MTL_INLINE void MTL::LinkedFunctions::setBinaryFunctions(const NS::Array* binaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBinaryFunctions_), binaryFunctions);
+}
+
+// property: groups -- the getter below returns an NS::Array*; the setter after it sends setGroups: with the given array pointer.
+_MTL_INLINE NS::Array* MTL::LinkedFunctions::groups() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(groups));
+}
+
+_MTL_INLINE void MTL::LinkedFunctions::setGroups(const NS::Array* groups)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setGroups_), groups);
+}
+
+// property: privateFunctions -- the getter below returns an NS::Array*; the setter after it sends setPrivateFunctions: with the given array pointer.
+_MTL_INLINE NS::Array* MTL::LinkedFunctions::privateFunctions() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(privateFunctions));
+}
+
+_MTL_INLINE void MTL::LinkedFunctions::setPrivateFunctions(const NS::Array* privateFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPrivateFunctions_), privateFunctions);
+}
diff --git a/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp b/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp
new file mode 100644
index 0000000..0437b4c
--- /dev/null
+++ b/metal-cpp/Metal/MTLParallelRenderCommandEncoder.hpp
@@ -0,0 +1,94 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLParallelRenderCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLCommandEncoder.hpp"
+#include "MTLRenderPass.hpp"
+
+namespace MTL
+{
+class ParallelRenderCommandEncoder : public NS::Referencing<ParallelRenderCommandEncoder, CommandEncoder> // declaration only; each member is defined after the namespace as an inline sendMessage wrapper
+{
+public:
+    class RenderCommandEncoder* renderCommandEncoder(); // returns the result of the renderCommandEncoder selector
+
+    void                        setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex);
+
+    void                        setDepthStoreAction(MTL::StoreAction storeAction);
+
+    void                        setStencilStoreAction(MTL::StoreAction storeAction);
+
+    void                        setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex);
+
+    void                        setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions);
+
+    void                        setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions);
+};
+
+}
+
+// method: renderCommandEncoder - sends the renderCommandEncoder selector to this object and returns the result.
+_MTL_INLINE MTL::RenderCommandEncoder* MTL::ParallelRenderCommandEncoder::renderCommandEncoder()
+{
+    return Object::sendMessage<MTL::RenderCommandEncoder*>(this, _MTL_PRIVATE_SEL(renderCommandEncoder));
+}
+
+// method: setColorStoreAction:atIndex: - forwards storeAction and colorAttachmentIndex with the setColorStoreAction_atIndex_ selector.
+_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setColorStoreAction_atIndex_), storeAction, colorAttachmentIndex);
+}
+
+// method: setDepthStoreAction: - forwards storeAction with the setDepthStoreAction_ selector.
+_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setDepthStoreAction(MTL::StoreAction storeAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStoreAction_), storeAction);
+}
+
+// method: setStencilStoreAction: - forwards storeAction with the setStencilStoreAction_ selector.
+_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setStencilStoreAction(MTL::StoreAction storeAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilStoreAction_), storeAction);
+}
+
+// method: setColorStoreActionOptions:atIndex: - forwards storeActionOptions and colorAttachmentIndex with the setColorStoreActionOptions_atIndex_ selector.
+_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setColorStoreActionOptions_atIndex_), storeActionOptions, colorAttachmentIndex);
+}
+
+// method: setDepthStoreActionOptions: - forwards storeActionOptions with the setDepthStoreActionOptions_ selector.
+_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStoreActionOptions_), storeActionOptions);
+}
+
+// method: setStencilStoreActionOptions: - forwards storeActionOptions with the setStencilStoreActionOptions_ selector.
+_MTL_INLINE void MTL::ParallelRenderCommandEncoder::setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilStoreActionOptions_), storeActionOptions);
+}
diff --git a/metal-cpp/Metal/MTLPipeline.hpp b/metal-cpp/Metal/MTLPipeline.hpp
new file mode 100644
index 0000000..2a3c947
--- /dev/null
+++ b/metal-cpp/Metal/MTLPipeline.hpp
@@ -0,0 +1,109 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLPipeline.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLPipeline.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, Mutability) { // declared through _MTL_ENUM with NS::UInteger; used by PipelineBufferDescriptor::mutability()
+    MutabilityDefault = 0,
+    MutabilityMutable = 1,
+    MutabilityImmutable = 2,
+};
+
+class PipelineBufferDescriptor : public NS::Copying<PipelineBufferDescriptor> // declaration only; members are defined after the namespace as inline wrappers
+{
+public:
+    static class PipelineBufferDescriptor* alloc(); // allocates through the MTLPipelineBufferDescriptor class symbol
+
+    class PipelineBufferDescriptor*        init();
+
+    MTL::Mutability                        mutability() const; // getter/setter pair for the mutability property
+    void                                   setMutability(MTL::Mutability mutability);
+};
+
+class PipelineBufferDescriptorArray : public NS::Referencing<PipelineBufferDescriptorArray> // declaration only; members are defined after the namespace as inline wrappers
+{
+public:
+    static class PipelineBufferDescriptorArray* alloc(); // allocates through the MTLPipelineBufferDescriptorArray class symbol
+
+    class PipelineBufferDescriptorArray*        init();
+
+    class PipelineBufferDescriptor*             object(NS::UInteger bufferIndex); // wraps objectAtIndexedSubscript:
+
+    void                                        setObject(const class PipelineBufferDescriptor* buffer, NS::UInteger bufferIndex); // wraps setObject:atIndexedSubscript:
+};
+
+}
+
+// static method: alloc - calls NS::Object::alloc with the MTLPipelineBufferDescriptor class symbol and returns the result.
+_MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::PipelineBufferDescriptor>(_MTL_PRIVATE_CLS(MTLPipelineBufferDescriptor));
+}
+
+// method: init - delegates to NS::Object::init for MTL::PipelineBufferDescriptor and returns its result.
+_MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptor::init()
+{
+    return NS::Object::init<MTL::PipelineBufferDescriptor>();
+}
+
+// property: mutability - getter; sends the mutability selector to this object and returns the MTL::Mutability result.
+_MTL_INLINE MTL::Mutability MTL::PipelineBufferDescriptor::mutability() const
+{
+    return Object::sendMessage<MTL::Mutability>(this, _MTL_PRIVATE_SEL(mutability));
+}
+
+_MTL_INLINE void MTL::PipelineBufferDescriptor::setMutability(MTL::Mutability mutability) // setter: forwards mutability with the setMutability_ selector
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMutability_), mutability);
+}
+
+// static method: alloc - calls NS::Object::alloc with the MTLPipelineBufferDescriptorArray class symbol and returns the result.
+_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::PipelineBufferDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::PipelineBufferDescriptorArray>(_MTL_PRIVATE_CLS(MTLPipelineBufferDescriptorArray));
+}
+
+// method: init - delegates to NS::Object::init for MTL::PipelineBufferDescriptorArray and returns its result.
+_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::PipelineBufferDescriptorArray::init()
+{
+    return NS::Object::init<MTL::PipelineBufferDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: - sends objectAtIndexedSubscript_ with bufferIndex to this object and returns the descriptor pointer it yields.
+_MTL_INLINE MTL::PipelineBufferDescriptor* MTL::PipelineBufferDescriptorArray::object(NS::UInteger bufferIndex)
+{
+    return Object::sendMessage<MTL::PipelineBufferDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), bufferIndex);
+}
+
+// method: setObject:atIndexedSubscript: - forwards buffer and bufferIndex with the setObject_atIndexedSubscript_ selector.
+_MTL_INLINE void MTL::PipelineBufferDescriptorArray::setObject(const MTL::PipelineBufferDescriptor* buffer, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), buffer, bufferIndex);
+}
diff --git a/metal-cpp/Metal/MTLPixelFormat.hpp b/metal-cpp/Metal/MTLPixelFormat.hpp
new file mode 100644
index 0000000..b320c82
--- /dev/null
+++ b/metal-cpp/Metal/MTLPixelFormat.hpp
@@ -0,0 +1,173 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLPixelFormat.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, PixelFormat) { // declared through _MTL_ENUM with NS::UInteger; every enumerator has an explicit, non-contiguous value
+    PixelFormatInvalid = 0,
+    PixelFormatA8Unorm = 1,
+    PixelFormatR8Unorm = 10,
+    PixelFormatR8Unorm_sRGB = 11,
+    PixelFormatR8Snorm = 12,
+    PixelFormatR8Uint = 13,
+    PixelFormatR8Sint = 14,
+    PixelFormatR16Unorm = 20,
+    PixelFormatR16Snorm = 22,
+    PixelFormatR16Uint = 23,
+    PixelFormatR16Sint = 24,
+    PixelFormatR16Float = 25,
+    PixelFormatRG8Unorm = 30,
+    PixelFormatRG8Unorm_sRGB = 31,
+    PixelFormatRG8Snorm = 32,
+    PixelFormatRG8Uint = 33,
+    PixelFormatRG8Sint = 34,
+    PixelFormatB5G6R5Unorm = 40,
+    PixelFormatA1BGR5Unorm = 41,
+    PixelFormatABGR4Unorm = 42,
+    PixelFormatBGR5A1Unorm = 43,
+    PixelFormatR32Uint = 53,
+    PixelFormatR32Sint = 54,
+    PixelFormatR32Float = 55,
+    PixelFormatRG16Unorm = 60,
+    PixelFormatRG16Snorm = 62,
+    PixelFormatRG16Uint = 63,
+    PixelFormatRG16Sint = 64,
+    PixelFormatRG16Float = 65,
+    PixelFormatRGBA8Unorm = 70,
+    PixelFormatRGBA8Unorm_sRGB = 71,
+    PixelFormatRGBA8Snorm = 72,
+    PixelFormatRGBA8Uint = 73,
+    PixelFormatRGBA8Sint = 74,
+    PixelFormatBGRA8Unorm = 80,
+    PixelFormatBGRA8Unorm_sRGB = 81,
+    PixelFormatRGB10A2Unorm = 90,
+    PixelFormatRGB10A2Uint = 91,
+    PixelFormatRG11B10Float = 92,
+    PixelFormatRGB9E5Float = 93,
+    PixelFormatBGR10A2Unorm = 94,
+    PixelFormatRG32Uint = 103,
+    PixelFormatRG32Sint = 104,
+    PixelFormatRG32Float = 105,
+    PixelFormatRGBA16Unorm = 110,
+    PixelFormatRGBA16Snorm = 112,
+    PixelFormatRGBA16Uint = 113,
+    PixelFormatRGBA16Sint = 114,
+    PixelFormatRGBA16Float = 115,
+    PixelFormatRGBA32Uint = 123,
+    PixelFormatRGBA32Sint = 124,
+    PixelFormatRGBA32Float = 125,
+    PixelFormatBC1_RGBA = 130,
+    PixelFormatBC1_RGBA_sRGB = 131,
+    PixelFormatBC2_RGBA = 132,
+    PixelFormatBC2_RGBA_sRGB = 133,
+    PixelFormatBC3_RGBA = 134,
+    PixelFormatBC3_RGBA_sRGB = 135,
+    PixelFormatBC4_RUnorm = 140,
+    PixelFormatBC4_RSnorm = 141,
+    PixelFormatBC5_RGUnorm = 142,
+    PixelFormatBC5_RGSnorm = 143,
+    PixelFormatBC6H_RGBFloat = 150,
+    PixelFormatBC6H_RGBUfloat = 151,
+    PixelFormatBC7_RGBAUnorm = 152,
+    PixelFormatBC7_RGBAUnorm_sRGB = 153,
+    PixelFormatPVRTC_RGB_2BPP = 160,
+    PixelFormatPVRTC_RGB_2BPP_sRGB = 161,
+    PixelFormatPVRTC_RGB_4BPP = 162,
+    PixelFormatPVRTC_RGB_4BPP_sRGB = 163,
+    PixelFormatPVRTC_RGBA_2BPP = 164,
+    PixelFormatPVRTC_RGBA_2BPP_sRGB = 165,
+    PixelFormatPVRTC_RGBA_4BPP = 166,
+    PixelFormatPVRTC_RGBA_4BPP_sRGB = 167,
+    PixelFormatEAC_R11Unorm = 170,
+    PixelFormatEAC_R11Snorm = 172,
+    PixelFormatEAC_RG11Unorm = 174,
+    PixelFormatEAC_RG11Snorm = 176,
+    PixelFormatEAC_RGBA8 = 178,
+    PixelFormatEAC_RGBA8_sRGB = 179,
+    PixelFormatETC2_RGB8 = 180,
+    PixelFormatETC2_RGB8_sRGB = 181,
+    PixelFormatETC2_RGB8A1 = 182,
+    PixelFormatETC2_RGB8A1_sRGB = 183,
+    PixelFormatASTC_4x4_sRGB = 186,
+    PixelFormatASTC_5x4_sRGB = 187,
+    PixelFormatASTC_5x5_sRGB = 188,
+    PixelFormatASTC_6x5_sRGB = 189,
+    PixelFormatASTC_6x6_sRGB = 190,
+    PixelFormatASTC_8x5_sRGB = 192,
+    PixelFormatASTC_8x6_sRGB = 193,
+    PixelFormatASTC_8x8_sRGB = 194,
+    PixelFormatASTC_10x5_sRGB = 195,
+    PixelFormatASTC_10x6_sRGB = 196,
+    PixelFormatASTC_10x8_sRGB = 197,
+    PixelFormatASTC_10x10_sRGB = 198,
+    PixelFormatASTC_12x10_sRGB = 199,
+    PixelFormatASTC_12x12_sRGB = 200,
+    PixelFormatASTC_4x4_LDR = 204,
+    PixelFormatASTC_5x4_LDR = 205,
+    PixelFormatASTC_5x5_LDR = 206,
+    PixelFormatASTC_6x5_LDR = 207,
+    PixelFormatASTC_6x6_LDR = 208,
+    PixelFormatASTC_8x5_LDR = 210,
+    PixelFormatASTC_8x6_LDR = 211,
+    PixelFormatASTC_8x8_LDR = 212,
+    PixelFormatASTC_10x5_LDR = 213,
+    PixelFormatASTC_10x6_LDR = 214,
+    PixelFormatASTC_10x8_LDR = 215,
+    PixelFormatASTC_10x10_LDR = 216,
+    PixelFormatASTC_12x10_LDR = 217,
+    PixelFormatASTC_12x12_LDR = 218,
+    PixelFormatASTC_4x4_HDR = 222,
+    PixelFormatASTC_5x4_HDR = 223,
+    PixelFormatASTC_5x5_HDR = 224,
+    PixelFormatASTC_6x5_HDR = 225,
+    PixelFormatASTC_6x6_HDR = 226,
+    PixelFormatASTC_8x5_HDR = 228,
+    PixelFormatASTC_8x6_HDR = 229,
+    PixelFormatASTC_8x8_HDR = 230,
+    PixelFormatASTC_10x5_HDR = 231,
+    PixelFormatASTC_10x6_HDR = 232,
+    PixelFormatASTC_10x8_HDR = 233,
+    PixelFormatASTC_10x10_HDR = 234,
+    PixelFormatASTC_12x10_HDR = 235,
+    PixelFormatASTC_12x12_HDR = 236,
+    PixelFormatGBGR422 = 240,
+    PixelFormatBGRG422 = 241,
+    PixelFormatDepth16Unorm = 250,
+    PixelFormatDepth32Float = 252,
+    PixelFormatStencil8 = 253,
+    PixelFormatDepth24Unorm_Stencil8 = 255,
+    PixelFormatDepth32Float_Stencil8 = 260,
+    PixelFormatX32_Stencil8 = 261,
+    PixelFormatX24_Stencil8 = 262,
+    PixelFormatBGRA10_XR = 552, // values jump from 262 to 552 here
+    PixelFormatBGRA10_XR_sRGB = 553,
+    PixelFormatBGR10_XR = 554,
+    PixelFormatBGR10_XR_sRGB = 555,
+};
+
+}
diff --git a/metal-cpp/Metal/MTLPrivate.hpp b/metal-cpp/Metal/MTLPrivate.hpp
new file mode 100644
index 0000000..6a6aabe
--- /dev/null
+++ b/metal-cpp/Metal/MTLPrivate.hpp
@@ -0,0 +1,135 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLPrivate.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "MTLDefines.hpp"
+
+#include <objc/runtime.h>
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#define _MTL_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol) // names the s_k<symbol> variable in Private::Class
+#define _MTL_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor) // names the s_k<accessor> variable in Private::Selector
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#if defined(MTL_PRIVATE_IMPLEMENTATION) // implementation mode: the _MTL_PRIVATE_DEF_* macros below expand to definitions
+
+#define _MTL_PRIVATE_VISIBILITY __attribute__((visibility("default")))
+#define _MTL_PRIVATE_IMPORT __attribute__((weak_import))
+
+#if __OBJC__
+#define _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol))
+#else // !__OBJC__: use the objc_lookUpClass result without the __bridge cast
+#define _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol)
+#endif // __OBJC__
+
+#define _MTL_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _MTL_PRIVATE_VISIBILITY = _MTL_PRIVATE_OBJC_LOOKUP_CLASS(symbol);
+#define _MTL_PRIVATE_DEF_PRO(symbol)
+#define _MTL_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _MTL_PRIVATE_VISIBILITY = sel_registerName(symbol);
+
+#if defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0)
+
+#define _MTL_PRIVATE_DEF_STR(type, symbol)                  \
+    _MTL_EXTERN type const MTL##symbol _MTL_PRIVATE_IMPORT; \
+    type const                         MTL::symbol = (nullptr != &MTL##symbol) ? MTL##symbol : nullptr;
+
+#else // none of the version macros above is defined: resolve the constant at run time through dlsym
+
+#include <dlfcn.h>
+
+namespace MTL
+{
+namespace Private
+{
+
+    template <typename _Type>
+    inline _Type const LoadSymbol(const char* pSymbol) // looks pSymbol up with dlsym(RTLD_DEFAULT, ...) and returns the _Type value stored at that address
+    {
+        const _Type* pAddress = static_cast<_Type*>(dlsym(RTLD_DEFAULT, pSymbol));
+
+        return pAddress ? *pAddress : nullptr; // nullptr when dlsym returned no address
+    }
+
+} // Private
+} // MTL
+
+#define _MTL_PRIVATE_DEF_STR(type, symbol) \
+    _MTL_EXTERN type const MTL##symbol;    \
+    type const             MTL::symbol = Private::LoadSymbol<type>("MTL" #symbol);
+
+#endif // defined(__MAC_10_16) || defined(__MAC_11_0) || defined(__MAC_12_0) || defined(__IPHONE_14_0) || defined(__IPHONE_15_0) || defined(__TVOS_14_0) || defined(__TVOS_15_0)
+
+#else // MTL_PRIVATE_IMPLEMENTATION not defined: the macros expand to extern declarations or to nothing
+
+#define _MTL_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol;
+#define _MTL_PRIVATE_DEF_PRO(symbol)
+#define _MTL_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor;
+#define _MTL_PRIVATE_DEF_STR(type, symbol)
+
+#endif // MTL_PRIVATE_IMPLEMENTATION
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace MTL
+{
+namespace Private
+{
+    namespace Class // no class symbols are declared in this header; the namespace body is empty
+    {
+
+    } // Class
+} // Private
+} // MTL
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace MTL
+{
+namespace Private
+{
+    namespace Protocol // no protocol symbols are declared in this header; the namespace body is empty
+    {
+
+    } // Protocol
+} // Private
+} // MTL
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace MTL
+{
+namespace Private
+{
+    namespace Selector // s_k<accessor> selector variables produced by _MTL_PRIVATE_DEF_SEL
+    {
+
+        _MTL_PRIVATE_DEF_SEL(beginScope,
+            "beginScope");
+        _MTL_PRIVATE_DEF_SEL(endScope,
+            "endScope");
+    } // Selector
+} // Private
+} // MTL
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/Metal/MTLRasterizationRate.hpp b/metal-cpp/Metal/MTLRasterizationRate.hpp
new file mode 100644
index 0000000..e866954
--- /dev/null
+++ b/metal-cpp/Metal/MTLRasterizationRate.hpp
@@ -0,0 +1,386 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLRasterizationRate.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLDevice.hpp"
+#include "MTLTypes.hpp"
+
+namespace MTL
+{
+class RasterizationRateSampleArray : public NS::Referencing<RasterizationRateSampleArray> // declaration only; members are defined after the namespace as inline wrappers
+{
+public:
+    static class RasterizationRateSampleArray* alloc(); // allocates through the MTLRasterizationRateSampleArray class symbol
+
+    class RasterizationRateSampleArray*        init();
+
+    NS::Number*                                object(NS::UInteger index); // wraps objectAtIndexedSubscript:
+
+    void                                       setObject(const NS::Number* value, NS::UInteger index); // wraps setObject:atIndexedSubscript:
+};
+
+class RasterizationRateLayerDescriptor : public NS::Copying<RasterizationRateLayerDescriptor> // declaration only; members are defined after the namespace as inline wrappers
+{
+public:
+    static class RasterizationRateLayerDescriptor* alloc(); // allocates through the MTLRasterizationRateLayerDescriptor class symbol
+
+    MTL::RasterizationRateLayerDescriptor*         init();
+
+    MTL::RasterizationRateLayerDescriptor*         init(MTL::Size sampleCount); // wraps initWithSampleCount:
+
+    MTL::RasterizationRateLayerDescriptor*         init(MTL::Size sampleCount, const float* horizontal, const float* vertical); // wraps initWithSampleCount:horizontal:vertical:
+
+    MTL::Size                                      sampleCount() const;
+
+    MTL::Size                                      maxSampleCount() const;
+
+    float*                                         horizontalSampleStorage() const;
+
+    float*                                         verticalSampleStorage() const;
+
+    class RasterizationRateSampleArray*            horizontal() const;
+
+    class RasterizationRateSampleArray*            vertical() const;
+
+    void                                           setSampleCount(MTL::Size sampleCount);
+};
+
+class RasterizationRateLayerArray : public NS::Referencing<RasterizationRateLayerArray> // declaration only; members are defined after the namespace as inline wrappers
+{
+public:
+    static class RasterizationRateLayerArray* alloc(); // allocates through the MTLRasterizationRateLayerArray class symbol
+
+    class RasterizationRateLayerArray*        init();
+
+    class RasterizationRateLayerDescriptor*   object(NS::UInteger layerIndex); // wraps objectAtIndexedSubscript:
+
+    void                                      setObject(const class RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex); // wraps setObject:atIndexedSubscript:
+};
+
+class RasterizationRateMapDescriptor : public NS::Copying<RasterizationRateMapDescriptor> // declaration only; members are defined after the namespace as inline wrappers
+{
+public:
+    static class RasterizationRateMapDescriptor* alloc(); // allocates through the MTLRasterizationRateMapDescriptor class symbol
+
+    class RasterizationRateMapDescriptor*        init();
+
+    static class RasterizationRateMapDescriptor* rasterizationRateMapDescriptor(MTL::Size screenSize); // wraps rasterizationRateMapDescriptorWithScreenSize:
+
+    static class RasterizationRateMapDescriptor* rasterizationRateMapDescriptor(MTL::Size screenSize, const class RasterizationRateLayerDescriptor* layer); // wraps ...WithScreenSize:layer:
+
+    static class RasterizationRateMapDescriptor* rasterizationRateMapDescriptor(MTL::Size screenSize, NS::UInteger layerCount, MTL::RasterizationRateLayerDescriptor* const* layers); // wraps ...WithScreenSize:layerCount:layers:
+
+    class RasterizationRateLayerDescriptor*      layer(NS::UInteger layerIndex); // wraps layerAtIndex:
+
+    void                                         setLayer(const class RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex); // wraps setLayer:atIndex:
+
+    class RasterizationRateLayerArray*           layers() const;
+
+    MTL::Size                                    screenSize() const;
+    void                                         setScreenSize(MTL::Size screenSize);
+
+    NS::String*                                  label() const;
+    void                                         setLabel(const NS::String* label);
+
+    NS::UInteger                                 layerCount() const;
+};
+
+class RasterizationRateMap : public NS::Referencing<RasterizationRateMap> // declaration only; no alloc/init is declared here, and members are defined after the namespace as inline wrappers
+{
+public:
+    class Device*     device() const;
+
+    NS::String*       label() const;
+
+    MTL::Size         screenSize() const;
+
+    MTL::Size         physicalGranularity() const;
+
+    NS::UInteger      layerCount() const;
+
+    MTL::SizeAndAlign parameterBufferSizeAndAlign() const;
+
+    void              copyParameterDataToBuffer(const class Buffer* buffer, NS::UInteger offset); // wraps copyParameterDataToBuffer:offset:
+
+    MTL::Size         physicalSize(NS::UInteger layerIndex); // wraps physicalSizeForLayer:
+
+    MTL::Coordinate2D mapScreenToPhysicalCoordinates(MTL::Coordinate2D screenCoordinates, NS::UInteger layerIndex); // wraps mapScreenToPhysicalCoordinates:forLayer:
+
+    MTL::Coordinate2D mapPhysicalToScreenCoordinates(MTL::Coordinate2D physicalCoordinates, NS::UInteger layerIndex); // wraps mapPhysicalToScreenCoordinates:forLayer:
+};
+
+}
+
+// static method: alloc - calls NS::Object::alloc with the MTLRasterizationRateSampleArray class symbol and returns the result.
+_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateSampleArray::alloc()
+{
+    return NS::Object::alloc<MTL::RasterizationRateSampleArray>(_MTL_PRIVATE_CLS(MTLRasterizationRateSampleArray));
+}
+
+// method: init - delegates to NS::Object::init for MTL::RasterizationRateSampleArray and returns its result.
+_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateSampleArray::init()
+{
+    return NS::Object::init<MTL::RasterizationRateSampleArray>();
+}
+
+// method: objectAtIndexedSubscript: - sends objectAtIndexedSubscript_ with index to this object and returns the NS::Number* result.
+_MTL_INLINE NS::Number* MTL::RasterizationRateSampleArray::object(NS::UInteger index)
+{
+    return Object::sendMessage<NS::Number*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index);
+}
+
+// method: setObject:atIndexedSubscript: - forwards value and index with the setObject_atIndexedSubscript_ selector.
+_MTL_INLINE void MTL::RasterizationRateSampleArray::setObject(const NS::Number* value, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), value, index);
+}
+
+// static method: alloc - calls NS::Object::alloc with the MTLRasterizationRateLayerDescriptor class symbol and returns the result.
+_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RasterizationRateLayerDescriptor>(_MTL_PRIVATE_CLS(MTLRasterizationRateLayerDescriptor));
+}
+
+// method: init - delegates to NS::Object::init for MTL::RasterizationRateLayerDescriptor and returns its result.
+_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::init()
+{
+    return NS::Object::init<MTL::RasterizationRateLayerDescriptor>();
+}
+
+// method: initWithSampleCount: - sends initWithSampleCount_ with sampleCount to this object and returns the result.
+_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::init(MTL::Size sampleCount)
+{
+    return Object::sendMessage<MTL::RasterizationRateLayerDescriptor*>(this, _MTL_PRIVATE_SEL(initWithSampleCount_), sampleCount);
+}
+
+// method: initWithSampleCount:horizontal:vertical: - sends initWithSampleCount_horizontal_vertical_ with all three arguments to this object and returns the result.
+_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerDescriptor::init(MTL::Size sampleCount, const float* horizontal, const float* vertical)
+{
+    return Object::sendMessage<MTL::RasterizationRateLayerDescriptor*>(this, _MTL_PRIVATE_SEL(initWithSampleCount_horizontal_vertical_), sampleCount, horizontal, vertical);
+}
+
+// property: sampleCount - getter; sends the sampleCount selector to this object and returns the MTL::Size result.
+_MTL_INLINE MTL::Size MTL::RasterizationRateLayerDescriptor::sampleCount() const
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+
+// property: maxSampleCount - getter; sends the maxSampleCount selector to this object and returns the MTL::Size result.
+_MTL_INLINE MTL::Size MTL::RasterizationRateLayerDescriptor::maxSampleCount() const
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(maxSampleCount));
+}
+
+// property: horizontalSampleStorage - getter; sends the horizontalSampleStorage selector to this object and returns the float* result.
+_MTL_INLINE float* MTL::RasterizationRateLayerDescriptor::horizontalSampleStorage() const
+{
+    return Object::sendMessage<float*>(this, _MTL_PRIVATE_SEL(horizontalSampleStorage));
+}
+
+// property: verticalSampleStorage - getter; sends the verticalSampleStorage selector to this object and returns the float* result.
+_MTL_INLINE float* MTL::RasterizationRateLayerDescriptor::verticalSampleStorage() const
+{
+    return Object::sendMessage<float*>(this, _MTL_PRIVATE_SEL(verticalSampleStorage));
+}
+
+// property: horizontal - getter; sends the horizontal selector to this object and returns the sample-array pointer it yields.
+_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateLayerDescriptor::horizontal() const
+{
+    return Object::sendMessage<MTL::RasterizationRateSampleArray*>(this, _MTL_PRIVATE_SEL(horizontal));
+}
+
+// property: vertical - getter; sends the vertical selector to this object and returns the sample-array pointer it yields.
+_MTL_INLINE MTL::RasterizationRateSampleArray* MTL::RasterizationRateLayerDescriptor::vertical() const
+{
+    return Object::sendMessage<MTL::RasterizationRateSampleArray*>(this, _MTL_PRIVATE_SEL(vertical));
+}
+
+// method: setSampleCount: - forwards sampleCount with the setSampleCount_ selector.
+_MTL_INLINE void MTL::RasterizationRateLayerDescriptor::setSampleCount(MTL::Size sampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount);
+}
+
+// static method: alloc - calls NS::Object::alloc with the MTLRasterizationRateLayerArray class symbol and returns the result.
+_MTL_INLINE MTL::RasterizationRateLayerArray* MTL::RasterizationRateLayerArray::alloc()
+{
+    return NS::Object::alloc<MTL::RasterizationRateLayerArray>(_MTL_PRIVATE_CLS(MTLRasterizationRateLayerArray));
+}
+
+// method: init - delegates to NS::Object::init for MTL::RasterizationRateLayerArray and returns its result.
+_MTL_INLINE MTL::RasterizationRateLayerArray* MTL::RasterizationRateLayerArray::init()
+{
+    return NS::Object::init<MTL::RasterizationRateLayerArray>();
+}
+
+// method: objectAtIndexedSubscript: - sends objectAtIndexedSubscript_ with layerIndex to this object and returns the layer-descriptor pointer it yields.
+_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateLayerArray::object(NS::UInteger layerIndex)
+{
+    return Object::sendMessage<MTL::RasterizationRateLayerDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), layerIndex);
+}
+
+// method: setObject:atIndexedSubscript: - forwards layer and layerIndex with the setObject_atIndexedSubscript_ selector.
+_MTL_INLINE void MTL::RasterizationRateLayerArray::setObject(const MTL::RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), layer, layerIndex);
+}
+
+// static method: alloc - calls NS::Object::alloc with the MTLRasterizationRateMapDescriptor class symbol and returns the result.
+_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RasterizationRateMapDescriptor>(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor));
+}
+
+// method: init - delegates to NS::Object::init for MTL::RasterizationRateMapDescriptor and returns its result.
+_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::init()
+{
+    return NS::Object::init<MTL::RasterizationRateMapDescriptor>();
+}
+
+// static method: rasterizationRateMapDescriptorWithScreenSize: - sends the selector with screenSize to the MTLRasterizationRateMapDescriptor class symbol and returns the result.
+_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::rasterizationRateMapDescriptor(MTL::Size screenSize)
+{
+    return Object::sendMessage<MTL::RasterizationRateMapDescriptor*>(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor), _MTL_PRIVATE_SEL(rasterizationRateMapDescriptorWithScreenSize_), screenSize);
+}
+
+// static method: rasterizationRateMapDescriptorWithScreenSize:layer: - sends the selector with screenSize and layer to the MTLRasterizationRateMapDescriptor class symbol and returns the result.
+_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::rasterizationRateMapDescriptor(MTL::Size screenSize, const MTL::RasterizationRateLayerDescriptor* layer)
+{
+    return Object::sendMessage<MTL::RasterizationRateMapDescriptor*>(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor), _MTL_PRIVATE_SEL(rasterizationRateMapDescriptorWithScreenSize_layer_), screenSize, layer);
+}
+
+// static method: rasterizationRateMapDescriptorWithScreenSize:layerCount:layers: - sends the selector with screenSize, layerCount and layers to the MTLRasterizationRateMapDescriptor class symbol and returns the result.
+_MTL_INLINE MTL::RasterizationRateMapDescriptor* MTL::RasterizationRateMapDescriptor::rasterizationRateMapDescriptor(MTL::Size screenSize, NS::UInteger layerCount, MTL::RasterizationRateLayerDescriptor* const* layers)
+{
+    return Object::sendMessage<MTL::RasterizationRateMapDescriptor*>(_MTL_PRIVATE_CLS(MTLRasterizationRateMapDescriptor), _MTL_PRIVATE_SEL(rasterizationRateMapDescriptorWithScreenSize_layerCount_layers_), screenSize, layerCount, layers);
+}
+
+// method: layerAtIndex: - sends layerAtIndex_ with layerIndex to this object and returns the layer-descriptor pointer it yields.
+_MTL_INLINE MTL::RasterizationRateLayerDescriptor* MTL::RasterizationRateMapDescriptor::layer(NS::UInteger layerIndex)
+{
+    return Object::sendMessage<MTL::RasterizationRateLayerDescriptor*>(this, _MTL_PRIVATE_SEL(layerAtIndex_), layerIndex);
+}
+
+// method: setLayer:atIndex: - forwards layer and layerIndex with the setLayer_atIndex_ selector.
+_MTL_INLINE void MTL::RasterizationRateMapDescriptor::setLayer(const MTL::RasterizationRateLayerDescriptor* layer, NS::UInteger layerIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLayer_atIndex_), layer, layerIndex);
+}
+
+// property: layers - getter; sends the layers selector to this object and returns the layer-array pointer it yields.
+_MTL_INLINE MTL::RasterizationRateLayerArray* MTL::RasterizationRateMapDescriptor::layers() const
+{
+    return Object::sendMessage<MTL::RasterizationRateLayerArray*>(this, _MTL_PRIVATE_SEL(layers));
+}
+
+// property: screenSize - getter; sends the screenSize selector to this object and returns the MTL::Size result.
+_MTL_INLINE MTL::Size MTL::RasterizationRateMapDescriptor::screenSize() const
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(screenSize));
+}
+
+_MTL_INLINE void MTL::RasterizationRateMapDescriptor::setScreenSize(MTL::Size screenSize) // setter: forwards screenSize with the setScreenSize_ selector
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setScreenSize_), screenSize);
+}
+
+// property: label - getter; sends the label selector to this object and returns the NS::String* result.
+_MTL_INLINE NS::String* MTL::RasterizationRateMapDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::RasterizationRateMapDescriptor::setLabel(const NS::String* label) // setter: forwards label with the setLabel_ selector
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: layerCount - getter; sends the layerCount selector to this object and returns the NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::RasterizationRateMapDescriptor::layerCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(layerCount));
+}
+
+// property: device - getter; sends the device selector to this object and returns the MTL::Device* result.
+_MTL_INLINE MTL::Device* MTL::RasterizationRateMap::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label - getter; sends the label selector to this object and returns the NS::String* result.
+_MTL_INLINE NS::String* MTL::RasterizationRateMap::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: screenSize - getter; sends the screenSize selector to this object and returns the MTL::Size result.
+_MTL_INLINE MTL::Size MTL::RasterizationRateMap::screenSize() const
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(screenSize));
+}
+
+// property: physicalGranularity - getter; sends the physicalGranularity selector to this object and returns the MTL::Size result.
+_MTL_INLINE MTL::Size MTL::RasterizationRateMap::physicalGranularity() const
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(physicalGranularity));
+}
+
+// property: layerCount - getter; sends the layerCount selector to this object and returns the NS::UInteger result.
+_MTL_INLINE NS::UInteger MTL::RasterizationRateMap::layerCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(layerCount));
+}
+
+// property: parameterBufferSizeAndAlign - getter; sends the parameterBufferSizeAndAlign selector to this object and returns the MTL::SizeAndAlign result.
+_MTL_INLINE MTL::SizeAndAlign MTL::RasterizationRateMap::parameterBufferSizeAndAlign() const
+{
+    return Object::sendMessage<MTL::SizeAndAlign>(this, _MTL_PRIVATE_SEL(parameterBufferSizeAndAlign));
+}
+
+// method: copyParameterDataToBuffer:offset: - forwards buffer and offset with the copyParameterDataToBuffer_offset_ selector.
+_MTL_INLINE void MTL::RasterizationRateMap::copyParameterDataToBuffer(const MTL::Buffer* buffer, NS::UInteger offset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(copyParameterDataToBuffer_offset_), buffer, offset);
+}
+
+// method: physicalSizeForLayer: - sends physicalSizeForLayer_ with layerIndex to this object and returns the MTL::Size result.
+_MTL_INLINE MTL::Size MTL::RasterizationRateMap::physicalSize(NS::UInteger layerIndex)
+{
+    return Object::sendMessage<MTL::Size>(this, _MTL_PRIVATE_SEL(physicalSizeForLayer_), layerIndex);
+}
+
+// method: mapScreenToPhysicalCoordinates:forLayer: - sends the selector with screenCoordinates and layerIndex to this object and returns the MTL::Coordinate2D result.
+_MTL_INLINE MTL::Coordinate2D MTL::RasterizationRateMap::mapScreenToPhysicalCoordinates(MTL::Coordinate2D screenCoordinates, NS::UInteger layerIndex)
+{
+    return Object::sendMessage<MTL::Coordinate2D>(this, _MTL_PRIVATE_SEL(mapScreenToPhysicalCoordinates_forLayer_), screenCoordinates, layerIndex);
+}
+
+// method: mapPhysicalToScreenCoordinates:forLayer: - sends the selector with physicalCoordinates and layerIndex to this object and returns the MTL::Coordinate2D result.
+_MTL_INLINE MTL::Coordinate2D MTL::RasterizationRateMap::mapPhysicalToScreenCoordinates(MTL::Coordinate2D physicalCoordinates, NS::UInteger layerIndex)
+{
+    return Object::sendMessage<MTL::Coordinate2D>(this, _MTL_PRIVATE_SEL(mapPhysicalToScreenCoordinates_forLayer_), physicalCoordinates, layerIndex);
+}
diff --git a/metal-cpp/Metal/MTLRenderCommandEncoder.hpp b/metal-cpp/Metal/MTLRenderCommandEncoder.hpp
new file mode 100644
index 0000000..8bda856
--- /dev/null
+++ b/metal-cpp/Metal/MTLRenderCommandEncoder.hpp
@@ -0,0 +1,958 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLRenderCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLBuffer.hpp"
+#include "MTLCommandEncoder.hpp"
+#include "MTLHeap.hpp"
+#include "MTLIntersectionFunctionTable.hpp"
+#include "MTLRenderCommandEncoder.hpp"
+#include "MTLRenderPass.hpp"
+#include "MTLResource.hpp"
+#include "MTLSampler.hpp"
+#include "MTLStageInputOutputDescriptor.hpp"
+#include "MTLTexture.hpp"
+#include "MTLTypes.hpp"
+#include "MTLVisibleFunctionTable.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, PrimitiveType) { // first argument of every drawPrimitives / drawIndexedPrimitives overload declared in RenderCommandEncoder
+    PrimitiveTypePoint = 0,
+    PrimitiveTypeLine = 1,
+    PrimitiveTypeLineStrip = 2,
+    PrimitiveTypeTriangle = 3,
+    PrimitiveTypeTriangleStrip = 4,
+};
+
+_MTL_ENUM(NS::UInteger, VisibilityResultMode) { // 'mode' argument of RenderCommandEncoder::setVisibilityResultMode
+    VisibilityResultModeDisabled = 0,
+    VisibilityResultModeBoolean = 1,
+    VisibilityResultModeCounting = 2,
+};
+
+struct ScissorRect // passed by value to setScissorRect and by pointer (with a count) to setScissorRects
+{
+    NS::UInteger x;
+    NS::UInteger y;
+    NS::UInteger width;
+    NS::UInteger height;
+} _MTL_PACKED; // _MTL_PACKED is a macro defined outside this file
+
+struct Viewport // six doubles; passed by value to setViewport and by pointer (with a count) to setViewports
+{
+    double originX;
+    double originY;
+    double width;
+    double height;
+    double znear;
+    double zfar;
+} _MTL_PACKED;
+
+_MTL_ENUM(NS::UInteger, CullMode) { // argument of RenderCommandEncoder::setCullMode
+    CullModeNone = 0,
+    CullModeFront = 1,
+    CullModeBack = 2,
+};
+
+_MTL_ENUM(NS::UInteger, Winding) { // argument of RenderCommandEncoder::setFrontFacingWinding
+    WindingClockwise = 0,
+    WindingCounterClockwise = 1,
+};
+
+_MTL_ENUM(NS::UInteger, DepthClipMode) { // argument of RenderCommandEncoder::setDepthClipMode
+    DepthClipModeClip = 0,
+    DepthClipModeClamp = 1,
+};
+
+_MTL_ENUM(NS::UInteger, TriangleFillMode) { // argument of RenderCommandEncoder::setTriangleFillMode
+    TriangleFillModeFill = 0,
+    TriangleFillModeLines = 1,
+};
+
+struct DrawPrimitivesIndirectArguments // four uint32_t fields; presumably the record read from indirectBuffer by drawPrimitives(type, indirectBuffer, offset) -- TODO confirm
+{
+    uint32_t vertexCount;
+    uint32_t instanceCount;
+    uint32_t vertexStart;
+    uint32_t baseInstance;
+} _MTL_PACKED;
+
+struct DrawIndexedPrimitivesIndirectArguments // presumably the record read from indirectBuffer by the indirect drawIndexedPrimitives overload -- TODO confirm
+{
+    uint32_t indexCount;
+    uint32_t instanceCount;
+    uint32_t indexStart;
+    int32_t  baseVertex; // the only signed field in this struct
+    uint32_t baseInstance;
+} _MTL_PACKED;
+
+struct VertexAmplificationViewMapping // element type of the viewMappings pointer taken by setVertexAmplificationCount
+{
+    uint32_t viewportArrayIndexOffset;
+    uint32_t renderTargetArrayIndexOffset;
+} _MTL_PACKED;
+
+struct DrawPatchIndirectArguments // four uint32_t fields; presumably the record read from indirectBuffer by the indirect drawPatches / drawIndexedPatches overloads -- TODO confirm
+{
+    uint32_t patchCount;
+    uint32_t instanceCount;
+    uint32_t patchStart;
+    uint32_t baseInstance;
+} _MTL_PACKED;
+
+struct QuadTessellationFactorsHalf // 4 edge + 2 inside factors in uint16_t storage; the 'Half' suffix suggests 16-bit float bit patterns -- TODO confirm
+{
+    uint16_t edgeTessellationFactor[4];
+    uint16_t insideTessellationFactor[2];
+} _MTL_PACKED;
+
+struct TriangleTessellationFactorsHalf // 3 edge factors + 1 inside factor in uint16_t storage; the 'Half' suffix suggests 16-bit float bit patterns -- TODO confirm
+{
+    uint16_t edgeTessellationFactor[3];
+    uint16_t insideTessellationFactor; // a scalar here, whereas QuadTessellationFactorsHalf uses a 2-element array
+} _MTL_PACKED;
+
+_MTL_OPTIONS(NS::UInteger, RenderStages) { // values are distinct single bits (1, 2, 4); taken by updateFence, waitForFence, useResource(s), useHeap(s) and memoryBarrier
+    RenderStageVertex = 1,
+    RenderStageFragment = 2,
+    RenderStageTile = 4,
+};
+
+class RenderCommandEncoder : public NS::Referencing<RenderCommandEncoder, CommandEncoder> // declarations only; member functions are defined out-of-line after namespace MTL closes
+{
+public:
+    void         setRenderPipelineState(const class RenderPipelineState* pipelineState);
+    // -- setVertex*: bytes, buffers, textures, samplers, function tables, acceleration structure --
+    void         setVertexBytes(const void* bytes, NS::UInteger length, NS::UInteger index);
+
+    void         setVertexBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void         setVertexBufferOffset(NS::UInteger offset, NS::UInteger index);
+
+    void         setVertexBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range);
+
+    void         setVertexTexture(const class Texture* texture, NS::UInteger index);
+
+    void         setVertexTextures(MTL::Texture* textures[], NS::Range range);
+
+    void         setVertexSamplerState(const class SamplerState* sampler, NS::UInteger index);
+
+    void         setVertexSamplerStates(MTL::SamplerState* samplers[], NS::Range range);
+
+    void         setVertexSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index);
+
+    void         setVertexSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range);
+
+    void         setVertexVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex);
+
+    void         setVertexVisibleFunctionTables(const class VisibleFunctionTable* functionTables[], NS::Range range);
+
+    void         setVertexIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex);
+
+    void         setVertexIntersectionFunctionTables(const class IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range);
+
+    void         setVertexAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex);
+    // -- viewport, front-face winding, vertex amplification, cull mode, depth clip/bias, scissor and triangle fill setters --
+    void         setViewport(MTL::Viewport viewport);
+
+    void         setViewports(const MTL::Viewport* viewports, NS::UInteger count);
+
+    void         setFrontFacingWinding(MTL::Winding frontFacingWinding);
+
+    void         setVertexAmplificationCount(NS::UInteger count, const MTL::VertexAmplificationViewMapping* viewMappings);
+
+    void         setCullMode(MTL::CullMode cullMode);
+
+    void         setDepthClipMode(MTL::DepthClipMode depthClipMode);
+
+    void         setDepthBias(float depthBias, float slopeScale, float clamp);
+
+    void         setScissorRect(MTL::ScissorRect rect);
+
+    void         setScissorRects(const MTL::ScissorRect* scissorRects, NS::UInteger count);
+
+    void         setTriangleFillMode(MTL::TriangleFillMode fillMode);
+    // -- setFragment*: same set of overloads as the setVertex* group --
+    void         setFragmentBytes(const void* bytes, NS::UInteger length, NS::UInteger index);
+
+    void         setFragmentBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void         setFragmentBufferOffset(NS::UInteger offset, NS::UInteger index);
+
+    void         setFragmentBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range);
+
+    void         setFragmentTexture(const class Texture* texture, NS::UInteger index);
+
+    void         setFragmentTextures(MTL::Texture* textures[], NS::Range range);
+
+    void         setFragmentSamplerState(const class SamplerState* sampler, NS::UInteger index);
+
+    void         setFragmentSamplerStates(MTL::SamplerState* samplers[], NS::Range range);
+
+    void         setFragmentSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index);
+
+    void         setFragmentSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range);
+
+    void         setFragmentVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex);
+
+    void         setFragmentVisibleFunctionTables(const VisibleFunctionTable* functionTables[], NS::Range range);
+
+    void         setFragmentIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex);
+
+    void         setFragmentIntersectionFunctionTables(const class IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range);
+
+    void         setFragmentAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex);
+    // -- blend color, depth/stencil state, stencil reference values, visibility result mode, store actions --
+    void         setBlendColorRed(float red, float green, float blue, float alpha);
+
+    void         setDepthStencilState(const class DepthStencilState* depthStencilState);
+
+    void         setStencilReferenceValue(uint32_t referenceValue);
+
+    void         setStencilFrontReferenceValue(uint32_t frontReferenceValue, uint32_t backReferenceValue);
+
+    void         setVisibilityResultMode(MTL::VisibilityResultMode mode, NS::UInteger offset);
+
+    void         setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex);
+
+    void         setDepthStoreAction(MTL::StoreAction storeAction);
+
+    void         setStencilStoreAction(MTL::StoreAction storeAction);
+
+    void         setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex);
+
+    void         setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions);
+
+    void         setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions);
+    // -- drawPrimitives / drawIndexedPrimitives overloads (direct and indirect-buffer forms) --
+    void         drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount);
+
+    void         drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount);
+
+    void         drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount);
+
+    void         drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset);
+
+    void         drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance);
+
+    void         drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance);
+
+    void         drawPrimitives(MTL::PrimitiveType primitiveType, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset);
+
+    void         drawIndexedPrimitives(MTL::PrimitiveType primitiveType, MTL::IndexType indexType, const class Buffer* indexBuffer, NS::UInteger indexBufferOffset, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset);
+    // -- texture barrier and fences --
+    void         textureBarrier();
+
+    void         updateFence(const class Fence* fence, MTL::RenderStages stages);
+
+    void         waitForFence(const class Fence* fence, MTL::RenderStages stages);
+    // -- tessellation factor buffer/scale and drawPatches / drawIndexedPatches overloads --
+    void         setTessellationFactorBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride);
+
+    void         setTessellationFactorScale(float scale);
+
+    void         drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance);
+
+    void         drawPatches(NS::UInteger numberOfPatchControlPoints, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset);
+
+    void         drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance);
+
+    void         drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, const class Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const class Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset);
+    // -- tile group: tileWidth/tileHeight are the only const, non-void members; then setTile* and per-tile dispatch --
+    NS::UInteger tileWidth() const;
+
+    NS::UInteger tileHeight() const;
+
+    void         setTileBytes(const void* bytes, NS::UInteger length, NS::UInteger index);
+
+    void         setTileBuffer(const class Buffer* buffer, NS::UInteger offset, NS::UInteger index);
+
+    void         setTileBufferOffset(NS::UInteger offset, NS::UInteger index);
+    // NOTE(review): 'buffers' is MTL::Buffer* here but MTL::Buffer* buffers[] in setVertexBuffers / setFragmentBuffers -- confirm which is intended.
+    void         setTileBuffers(MTL::Buffer* buffers, const NS::UInteger* offsets, NS::Range range);
+
+    void         setTileTexture(const class Texture* texture, NS::UInteger index);
+
+    void         setTileTextures(MTL::Texture* textures[], NS::Range range);
+
+    void         setTileSamplerState(const class SamplerState* sampler, NS::UInteger index);
+
+    void         setTileSamplerStates(MTL::SamplerState* samplers[], NS::Range range);
+
+    void         setTileSamplerState(const class SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index);
+
+    void         setTileSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range);
+
+    void         setTileVisibleFunctionTable(const class VisibleFunctionTable* functionTable, NS::UInteger bufferIndex);
+
+    void         setTileVisibleFunctionTables(const class VisibleFunctionTable* functionTables[], NS::Range range);
+
+    void         setTileIntersectionFunctionTable(const class IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex);
+
+    void         setTileIntersectionFunctionTables(const class IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range);
+
+    void         setTileAccelerationStructure(const class AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex);
+
+    void         dispatchThreadsPerTile(MTL::Size threadsPerTile);
+
+    void         setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger offset, NS::UInteger index);
+    // -- useResource(s) / useHeap(s), each with and without a RenderStages argument --
+    void         useResource(const class Resource* resource, MTL::ResourceUsage usage);
+
+    void         useResources(MTL::Resource* resources[], NS::UInteger count, MTL::ResourceUsage usage);
+
+    void         useResource(const class Resource* resource, MTL::ResourceUsage usage, MTL::RenderStages stages);
+    // NOTE(review): 'resources' is MTL::Resource* here but MTL::Resource* resources[] in the 3-argument useResources overload -- confirm which is intended.
+    void         useResources(MTL::Resource* resources, NS::UInteger count, MTL::ResourceUsage usage, MTL::RenderStages stages);
+
+    void         useHeap(const class Heap* heap);
+
+    void         useHeaps(MTL::Heap* heaps[], NS::UInteger count);
+
+    void         useHeap(const class Heap* heap, MTL::RenderStages stages);
+
+    void         useHeaps(MTL::Heap* heaps[], NS::UInteger count, MTL::RenderStages stages);
+    // -- indirect command buffer execution, memory barriers, counter sampling --
+    void         executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange);
+
+    void         executeCommandsInBuffer(const class IndirectCommandBuffer* indirectCommandbuffer, const class Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset);
+
+    void         memoryBarrier(MTL::BarrierScope scope, MTL::RenderStages after, MTL::RenderStages before);
+
+    void         memoryBarrier(MTL::Resource* resources[], NS::UInteger count, MTL::RenderStages after, MTL::RenderStages before);
+
+    void         sampleCountersInBuffer(const class CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier);
+};
+
+}
+
+// method: setRenderPipelineState: -- inline wrapper; forwards the pipelineState pointer unchanged through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setRenderPipelineState(const MTL::RenderPipelineState* pipelineState)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRenderPipelineState_), pipelineState);
+}
+
+// method: setVertexBytes:length:atIndex: -- inline wrapper; forwards (bytes, length, index) in that order through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setVertexBuffer:offset:atIndex: -- inline wrapper; forwards (buffer, offset, index) in that order through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setVertexBufferOffset:atIndex: -- inline wrapper; forwards (offset, index) through Object::sendMessage<void>; takes no buffer argument. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBufferOffset_atIndex_), offset, index);
+}
+
+// method: setVertexBuffers:offsets:withRange: -- inline wrapper; forwards the two array pointers and the NS::Range (by value) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setVertexTexture:atIndex: -- inline wrapper; forwards (texture, index) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexTexture_atIndex_), texture, index);
+}
+
+// method: setVertexTextures:withRange: -- inline wrapper; forwards the texture-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexTextures(MTL::Texture* textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexTextures_withRange_), textures, range);
+}
+
+// method: setVertexSamplerState:atIndex: -- two-argument overload; forwards (sampler, index) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexSamplerState_atIndex_), sampler, index);
+}
+
+// method: setVertexSamplerStates:withRange: -- two-argument overload; forwards the sampler-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerStates(MTL::SamplerState* samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexSamplerStates_withRange_), samplers, range);
+}
+
+// method: setVertexSamplerState:lodMinClamp:lodMaxClamp:atIndex: -- overload that additionally forwards the two float clamp values between sampler and index. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setVertexSamplerStates:lodMinClamps:lodMaxClamps:withRange: -- overload that additionally forwards the two float-array pointers between samplers and range. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: setVertexVisibleFunctionTable:atBufferIndex: -- inline wrapper; forwards (functionTable, bufferIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex);
+}
+
+// method: setVertexVisibleFunctionTables:withBufferRange: -- inline wrapper; forwards the table-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexVisibleFunctionTables(const MTL::VisibleFunctionTable* functionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexVisibleFunctionTables_withBufferRange_), functionTables, range);
+}
+
+// method: setVertexIntersectionFunctionTable:atBufferIndex: -- inline wrapper; forwards (intersectionFunctionTable, bufferIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex);
+}
+
+// method: setVertexIntersectionFunctionTables:withBufferRange: -- inline wrapper; forwards the table-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexIntersectionFunctionTables(const MTL::IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range);
+}
+
+// method: setVertexAccelerationStructure:atBufferIndex: -- inline wrapper; forwards (accelerationStructure, bufferIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex);
+}
+
+// method: setViewport: -- inline wrapper; hands the MTL::Viewport struct (passed by value) to Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setViewport(MTL::Viewport viewport)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setViewport_), viewport);
+}
+
+// method: setViewports:count: -- inline wrapper; forwards the viewport pointer and count through Object::sendMessage<void>; the count is not checked here. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setViewports(const MTL::Viewport* viewports, NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setViewports_count_), viewports, count);
+}
+
+// method: setFrontFacingWinding: -- inline wrapper; forwards the MTL::Winding enum value through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFrontFacingWinding(MTL::Winding frontFacingWinding)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFrontFacingWinding_), frontFacingWinding);
+}
+
+// method: setVertexAmplificationCount:viewMappings: -- inline wrapper; note the count comes first, then the viewMappings pointer. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVertexAmplificationCount(NS::UInteger count, const MTL::VertexAmplificationViewMapping* viewMappings)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexAmplificationCount_viewMappings_), count, viewMappings);
+}
+
+// method: setCullMode: -- inline wrapper; forwards the MTL::CullMode enum value through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setCullMode(MTL::CullMode cullMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCullMode_), cullMode);
+}
+
+// method: setDepthClipMode: -- inline wrapper; forwards the MTL::DepthClipMode enum value through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthClipMode(MTL::DepthClipMode depthClipMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthClipMode_), depthClipMode);
+}
+
+// method: setDepthBias:slopeScale:clamp: -- inline wrapper; forwards three floats (depthBias, slopeScale, clamp) in that order through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthBias(float depthBias, float slopeScale, float clamp)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthBias_slopeScale_clamp_), depthBias, slopeScale, clamp);
+}
+
+// method: setScissorRect: -- inline wrapper; hands the MTL::ScissorRect struct (passed by value) to Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setScissorRect(MTL::ScissorRect rect)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setScissorRect_), rect);
+}
+
+// method: setScissorRects:count: -- inline wrapper; forwards the rect pointer and count through Object::sendMessage<void>; the count is not checked here. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setScissorRects(const MTL::ScissorRect* scissorRects, NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setScissorRects_count_), scissorRects, count);
+}
+
+// method: setTriangleFillMode: -- inline wrapper; forwards the MTL::TriangleFillMode enum value through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTriangleFillMode(MTL::TriangleFillMode fillMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTriangleFillMode_), fillMode);
+}
+
+// method: setFragmentBytes:length:atIndex: -- inline wrapper; forwards (bytes, length, index) in that order through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setFragmentBuffer:offset:atIndex: -- inline wrapper; forwards (buffer, offset, index) in that order through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setFragmentBufferOffset:atIndex: -- inline wrapper; forwards (offset, index) through Object::sendMessage<void>; takes no buffer argument. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBufferOffset_atIndex_), offset, index);
+}
+
+// method: setFragmentBuffers:offsets:withRange: -- inline wrapper; forwards the two array pointers and the NS::Range (by value) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentBuffers(MTL::Buffer* buffers[], const NS::UInteger offsets[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setFragmentTexture:atIndex: -- inline wrapper; forwards (texture, index) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentTexture_atIndex_), texture, index);
+}
+
+// method: setFragmentTextures:withRange: -- inline wrapper; forwards the texture-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentTextures(MTL::Texture* textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentTextures_withRange_), textures, range);
+}
+
+// method: setFragmentSamplerState:atIndex: -- two-argument overload; forwards (sampler, index) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerState_atIndex_), sampler, index);
+}
+
+// method: setFragmentSamplerStates:withRange: -- two-argument overload; forwards the sampler-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerStates(MTL::SamplerState* samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerStates_withRange_), samplers, range);
+}
+
+// method: setFragmentSamplerState:lodMinClamp:lodMaxClamp:atIndex: -- overload that additionally forwards the two float clamp values between sampler and index. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setFragmentSamplerStates:lodMinClamps:lodMaxClamps:withRange: -- overload that additionally forwards the two float-array pointers between samplers and range. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: setFragmentVisibleFunctionTable:atBufferIndex: -- inline wrapper; forwards (functionTable, bufferIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex);
+}
+
+// method: setFragmentVisibleFunctionTables:withBufferRange: -- inline wrapper; forwards the table-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentVisibleFunctionTables(const MTL::VisibleFunctionTable* functionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentVisibleFunctionTables_withBufferRange_), functionTables, range);
+}
+
+// method: setFragmentIntersectionFunctionTable:atBufferIndex: -- inline wrapper; forwards (intersectionFunctionTable, bufferIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex);
+}
+
+// method: setFragmentIntersectionFunctionTables:withBufferRange: -- inline wrapper; forwards the table-pointer array and the NS::Range through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentIntersectionFunctionTables(const MTL::IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range);
+}
+
+// method: setFragmentAccelerationStructure:atBufferIndex: -- inline wrapper; forwards (accelerationStructure, bufferIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setFragmentAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex);
+}
+
+// method: setBlendColorRed:green:blue:alpha: -- the C++ name keeps only the first selector part; all four floats are forwarded in (red, green, blue, alpha) order. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setBlendColorRed(float red, float green, float blue, float alpha)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBlendColorRed_green_blue_alpha_), red, green, blue, alpha);
+}
+
+// method: setDepthStencilState: -- inline wrapper; forwards the depthStencilState pointer unchanged through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthStencilState(const MTL::DepthStencilState* depthStencilState)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStencilState_), depthStencilState);
+}
+
+// method: setStencilReferenceValue: -- inline wrapper; forwards a single uint32_t reference value through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilReferenceValue(uint32_t referenceValue)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilReferenceValue_), referenceValue);
+}
+
+// method: setStencilFrontReferenceValue:backReferenceValue: -- the C++ name keeps only the first selector part; forwards the front value, then the back value (both uint32_t). Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilFrontReferenceValue(uint32_t frontReferenceValue, uint32_t backReferenceValue)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilFrontReferenceValue_backReferenceValue_), frontReferenceValue, backReferenceValue);
+}
+
+// method: setVisibilityResultMode:offset: -- inline wrapper; forwards the MTL::VisibilityResultMode value and an NS::UInteger offset through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setVisibilityResultMode(MTL::VisibilityResultMode mode, NS::UInteger offset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibilityResultMode_offset_), mode, offset);
+}
+
+// method: setColorStoreAction:atIndex: -- inline wrapper; forwards (storeAction, colorAttachmentIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setColorStoreAction(MTL::StoreAction storeAction, NS::UInteger colorAttachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setColorStoreAction_atIndex_), storeAction, colorAttachmentIndex);
+}
+
+// method: setDepthStoreAction: -- inline wrapper; forwards the MTL::StoreAction value through Object::sendMessage<void>; no attachment index, unlike setColorStoreAction. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthStoreAction(MTL::StoreAction storeAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStoreAction_), storeAction);
+}
+
+// method: setStencilStoreAction: -- inline wrapper; forwards the MTL::StoreAction value through Object::sendMessage<void>; no attachment index, unlike setColorStoreAction. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilStoreAction(MTL::StoreAction storeAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilStoreAction_), storeAction);
+}
+
+// method: setColorStoreActionOptions:atIndex: -- inline wrapper; forwards (storeActionOptions, colorAttachmentIndex) through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setColorStoreActionOptions(MTL::StoreActionOptions storeActionOptions, NS::UInteger colorAttachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setColorStoreActionOptions_atIndex_), storeActionOptions, colorAttachmentIndex);
+}
+
+// method: setDepthStoreActionOptions: -- inline wrapper; forwards the MTL::StoreActionOptions value through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setDepthStoreActionOptions(MTL::StoreActionOptions storeActionOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthStoreActionOptions_), storeActionOptions);
+}
+
+// method: setStencilStoreActionOptions: -- inline wrapper; forwards the MTL::StoreActionOptions value through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setStencilStoreActionOptions(MTL::StoreActionOptions storeActionOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilStoreActionOptions_), storeActionOptions);
+}
+
+// method: drawPrimitives:vertexStart:vertexCount:instanceCount: -- four-argument overload; forwards all arguments in declaration order through Object::sendMessage<void>. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_), primitiveType, vertexStart, vertexCount, instanceCount);
+}
+
+// method: drawPrimitives:vertexStart:vertexCount: -- three-argument overload (no instance count); uses its own selector token rather than delegating to the four-argument form.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_), primitiveType, vertexStart, vertexCount);
+}
+
+// method: drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount: -- six-argument overload; forwards all arguments in declaration order. Returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset, instanceCount);
+}
+
+// method: drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset: -- five-argument overload (no instance count); sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset);
+}
+
+// method: drawPrimitives:vertexStart:vertexCount:instanceCount:baseInstance: -- five-argument overload adding baseInstance; sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger vertexStart, NS::UInteger vertexCount, NS::UInteger instanceCount, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_vertexStart_vertexCount_instanceCount_baseInstance_), primitiveType, vertexStart, vertexCount, instanceCount, baseInstance);
+}
+
+// method: drawIndexedPrimitives:indexCount:indexType:indexBuffer:indexBufferOffset:instanceCount:baseVertex:baseInstance: -- eight-argument overload; baseVertex is the only signed (NS::Integer) parameter. Sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, NS::UInteger indexCount, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, NS::UInteger instanceCount, NS::Integer baseVertex, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexCount_indexType_indexBuffer_indexBufferOffset_instanceCount_baseVertex_baseInstance_), primitiveType, indexCount, indexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
+}
+
+// method: drawPrimitives:indirectBuffer:indirectBufferOffset: -- overload taking a buffer and offset instead of explicit counts; sends this selector through Object::sendMessage<void>, forwarding the three arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPrimitives(MTL::PrimitiveType primitiveType, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPrimitives_indirectBuffer_indirectBufferOffset_), primitiveType, indirectBuffer, indirectBufferOffset);
+}
+
+// method: drawIndexedPrimitives:indexType:indexBuffer:indexBufferOffset:indirectBuffer:indirectBufferOffset: -- overload taking an index buffer plus an indirect buffer; sends this selector through Object::sendMessage<void>, forwarding the six arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPrimitives(MTL::PrimitiveType primitiveType, MTL::IndexType indexType, const MTL::Buffer* indexBuffer, NS::UInteger indexBufferOffset, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPrimitives_indexType_indexBuffer_indexBufferOffset_indirectBuffer_indirectBufferOffset_), primitiveType, indexType, indexBuffer, indexBufferOffset, indirectBuffer, indirectBufferOffset);
+}
+
+// method: textureBarrier -- sends the argument-less textureBarrier selector to the encoder through Object::sendMessage<void>; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::textureBarrier()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(textureBarrier));
+}
+
+// method: updateFence:afterStages: -- sends this selector through Object::sendMessage<void>, passing fence then stages; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::updateFence(const MTL::Fence* fence, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateFence_afterStages_), fence, stages);
+}
+
+// method: waitForFence:beforeStages: -- sends this selector through Object::sendMessage<void>, passing fence then stages; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::waitForFence(const MTL::Fence* fence, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitForFence_beforeStages_), fence, stages);
+}
+
+// method: setTessellationFactorBuffer:offset:instanceStride: -- sends this selector through Object::sendMessage<void>, passing buffer, offset and instanceStride in that order; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTessellationFactorBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger instanceStride)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationFactorBuffer_offset_instanceStride_), buffer, offset, instanceStride);
+}
+
+// method: setTessellationFactorScale: -- sends this selector through Object::sendMessage<void>, passing the float scale unchanged; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTessellationFactorScale(float scale)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationFactorScale_), scale);
+}
+
+// method: drawPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:instanceCount:baseInstance: -- seven-argument overload; the first selector argument is the numberOfPatchControlPoints parameter. Sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_instanceCount_baseInstance_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, instanceCount, baseInstance);
+}
+
+// method: drawPatches:patchIndexBuffer:patchIndexBufferOffset:indirectBuffer:indirectBufferOffset: -- overload taking an indirect buffer and offset; sends this selector through Object::sendMessage<void>, forwarding the five arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawPatches(NS::UInteger numberOfPatchControlPoints, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawPatches_patchIndexBuffer_patchIndexBufferOffset_indirectBuffer_indirectBufferOffset_), numberOfPatchControlPoints, patchIndexBuffer, patchIndexBufferOffset, indirectBuffer, indirectBufferOffset);
+}
+
+// method: drawIndexedPatches:patchStart:patchCount:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:instanceCount:baseInstance: -- nine-argument overload; sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, NS::UInteger patchStart, NS::UInteger patchCount, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, NS::UInteger instanceCount, NS::UInteger baseInstance)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPatches_patchStart_patchCount_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_instanceCount_baseInstance_), numberOfPatchControlPoints, patchStart, patchCount, patchIndexBuffer, patchIndexBufferOffset, controlPointIndexBuffer, controlPointIndexBufferOffset, instanceCount, baseInstance);
+}
+
+// method: drawIndexedPatches:patchIndexBuffer:patchIndexBufferOffset:controlPointIndexBuffer:controlPointIndexBufferOffset:indirectBuffer:indirectBufferOffset: -- overload taking an indirect buffer and offset; sends this selector through Object::sendMessage<void>, forwarding the seven arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::drawIndexedPatches(NS::UInteger numberOfPatchControlPoints, const MTL::Buffer* patchIndexBuffer, NS::UInteger patchIndexBufferOffset, const MTL::Buffer* controlPointIndexBuffer, NS::UInteger controlPointIndexBufferOffset, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(drawIndexedPatches_patchIndexBuffer_patchIndexBufferOffset_controlPointIndexBuffer_controlPointIndexBufferOffset_indirectBuffer_indirectBufferOffset_), numberOfPatchControlPoints, patchIndexBuffer, patchIndexBufferOffset, controlPointIndexBuffer, controlPointIndexBufferOffset, indirectBuffer, indirectBufferOffset);
+}
+
+// property: tileWidth -- const getter; returns the NS::UInteger result of sending the tileWidth selector to the encoder through Object::sendMessage.
+_MTL_INLINE NS::UInteger MTL::RenderCommandEncoder::tileWidth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(tileWidth));
+}
+
+// property: tileHeight -- const getter; returns the NS::UInteger result of sending the tileHeight selector to the encoder through Object::sendMessage.
+_MTL_INLINE NS::UInteger MTL::RenderCommandEncoder::tileHeight() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(tileHeight));
+}
+
+// method: setTileBytes:length:atIndex: -- sends this selector through Object::sendMessage<void>, passing the bytes pointer, length and index in that order; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBytes(const void* bytes, NS::UInteger length, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBytes_length_atIndex_), bytes, length, index);
+}
+
+// method: setTileBuffer:offset:atIndex: -- sends this selector through Object::sendMessage<void>, passing buffer, offset and index in that order; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBuffer(const MTL::Buffer* buffer, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBuffer_offset_atIndex_), buffer, offset, index);
+}
+
+// method: setTileBufferOffset:atIndex: -- sends this selector through Object::sendMessage<void>, passing offset then index; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBufferOffset(NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBufferOffset_atIndex_), offset, index);
+}
+
+// method: setTileBuffers:offsets:withRange: -- sends this selector through Object::sendMessage<void>, passing buffers, offsets and range. NOTE(review): buffers is declared as a single MTL::Buffer* here, while setTileTextures/setTileSamplerStates take pointer arrays (T* x[]); confirm against the class declaration before relying on it as an array.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileBuffers(MTL::Buffer* buffers, const NS::UInteger* offsets, NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileBuffers_offsets_withRange_), buffers, offsets, range);
+}
+
+// method: setTileTexture:atIndex: -- sends this selector through Object::sendMessage<void>, passing texture then index; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileTexture(const MTL::Texture* texture, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileTexture_atIndex_), texture, index);
+}
+
+// method: setTileTextures:withRange: -- plural form taking an array of MTL::Texture pointers and an NS::Range; sends this selector through Object::sendMessage<void> with both arguments unchanged.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileTextures(MTL::Texture* textures[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileTextures_withRange_), textures, range);
+}
+
+// method: setTileSamplerState:atIndex: -- two-argument overload; sends this selector through Object::sendMessage<void>, passing sampler then index.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerState(const MTL::SamplerState* sampler, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerState_atIndex_), sampler, index);
+}
+
+// method: setTileSamplerStates:withRange: -- plural two-argument overload taking an array of MTL::SamplerState pointers and an NS::Range; sends this selector through Object::sendMessage<void> with both arguments unchanged.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerStates(MTL::SamplerState* samplers[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerStates_withRange_), samplers, range);
+}
+
+// method: setTileSamplerState:lodMinClamp:lodMaxClamp:atIndex: -- four-argument overload adding two float clamp values; sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerState(const MTL::SamplerState* sampler, float lodMinClamp, float lodMaxClamp, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerState_lodMinClamp_lodMaxClamp_atIndex_), sampler, lodMinClamp, lodMaxClamp, index);
+}
+
+// method: setTileSamplerStates:lodMinClamps:lodMaxClamps:withRange: -- plural four-argument overload taking three arrays plus an NS::Range; sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileSamplerStates(MTL::SamplerState* samplers[], const float lodMinClamps[], const float lodMaxClamps[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileSamplerStates_lodMinClamps_lodMaxClamps_withRange_), samplers, lodMinClamps, lodMaxClamps, range);
+}
+
+// method: setTileVisibleFunctionTable:atBufferIndex: -- sends this selector through Object::sendMessage<void>, passing functionTable then bufferIndex; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileVisibleFunctionTable(const MTL::VisibleFunctionTable* functionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileVisibleFunctionTable_atBufferIndex_), functionTable, bufferIndex);
+}
+
+// method: setTileVisibleFunctionTables:withBufferRange: -- plural form; the range parameter fills the withBufferRange: slot. Sends this selector through Object::sendMessage<void> with both arguments unchanged.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileVisibleFunctionTables(const MTL::VisibleFunctionTable* functionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileVisibleFunctionTables_withBufferRange_), functionTables, range);
+}
+
+// method: setTileIntersectionFunctionTable:atBufferIndex: -- sends this selector through Object::sendMessage<void>, passing intersectionFunctionTable then bufferIndex; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileIntersectionFunctionTable(const MTL::IntersectionFunctionTable* intersectionFunctionTable, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileIntersectionFunctionTable_atBufferIndex_), intersectionFunctionTable, bufferIndex);
+}
+
+// method: setTileIntersectionFunctionTables:withBufferRange: -- plural form; the range parameter fills the withBufferRange: slot. Sends this selector through Object::sendMessage<void> with both arguments unchanged.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileIntersectionFunctionTables(const MTL::IntersectionFunctionTable* intersectionFunctionTables[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileIntersectionFunctionTables_withBufferRange_), intersectionFunctionTables, range);
+}
+
+// method: setTileAccelerationStructure:atBufferIndex: -- sends this selector through Object::sendMessage<void>, passing accelerationStructure then bufferIndex; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setTileAccelerationStructure(const MTL::AccelerationStructure* accelerationStructure, NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileAccelerationStructure_atBufferIndex_), accelerationStructure, bufferIndex);
+}
+
+// method: dispatchThreadsPerTile: -- sends this selector through Object::sendMessage<void>, passing the MTL::Size argument by value; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::dispatchThreadsPerTile(MTL::Size threadsPerTile)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(dispatchThreadsPerTile_), threadsPerTile);
+}
+
+// method: setThreadgroupMemoryLength:offset:atIndex: -- sends this selector through Object::sendMessage<void>, passing length, offset and index in that order; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::setThreadgroupMemoryLength(NS::UInteger length, NS::UInteger offset, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_offset_atIndex_), length, offset, index);
+}
+
+// method: useResource:usage: -- two-argument overload (no stages); sends this selector through Object::sendMessage<void>, passing resource then usage.
+_MTL_INLINE void MTL::RenderCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResource_usage_), resource, usage);
+}
+
+// method: useResources:count:usage: -- three-argument overload taking an array of MTL::Resource pointers and its count; sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::useResources(MTL::Resource* resources[], NS::UInteger count, MTL::ResourceUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResources_count_usage_), resources, count, usage);
+}
+
+// method: useResource:usage:stages: -- three-argument overload adding MTL::RenderStages; sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::useResource(const MTL::Resource* resource, MTL::ResourceUsage usage, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResource_usage_stages_), resource, usage, stages);
+}
+
+// method: useResources:count:usage:stages: -- four-argument overload; sends this selector through Object::sendMessage<void>. NOTE(review): resources is declared as a single MTL::Resource* here, while the three-argument useResources overload takes MTL::Resource* resources[]; confirm against the class declaration before relying on it as an array.
+_MTL_INLINE void MTL::RenderCommandEncoder::useResources(MTL::Resource* resources, NS::UInteger count, MTL::ResourceUsage usage, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useResources_count_usage_stages_), resources, count, usage, stages);
+}
+
+// method: useHeap: -- one-argument overload (no stages); sends this selector through Object::sendMessage<void>, passing heap unchanged.
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeap(const MTL::Heap* heap)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeap_), heap);
+}
+
+// method: useHeaps:count: -- two-argument overload taking an array of MTL::Heap pointers and its count; sends this selector through Object::sendMessage<void> with both arguments unchanged.
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeaps(MTL::Heap* heaps[], NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeaps_count_), heaps, count);
+}
+
+// method: useHeap:stages: -- two-argument overload adding MTL::RenderStages; sends this selector through Object::sendMessage<void>, passing heap then stages.
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeap(const MTL::Heap* heap, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeap_stages_), heap, stages);
+}
+
+// method: useHeaps:count:stages: -- three-argument overload adding MTL::RenderStages; sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::useHeaps(MTL::Heap* heaps[], NS::UInteger count, MTL::RenderStages stages)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(useHeaps_count_stages_), heaps, count, stages);
+}
+
+// method: executeCommandsInBuffer:withRange: -- overload taking an explicit NS::Range; sends this selector through Object::sendMessage<void>, passing indirectCommandBuffer then executionRange.
+_MTL_INLINE void MTL::RenderCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandBuffer, NS::Range executionRange)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_withRange_), indirectCommandBuffer, executionRange);
+}
+
+// method: executeCommandsInBuffer:indirectBuffer:indirectBufferOffset: -- overload taking a buffer and offset in place of a range; the indirectRangeBuffer parameter fills the indirectBuffer: slot. Sends this selector through Object::sendMessage<void>, forwarding the arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::executeCommandsInBuffer(const MTL::IndirectCommandBuffer* indirectCommandbuffer, const MTL::Buffer* indirectRangeBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(executeCommandsInBuffer_indirectBuffer_indirectBufferOffset_), indirectCommandbuffer, indirectRangeBuffer, indirectBufferOffset);
+}
+
+// method: memoryBarrierWithScope:afterStages:beforeStages: -- exposed in C++ as the memoryBarrier overload taking a MTL::BarrierScope; sends this selector through Object::sendMessage<void>, passing scope, after and before in that order.
+_MTL_INLINE void MTL::RenderCommandEncoder::memoryBarrier(MTL::BarrierScope scope, MTL::RenderStages after, MTL::RenderStages before)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(memoryBarrierWithScope_afterStages_beforeStages_), scope, after, before);
+}
+
+// method: memoryBarrierWithResources:count:afterStages:beforeStages: -- exposed in C++ as the memoryBarrier overload taking a resource array and count; sends this selector through Object::sendMessage<void>, forwarding the four arguments in declaration order.
+_MTL_INLINE void MTL::RenderCommandEncoder::memoryBarrier(MTL::Resource* resources[], NS::UInteger count, MTL::RenderStages after, MTL::RenderStages before)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(memoryBarrierWithResources_count_afterStages_beforeStages_), resources, count, after, before);
+}
+
+// method: sampleCountersInBuffer:atSampleIndex:withBarrier: -- sends this selector through Object::sendMessage<void>, passing sampleBuffer, sampleIndex and the bool barrier in that order; returns nothing.
+_MTL_INLINE void MTL::RenderCommandEncoder::sampleCountersInBuffer(const MTL::CounterSampleBuffer* sampleBuffer, NS::UInteger sampleIndex, bool barrier)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(sampleCountersInBuffer_atSampleIndex_withBarrier_), sampleBuffer, sampleIndex, barrier);
+}
diff --git a/metal-cpp/Metal/MTLRenderPass.hpp b/metal-cpp/Metal/MTLRenderPass.hpp
new file mode 100644
index 0000000..85751ea
--- /dev/null
+++ b/metal-cpp/Metal/MTLRenderPass.hpp
@@ -0,0 +1,786 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLRenderPass.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLRenderPass.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, LoadAction) { // NS::UInteger-backed enumeration; it is the value type of RenderPassAttachmentDescriptor::loadAction()/setLoadAction().
+    LoadActionDontCare = 0,
+    LoadActionLoad = 1,
+    LoadActionClear = 2,
+};
+
+_MTL_ENUM(NS::UInteger, StoreAction) { // NS::UInteger-backed enumeration; it is the value type of RenderPassAttachmentDescriptor::storeAction()/setStoreAction().
+    StoreActionDontCare = 0,
+    StoreActionStore = 1,
+    StoreActionMultisampleResolve = 2,
+    StoreActionStoreAndMultisampleResolve = 3,
+    StoreActionUnknown = 4,
+    StoreActionCustomSampleDepthStore = 5,
+};
+
+_MTL_OPTIONS(NS::UInteger, StoreActionOptions) { // Declared with _MTL_OPTIONS rather than _MTL_ENUM; value type of the storeActionOptions accessors and the set*StoreActionOptions encoder methods.
+    StoreActionOptionNone = 0,
+    StoreActionOptionValidMask = 1, // same value as the only non-zero option declared below
+    StoreActionOptionCustomSamplePositions = 1,
+};
+
+struct ClearColor // Plain aggregate of four double components (red, green, blue, alpha), declared with _MTL_PACKED.
+{
+    static ClearColor Make(double red, double green, double blue, double alpha);
+    // Defaulted constructor; the members below carry no default member initializers.
+    ClearColor() = default;
+    // Component-wise constructor; defined out of class after the namespace closes.
+    ClearColor(double red, double green, double blue, double alpha);
+
+    double red;
+    double green;
+    double blue;
+    double alpha;
+} _MTL_PACKED;
+
+class RenderPassAttachmentDescriptor : public NS::Copying<RenderPassAttachmentDescriptor> // Declares the accessors shared by attachment descriptors; the Color/Depth/Stencil descriptor classes name this type as their second NS::Copying argument.
+{
+public:
+    static class RenderPassAttachmentDescriptor* alloc();
+
+    class RenderPassAttachmentDescriptor*        init();
+    // Accessors for the attachment texture and its level, slice and depthPlane values.
+    class Texture*                               texture() const;
+    void                                         setTexture(const class Texture* texture);
+
+    NS::UInteger                                 level() const;
+    void                                         setLevel(NS::UInteger level);
+
+    NS::UInteger                                 slice() const;
+    void                                         setSlice(NS::UInteger slice);
+
+    NS::UInteger                                 depthPlane() const;
+    void                                         setDepthPlane(NS::UInteger depthPlane);
+    // resolve* counterparts of the four accessor pairs above.
+    class Texture*                               resolveTexture() const;
+    void                                         setResolveTexture(const class Texture* resolveTexture);
+
+    NS::UInteger                                 resolveLevel() const;
+    void                                         setResolveLevel(NS::UInteger resolveLevel);
+
+    NS::UInteger                                 resolveSlice() const;
+    void                                         setResolveSlice(NS::UInteger resolveSlice);
+
+    NS::UInteger                                 resolveDepthPlane() const;
+    void                                         setResolveDepthPlane(NS::UInteger resolveDepthPlane);
+    // Load/store settings, typed with the LoadAction, StoreAction and StoreActionOptions types declared earlier in this header.
+    MTL::LoadAction                              loadAction() const;
+    void                                         setLoadAction(MTL::LoadAction loadAction);
+
+    MTL::StoreAction                             storeAction() const;
+    void                                         setStoreAction(MTL::StoreAction storeAction);
+
+    MTL::StoreActionOptions                      storeActionOptions() const;
+    void                                         setStoreActionOptions(MTL::StoreActionOptions storeActionOptions);
+};
+
+class RenderPassColorAttachmentDescriptor : public NS::Copying<RenderPassColorAttachmentDescriptor, MTL::RenderPassAttachmentDescriptor> // Adds a clearColor property (MTL::ClearColor, passed and returned by value) on top of MTL::RenderPassAttachmentDescriptor.
+{
+public:
+    static class RenderPassColorAttachmentDescriptor* alloc();
+
+    class RenderPassColorAttachmentDescriptor*        init();
+
+    MTL::ClearColor                                   clearColor() const;
+    void                                              setClearColor(MTL::ClearColor clearColor);
+};
+
+_MTL_ENUM(NS::UInteger, MultisampleDepthResolveFilter) { // NS::UInteger-backed enumeration; value type of RenderPassDepthAttachmentDescriptor::depthResolveFilter()/setDepthResolveFilter().
+    MultisampleDepthResolveFilterSample0 = 0,
+    MultisampleDepthResolveFilterMin = 1,
+    MultisampleDepthResolveFilterMax = 2,
+};
+
+class RenderPassDepthAttachmentDescriptor : public NS::Copying<RenderPassDepthAttachmentDescriptor, MTL::RenderPassAttachmentDescriptor> // Adds clearDepth (double) and depthResolveFilter properties on top of MTL::RenderPassAttachmentDescriptor.
+{
+public:
+    static class RenderPassDepthAttachmentDescriptor* alloc();
+
+    class RenderPassDepthAttachmentDescriptor*        init();
+
+    double                                            clearDepth() const;
+    void                                              setClearDepth(double clearDepth);
+
+    MTL::MultisampleDepthResolveFilter                depthResolveFilter() const;
+    void                                              setDepthResolveFilter(MTL::MultisampleDepthResolveFilter depthResolveFilter);
+};
+
+_MTL_ENUM(NS::UInteger, MultisampleStencilResolveFilter) { // NS::UInteger-backed enumeration; value type of RenderPassStencilAttachmentDescriptor::stencilResolveFilter()/setStencilResolveFilter().
+    MultisampleStencilResolveFilterSample0 = 0,
+    MultisampleStencilResolveFilterDepthResolvedSample = 1,
+};
+
+class RenderPassStencilAttachmentDescriptor : public NS::Copying<RenderPassStencilAttachmentDescriptor, MTL::RenderPassAttachmentDescriptor> // Adds clearStencil (uint32_t) and stencilResolveFilter properties on top of MTL::RenderPassAttachmentDescriptor.
+{
+public:
+    static class RenderPassStencilAttachmentDescriptor* alloc();
+
+    class RenderPassStencilAttachmentDescriptor*        init();
+
+    uint32_t                                            clearStencil() const;
+    void                                                setClearStencil(uint32_t clearStencil);
+
+    MTL::MultisampleStencilResolveFilter                stencilResolveFilter() const;
+    void                                                setStencilResolveFilter(MTL::MultisampleStencilResolveFilter stencilResolveFilter);
+};
+
+class RenderPassColorAttachmentDescriptorArray : public NS::Referencing<RenderPassColorAttachmentDescriptorArray> // Indexed container of RenderPassColorAttachmentDescriptor pointers; derives from NS::Referencing, not NS::Copying.
+{
+public:
+    static class RenderPassColorAttachmentDescriptorArray* alloc();
+
+    class RenderPassColorAttachmentDescriptorArray*        init();
+    // Indexed read; note that this accessor is not declared const.
+    class RenderPassColorAttachmentDescriptor*             object(NS::UInteger attachmentIndex);
+    // Indexed write of the descriptor for attachmentIndex.
+    void                                                   setObject(const class RenderPassColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex);
+};
+
+class RenderPassSampleBufferAttachmentDescriptor : public NS::Copying<RenderPassSampleBufferAttachmentDescriptor> // Pairs a CounterSampleBuffer with four NS::UInteger sample indices (start/end of vertex, start/end of fragment).
+{
+public:
+    static class RenderPassSampleBufferAttachmentDescriptor* alloc();
+
+    class RenderPassSampleBufferAttachmentDescriptor*        init();
+
+    class CounterSampleBuffer*                               sampleBuffer() const;
+    void                                                     setSampleBuffer(const class CounterSampleBuffer* sampleBuffer);
+    // The four index properties below all share the NS::UInteger getter/setter shape.
+    NS::UInteger                                             startOfVertexSampleIndex() const;
+    void                                                     setStartOfVertexSampleIndex(NS::UInteger startOfVertexSampleIndex);
+
+    NS::UInteger                                             endOfVertexSampleIndex() const;
+    void                                                     setEndOfVertexSampleIndex(NS::UInteger endOfVertexSampleIndex);
+
+    NS::UInteger                                             startOfFragmentSampleIndex() const;
+    void                                                     setStartOfFragmentSampleIndex(NS::UInteger startOfFragmentSampleIndex);
+
+    NS::UInteger                                             endOfFragmentSampleIndex() const;
+    void                                                     setEndOfFragmentSampleIndex(NS::UInteger endOfFragmentSampleIndex);
+};
+
+class RenderPassSampleBufferAttachmentDescriptorArray : public NS::Referencing<RenderPassSampleBufferAttachmentDescriptorArray> // Indexed container of RenderPassSampleBufferAttachmentDescriptor pointers; derives from NS::Referencing, not NS::Copying.
+{
+public:
+    static class RenderPassSampleBufferAttachmentDescriptorArray* alloc();
+
+    class RenderPassSampleBufferAttachmentDescriptorArray*        init();
+    // Indexed read; note that this accessor is not declared const.
+    class RenderPassSampleBufferAttachmentDescriptor*             object(NS::UInteger attachmentIndex);
+    // Indexed write of the descriptor for attachmentIndex.
+    void                                                          setObject(const class RenderPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex);
+};
+
+class RenderPassDescriptor : public NS::Copying<RenderPassDescriptor> // Top-level descriptor: exposes the color, depth, stencil and sample-buffer attachment objects plus scalar render-pass settings.
+{
+public:
+    static class RenderPassDescriptor*                     alloc();
+
+    class RenderPassDescriptor*                            init();
+    // Static factory declared in addition to alloc()/init().
+    static class RenderPassDescriptor*                     renderPassDescriptor();
+    // Getter only: no setColorAttachments is declared in this class.
+    class RenderPassColorAttachmentDescriptorArray*        colorAttachments() const;
+
+    class RenderPassDepthAttachmentDescriptor*             depthAttachment() const;
+    void                                                   setDepthAttachment(const class RenderPassDepthAttachmentDescriptor* depthAttachment);
+
+    class RenderPassStencilAttachmentDescriptor*           stencilAttachment() const;
+    void                                                   setStencilAttachment(const class RenderPassStencilAttachmentDescriptor* stencilAttachment);
+
+    class Buffer*                                          visibilityResultBuffer() const;
+    void                                                   setVisibilityResultBuffer(const class Buffer* visibilityResultBuffer);
+    // Scalar NS::UInteger settings, each with a const getter and a setter.
+    NS::UInteger                                           renderTargetArrayLength() const;
+    void                                                   setRenderTargetArrayLength(NS::UInteger renderTargetArrayLength);
+
+    NS::UInteger                                           imageblockSampleLength() const;
+    void                                                   setImageblockSampleLength(NS::UInteger imageblockSampleLength);
+
+    NS::UInteger                                           threadgroupMemoryLength() const;
+    void                                                   setThreadgroupMemoryLength(NS::UInteger threadgroupMemoryLength);
+
+    NS::UInteger                                           tileWidth() const;
+    void                                                   setTileWidth(NS::UInteger tileWidth);
+
+    NS::UInteger                                           tileHeight() const;
+    void                                                   setTileHeight(NS::UInteger tileHeight);
+
+    NS::UInteger                                           defaultRasterSampleCount() const;
+    void                                                   setDefaultRasterSampleCount(NS::UInteger defaultRasterSampleCount);
+
+    NS::UInteger                                           renderTargetWidth() const;
+    void                                                   setRenderTargetWidth(NS::UInteger renderTargetWidth);
+
+    NS::UInteger                                           renderTargetHeight() const;
+    void                                                   setRenderTargetHeight(NS::UInteger renderTargetHeight);
+    // NOTE(review): MTL::SamplePosition is not declared in this header; presumably it comes from a header included earlier -- confirm.
+    void                                                   setSamplePositions(const MTL::SamplePosition* positions, NS::UInteger count);
+    // Writes through the caller-supplied positions pointer and returns an NS::UInteger; unlike the other getters it is not declared const.
+    NS::UInteger                                           getSamplePositions(MTL::SamplePosition* positions, NS::UInteger count);
+
+    class RasterizationRateMap*                            rasterizationRateMap() const;
+    void                                                   setRasterizationRateMap(const class RasterizationRateMap* rasterizationRateMap);
+    // Getter only: no setSampleBufferAttachments is declared in this class.
+    class RenderPassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const;
+};
+
+}
+
+_MTL_INLINE MTL::ClearColor MTL::ClearColor::Make(double red, double green, double blue, double alpha) // Static factory: returns a ClearColor built from the four components via the four-argument constructor.
+{
+    return ClearColor(red, green, blue, alpha);
+}
+
+_MTL_INLINE MTL::ClearColor::ClearColor(double _red, double _green, double _blue, double _alpha) // Initializes each member from the matching underscore-prefixed argument; the body is intentionally empty.
+    : red(_red)
+    , green(_green)
+    , blue(_blue)
+    , alpha(_alpha)
+{
+}
+
+// static method: alloc -- returns the result of NS::Object::alloc<MTL::RenderPassAttachmentDescriptor>, called with _MTL_PRIVATE_CLS(MTLRenderPassAttachmentDescriptor).
+_MTL_INLINE MTL::RenderPassAttachmentDescriptor* MTL::RenderPassAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPassAttachmentDescriptor));
+}
+
+// method: init -- returns the result of NS::Object::init<MTL::RenderPassAttachmentDescriptor>(); takes no arguments.
+_MTL_INLINE MTL::RenderPassAttachmentDescriptor* MTL::RenderPassAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPassAttachmentDescriptor>();
+}
+
+// property: texture -- the getter returns the MTL::Texture* obtained by sending the texture selector; the setter sends setTexture_ with the new pointer. Both go through Object::sendMessage.
+_MTL_INLINE MTL::Texture* MTL::RenderPassAttachmentDescriptor::texture() const
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(texture));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setTexture(const MTL::Texture* texture)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTexture_), texture);
+}
+
+// property: level -- the getter returns the NS::UInteger obtained by sending the level selector; the setter sends setLevel_ with the new value. Both go through Object::sendMessage.
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::level() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(level));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setLevel(NS::UInteger level)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLevel_), level);
+}
+
+// property: slice -- the getter returns the NS::UInteger obtained by sending the slice selector; the setter sends setSlice_ with the new value. Both go through Object::sendMessage.
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::slice() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(slice));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setSlice(NS::UInteger slice)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSlice_), slice);
+}
+
+// property: depthPlane -- the getter returns the NS::UInteger obtained by sending the depthPlane selector; the setter sends setDepthPlane_ with the new value. Both go through Object::sendMessage.
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::depthPlane() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(depthPlane));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setDepthPlane(NS::UInteger depthPlane)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthPlane_), depthPlane);
+}
+
+// property: resolveTexture -- the getter returns the MTL::Texture* obtained by sending the resolveTexture selector; the setter sends setResolveTexture_ with the new pointer. Both go through Object::sendMessage.
+_MTL_INLINE MTL::Texture* MTL::RenderPassAttachmentDescriptor::resolveTexture() const
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(resolveTexture));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveTexture(const MTL::Texture* resolveTexture)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveTexture_), resolveTexture);
+}
+
+// property: resolveLevel -- the getter returns the NS::UInteger obtained by sending the resolveLevel selector; the setter sends setResolveLevel_ with the new value. Both go through Object::sendMessage.
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::resolveLevel() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(resolveLevel));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveLevel(NS::UInteger resolveLevel)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveLevel_), resolveLevel);
+}
+
+// property: resolveSlice
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::resolveSlice() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(resolveSlice));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveSlice(NS::UInteger resolveSlice)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveSlice_), resolveSlice);
+}
+
+// property: resolveDepthPlane
+_MTL_INLINE NS::UInteger MTL::RenderPassAttachmentDescriptor::resolveDepthPlane() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(resolveDepthPlane));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setResolveDepthPlane(NS::UInteger resolveDepthPlane)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResolveDepthPlane_), resolveDepthPlane);
+}
+
+// property: loadAction
+_MTL_INLINE MTL::LoadAction MTL::RenderPassAttachmentDescriptor::loadAction() const
+{
+    return Object::sendMessage<MTL::LoadAction>(this, _MTL_PRIVATE_SEL(loadAction));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setLoadAction(MTL::LoadAction loadAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLoadAction_), loadAction);
+}
+
+// property: storeAction
+_MTL_INLINE MTL::StoreAction MTL::RenderPassAttachmentDescriptor::storeAction() const
+{
+    return Object::sendMessage<MTL::StoreAction>(this, _MTL_PRIVATE_SEL(storeAction));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setStoreAction(MTL::StoreAction storeAction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStoreAction_), storeAction);
+}
+
+// property: storeActionOptions
+_MTL_INLINE MTL::StoreActionOptions MTL::RenderPassAttachmentDescriptor::storeActionOptions() const
+{
+    return Object::sendMessage<MTL::StoreActionOptions>(this, _MTL_PRIVATE_SEL(storeActionOptions));
+}
+
+_MTL_INLINE void MTL::RenderPassAttachmentDescriptor::setStoreActionOptions(MTL::StoreActionOptions storeActionOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStoreActionOptions_), storeActionOptions);
+}
+
+// static method: alloc -- passes the MTLRenderPassColorAttachmentDescriptor class handle to NS::Object::alloc.
+_MTL_INLINE MTL::RenderPassColorAttachmentDescriptor* MTL::RenderPassColorAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassColorAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPassColorAttachmentDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this class.
+_MTL_INLINE MTL::RenderPassColorAttachmentDescriptor* MTL::RenderPassColorAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPassColorAttachmentDescriptor>();
+}
+
+// property: clearColor -- MTL::ClearColor passed and returned by value; selectors clearColor / setClearColor_.
+_MTL_INLINE MTL::ClearColor MTL::RenderPassColorAttachmentDescriptor::clearColor() const
+{
+    return Object::sendMessage<MTL::ClearColor>(this, _MTL_PRIVATE_SEL(clearColor));
+}
+
+_MTL_INLINE void MTL::RenderPassColorAttachmentDescriptor::setClearColor(MTL::ClearColor clearColor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setClearColor_), clearColor);
+}
+
+// static method: alloc -- passes the MTLRenderPassDepthAttachmentDescriptor class handle to NS::Object::alloc.
+_MTL_INLINE MTL::RenderPassDepthAttachmentDescriptor* MTL::RenderPassDepthAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassDepthAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPassDepthAttachmentDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this class.
+_MTL_INLINE MTL::RenderPassDepthAttachmentDescriptor* MTL::RenderPassDepthAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPassDepthAttachmentDescriptor>();
+}
+
+// property: clearDepth -- double; read via the clearDepth selector, written via setClearDepth_.
+_MTL_INLINE double MTL::RenderPassDepthAttachmentDescriptor::clearDepth() const
+{
+    return Object::sendMessage<double>(this, _MTL_PRIVATE_SEL(clearDepth));
+}
+
+_MTL_INLINE void MTL::RenderPassDepthAttachmentDescriptor::setClearDepth(double clearDepth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setClearDepth_), clearDepth);
+}
+
+// property: depthResolveFilter -- MTL::MultisampleDepthResolveFilter by value; selectors depthResolveFilter / setDepthResolveFilter_.
+_MTL_INLINE MTL::MultisampleDepthResolveFilter MTL::RenderPassDepthAttachmentDescriptor::depthResolveFilter() const
+{
+    return Object::sendMessage<MTL::MultisampleDepthResolveFilter>(this, _MTL_PRIVATE_SEL(depthResolveFilter));
+}
+
+_MTL_INLINE void MTL::RenderPassDepthAttachmentDescriptor::setDepthResolveFilter(MTL::MultisampleDepthResolveFilter depthResolveFilter)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthResolveFilter_), depthResolveFilter);
+}
+
+// static method: alloc -- passes the MTLRenderPassStencilAttachmentDescriptor class handle to NS::Object::alloc.
+_MTL_INLINE MTL::RenderPassStencilAttachmentDescriptor* MTL::RenderPassStencilAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassStencilAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPassStencilAttachmentDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this class.
+_MTL_INLINE MTL::RenderPassStencilAttachmentDescriptor* MTL::RenderPassStencilAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPassStencilAttachmentDescriptor>();
+}
+
+// property: clearStencil -- uint32_t; read via the clearStencil selector, written via setClearStencil_.
+_MTL_INLINE uint32_t MTL::RenderPassStencilAttachmentDescriptor::clearStencil() const
+{
+    return Object::sendMessage<uint32_t>(this, _MTL_PRIVATE_SEL(clearStencil));
+}
+
+_MTL_INLINE void MTL::RenderPassStencilAttachmentDescriptor::setClearStencil(uint32_t clearStencil)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setClearStencil_), clearStencil);
+}
+
+// property: stencilResolveFilter -- MTL::MultisampleStencilResolveFilter by value; selectors stencilResolveFilter / setStencilResolveFilter_.
+_MTL_INLINE MTL::MultisampleStencilResolveFilter MTL::RenderPassStencilAttachmentDescriptor::stencilResolveFilter() const
+{
+    return Object::sendMessage<MTL::MultisampleStencilResolveFilter>(this, _MTL_PRIVATE_SEL(stencilResolveFilter));
+}
+
+_MTL_INLINE void MTL::RenderPassStencilAttachmentDescriptor::setStencilResolveFilter(MTL::MultisampleStencilResolveFilter stencilResolveFilter)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilResolveFilter_), stencilResolveFilter);
+}
+
+// static method: alloc -- passes the MTLRenderPassColorAttachmentDescriptorArray class handle to NS::Object::alloc.
+_MTL_INLINE MTL::RenderPassColorAttachmentDescriptorArray* MTL::RenderPassColorAttachmentDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassColorAttachmentDescriptorArray>(_MTL_PRIVATE_CLS(MTLRenderPassColorAttachmentDescriptorArray));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this class.
+_MTL_INLINE MTL::RenderPassColorAttachmentDescriptorArray* MTL::RenderPassColorAttachmentDescriptorArray::init()
+{
+    return NS::Object::init<MTL::RenderPassColorAttachmentDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: -- exposed as object(); forwards attachmentIndex and returns the descriptor pointer from the message.
+_MTL_INLINE MTL::RenderPassColorAttachmentDescriptor* MTL::RenderPassColorAttachmentDescriptorArray::object(NS::UInteger attachmentIndex)
+{
+    return Object::sendMessage<MTL::RenderPassColorAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex);
+}
+
+// method: setObject:atIndexedSubscript: -- exposed as setObject(); forwards the descriptor first, then the index.
+_MTL_INLINE void MTL::RenderPassColorAttachmentDescriptorArray::setObject(const MTL::RenderPassColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex);
+}
+
+// static method: alloc -- passes the MTLRenderPassSampleBufferAttachmentDescriptor class handle to NS::Object::alloc.
+_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptor* MTL::RenderPassSampleBufferAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassSampleBufferAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPassSampleBufferAttachmentDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this class.
+_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptor* MTL::RenderPassSampleBufferAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPassSampleBufferAttachmentDescriptor>();
+}
+
+// property: sampleBuffer -- MTL::CounterSampleBuffer*; read via the sampleBuffer selector, written via setSampleBuffer_.
+_MTL_INLINE MTL::CounterSampleBuffer* MTL::RenderPassSampleBufferAttachmentDescriptor::sampleBuffer() const
+{
+    return Object::sendMessage<MTL::CounterSampleBuffer*>(this, _MTL_PRIVATE_SEL(sampleBuffer));
+}
+
+_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer);
+}
+
+// property: startOfVertexSampleIndex -- NS::UInteger; selectors startOfVertexSampleIndex / setStartOfVertexSampleIndex_.
+_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::startOfVertexSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(startOfVertexSampleIndex));
+}
+
+_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setStartOfVertexSampleIndex(NS::UInteger startOfVertexSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStartOfVertexSampleIndex_), startOfVertexSampleIndex);
+}
+
+// property: endOfVertexSampleIndex -- NS::UInteger; selectors endOfVertexSampleIndex / setEndOfVertexSampleIndex_.
+_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::endOfVertexSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(endOfVertexSampleIndex));
+}
+
+_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setEndOfVertexSampleIndex(NS::UInteger endOfVertexSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setEndOfVertexSampleIndex_), endOfVertexSampleIndex);
+}
+
+// property: startOfFragmentSampleIndex -- NS::UInteger; selectors startOfFragmentSampleIndex / setStartOfFragmentSampleIndex_.
+_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::startOfFragmentSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(startOfFragmentSampleIndex));
+}
+
+_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setStartOfFragmentSampleIndex(NS::UInteger startOfFragmentSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStartOfFragmentSampleIndex_), startOfFragmentSampleIndex);
+}
+
+// property: endOfFragmentSampleIndex -- NS::UInteger; selectors endOfFragmentSampleIndex / setEndOfFragmentSampleIndex_.
+_MTL_INLINE NS::UInteger MTL::RenderPassSampleBufferAttachmentDescriptor::endOfFragmentSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(endOfFragmentSampleIndex));
+}
+
+_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptor::setEndOfFragmentSampleIndex(NS::UInteger endOfFragmentSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setEndOfFragmentSampleIndex_), endOfFragmentSampleIndex);
+}
+
+// static method: alloc -- passes the MTLRenderPassSampleBufferAttachmentDescriptorArray class handle to NS::Object::alloc.
+_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptorArray* MTL::RenderPassSampleBufferAttachmentDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassSampleBufferAttachmentDescriptorArray>(_MTL_PRIVATE_CLS(MTLRenderPassSampleBufferAttachmentDescriptorArray));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this class.
+_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptorArray* MTL::RenderPassSampleBufferAttachmentDescriptorArray::init()
+{
+    return NS::Object::init<MTL::RenderPassSampleBufferAttachmentDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: -- exposed as object(); forwards attachmentIndex and returns the descriptor pointer from the message.
+_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptor* MTL::RenderPassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex)
+{
+    return Object::sendMessage<MTL::RenderPassSampleBufferAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex);
+}
+
+// method: setObject:atIndexedSubscript: -- exposed as setObject(); forwards the descriptor first, then the index.
+_MTL_INLINE void MTL::RenderPassSampleBufferAttachmentDescriptorArray::setObject(const MTL::RenderPassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex);
+}
+
+// static method: alloc -- passes the MTLRenderPassDescriptor class handle to NS::Object::alloc.
+_MTL_INLINE MTL::RenderPassDescriptor* MTL::RenderPassDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPassDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPassDescriptor));
+}
+
+// method: init -- delegates to NS::Object::init, typed as this class.
+_MTL_INLINE MTL::RenderPassDescriptor* MTL::RenderPassDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPassDescriptor>();
+}
+
+// static method: renderPassDescriptor -- the message receiver is the MTLRenderPassDescriptor class handle, not an instance.
+_MTL_INLINE MTL::RenderPassDescriptor* MTL::RenderPassDescriptor::renderPassDescriptor()
+{
+    return Object::sendMessage<MTL::RenderPassDescriptor*>(_MTL_PRIVATE_CLS(MTLRenderPassDescriptor), _MTL_PRIVATE_SEL(renderPassDescriptor));
+}
+
+// property: colorAttachments -- getter only in this section; returns the array pointer from the colorAttachments message.
+_MTL_INLINE MTL::RenderPassColorAttachmentDescriptorArray* MTL::RenderPassDescriptor::colorAttachments() const
+{
+    return Object::sendMessage<MTL::RenderPassColorAttachmentDescriptorArray*>(this, _MTL_PRIVATE_SEL(colorAttachments));
+}
+
+// property: depthAttachment -- descriptor pointer; read via the depthAttachment selector, written via setDepthAttachment_.
+_MTL_INLINE MTL::RenderPassDepthAttachmentDescriptor* MTL::RenderPassDescriptor::depthAttachment() const
+{
+    return Object::sendMessage<MTL::RenderPassDepthAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(depthAttachment));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setDepthAttachment(const MTL::RenderPassDepthAttachmentDescriptor* depthAttachment)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthAttachment_), depthAttachment);
+}
+
+// property: stencilAttachment -- descriptor pointer; read via the stencilAttachment selector, written via setStencilAttachment_.
+_MTL_INLINE MTL::RenderPassStencilAttachmentDescriptor* MTL::RenderPassDescriptor::stencilAttachment() const
+{
+    return Object::sendMessage<MTL::RenderPassStencilAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(stencilAttachment));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setStencilAttachment(const MTL::RenderPassStencilAttachmentDescriptor* stencilAttachment)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilAttachment_), stencilAttachment);
+}
+
+// property: visibilityResultBuffer -- MTL::Buffer*; selectors visibilityResultBuffer / setVisibilityResultBuffer_.
+_MTL_INLINE MTL::Buffer* MTL::RenderPassDescriptor::visibilityResultBuffer() const
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(visibilityResultBuffer));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setVisibilityResultBuffer(const MTL::Buffer* visibilityResultBuffer)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVisibilityResultBuffer_), visibilityResultBuffer);
+}
+
+// property: renderTargetArrayLength -- NS::UInteger; selectors renderTargetArrayLength / setRenderTargetArrayLength_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::renderTargetArrayLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(renderTargetArrayLength));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setRenderTargetArrayLength(NS::UInteger renderTargetArrayLength)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRenderTargetArrayLength_), renderTargetArrayLength);
+}
+
+// property: imageblockSampleLength -- NS::UInteger; selectors imageblockSampleLength / setImageblockSampleLength_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::imageblockSampleLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(imageblockSampleLength));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setImageblockSampleLength(NS::UInteger imageblockSampleLength)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setImageblockSampleLength_), imageblockSampleLength);
+}
+
+// property: threadgroupMemoryLength -- NS::UInteger; selectors threadgroupMemoryLength / setThreadgroupMemoryLength_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::threadgroupMemoryLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(threadgroupMemoryLength));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setThreadgroupMemoryLength(NS::UInteger threadgroupMemoryLength)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadgroupMemoryLength_), threadgroupMemoryLength);
+}
+
+// property: tileWidth -- NS::UInteger; read via the tileWidth selector, written via setTileWidth_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::tileWidth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(tileWidth));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setTileWidth(NS::UInteger tileWidth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileWidth_), tileWidth);
+}
+
+// property: tileHeight -- NS::UInteger; read via the tileHeight selector, written via setTileHeight_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::tileHeight() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(tileHeight));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setTileHeight(NS::UInteger tileHeight)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileHeight_), tileHeight);
+}
+
+// property: defaultRasterSampleCount -- NS::UInteger; selectors defaultRasterSampleCount / setDefaultRasterSampleCount_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::defaultRasterSampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(defaultRasterSampleCount));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setDefaultRasterSampleCount(NS::UInteger defaultRasterSampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDefaultRasterSampleCount_), defaultRasterSampleCount);
+}
+
+// property: renderTargetWidth -- NS::UInteger; selectors renderTargetWidth / setRenderTargetWidth_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::renderTargetWidth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(renderTargetWidth));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setRenderTargetWidth(NS::UInteger renderTargetWidth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRenderTargetWidth_), renderTargetWidth);
+}
+
+// property: renderTargetHeight -- NS::UInteger; selectors renderTargetHeight / setRenderTargetHeight_.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::renderTargetHeight() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(renderTargetHeight));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setRenderTargetHeight(NS::UInteger renderTargetHeight)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRenderTargetHeight_), renderTargetHeight);
+}
+
+// method: setSamplePositions:count: -- forwards the const positions pointer and count unchanged; returns nothing.
+_MTL_INLINE void MTL::RenderPassDescriptor::setSamplePositions(const MTL::SamplePosition* positions, NS::UInteger count)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSamplePositions_count_), positions, count);
+}
+
+// method: getSamplePositions:count: -- forwards the non-const positions pointer and count; returns the NS::UInteger result of the message.
+_MTL_INLINE NS::UInteger MTL::RenderPassDescriptor::getSamplePositions(MTL::SamplePosition* positions, NS::UInteger count)
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(getSamplePositions_count_), positions, count);
+}
+
+// property: rasterizationRateMap -- MTL::RasterizationRateMap*; selectors rasterizationRateMap / setRasterizationRateMap_.
+_MTL_INLINE MTL::RasterizationRateMap* MTL::RenderPassDescriptor::rasterizationRateMap() const
+{
+    return Object::sendMessage<MTL::RasterizationRateMap*>(this, _MTL_PRIVATE_SEL(rasterizationRateMap));
+}
+
+_MTL_INLINE void MTL::RenderPassDescriptor::setRasterizationRateMap(const MTL::RasterizationRateMap* rasterizationRateMap)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRasterizationRateMap_), rasterizationRateMap);
+}
+
+// property: sampleBufferAttachments -- getter only in this section; returns the array pointer from the sampleBufferAttachments message.
+_MTL_INLINE MTL::RenderPassSampleBufferAttachmentDescriptorArray* MTL::RenderPassDescriptor::sampleBufferAttachments() const
+{
+    return Object::sendMessage<MTL::RenderPassSampleBufferAttachmentDescriptorArray*>(this, _MTL_PRIVATE_SEL(sampleBufferAttachments));
+}
diff --git a/metal-cpp/Metal/MTLRenderPipeline.hpp b/metal-cpp/Metal/MTLRenderPipeline.hpp
new file mode 100644
index 0000000..1d1db6f
--- /dev/null
+++ b/metal-cpp/Metal/MTLRenderPipeline.hpp
@@ -0,0 +1,1212 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLRenderPipeline.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLPixelFormat.hpp"
+#include "MTLRenderCommandEncoder.hpp"
+#include "MTLRenderPipeline.hpp"
+#include "MTLTypes.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, BlendFactor) { // 19 enumerators with explicit consecutive values 0..18
+    BlendFactorZero = 0,
+    BlendFactorOne = 1,
+    BlendFactorSourceColor = 2,
+    BlendFactorOneMinusSourceColor = 3,
+    BlendFactorSourceAlpha = 4,
+    BlendFactorOneMinusSourceAlpha = 5,
+    BlendFactorDestinationColor = 6,
+    BlendFactorOneMinusDestinationColor = 7,
+    BlendFactorDestinationAlpha = 8,
+    BlendFactorOneMinusDestinationAlpha = 9,
+    BlendFactorSourceAlphaSaturated = 10,
+    BlendFactorBlendColor = 11,
+    BlendFactorOneMinusBlendColor = 12,
+    BlendFactorBlendAlpha = 13,
+    BlendFactorOneMinusBlendAlpha = 14,
+    BlendFactorSource1Color = 15,
+    BlendFactorOneMinusSource1Color = 16,
+    BlendFactorSource1Alpha = 17,
+    BlendFactorOneMinusSource1Alpha = 18,
+};
+
+_MTL_ENUM(NS::UInteger, BlendOperation) { // five enumerators with explicit consecutive values 0..4
+    BlendOperationAdd = 0,
+    BlendOperationSubtract = 1,
+    BlendOperationReverseSubtract = 2,
+    BlendOperationMin = 3,
+    BlendOperationMax = 4,
+};
+
+_MTL_OPTIONS(NS::UInteger, ColorWriteMask) { // declared with _MTL_OPTIONS (not _MTL_ENUM); each channel value is a distinct single bit
+    ColorWriteMaskNone = 0,
+    ColorWriteMaskAlpha = 1,
+    ColorWriteMaskBlue = 2,
+    ColorWriteMaskGreen = 4,
+    ColorWriteMaskRed = 8,
+    ColorWriteMaskAll = 15, // equals 1 | 2 | 4 | 8, i.e. all four channel bits
+};
+
+_MTL_ENUM(NS::UInteger, PrimitiveTopologyClass) { // four enumerators, 0..3; 0 is the Unspecified entry
+    PrimitiveTopologyClassUnspecified = 0,
+    PrimitiveTopologyClassPoint = 1,
+    PrimitiveTopologyClassLine = 2,
+    PrimitiveTopologyClassTriangle = 3,
+};
+
+_MTL_ENUM(NS::UInteger, TessellationPartitionMode) { // four enumerators with explicit consecutive values 0..3
+    TessellationPartitionModePow2 = 0,
+    TessellationPartitionModeInteger = 1,
+    TessellationPartitionModeFractionalOdd = 2,
+    TessellationPartitionModeFractionalEven = 3,
+};
+
+_MTL_ENUM(NS::UInteger, TessellationFactorStepFunction) { // four enumerators with explicit consecutive values 0..3
+    TessellationFactorStepFunctionConstant = 0,
+    TessellationFactorStepFunctionPerPatch = 1,
+    TessellationFactorStepFunctionPerInstance = 2,
+    TessellationFactorStepFunctionPerPatchAndPerInstance = 3,
+};
+
+_MTL_ENUM(NS::UInteger, TessellationFactorFormat) { // only one enumerator is defined here
+    TessellationFactorFormatHalf = 0,
+};
+
+_MTL_ENUM(NS::UInteger, TessellationControlPointIndexType) { // three enumerators, 0..2; 0 is the None entry
+    TessellationControlPointIndexTypeNone = 0,
+    TessellationControlPointIndexTypeUInt16 = 1,
+    TessellationControlPointIndexTypeUInt32 = 2,
+};
+
+class RenderPipelineColorAttachmentDescriptor : public NS::Copying<RenderPipelineColorAttachmentDescriptor> // prototypes only: alloc/init plus nine getter/setter pairs
+{
+public:
+    static class RenderPipelineColorAttachmentDescriptor* alloc();
+
+    class RenderPipelineColorAttachmentDescriptor*        init();
+
+    MTL::PixelFormat                                      pixelFormat() const;
+    void                                                  setPixelFormat(MTL::PixelFormat pixelFormat);
+
+    bool                                                  blendingEnabled() const;
+    void                                                  setBlendingEnabled(bool blendingEnabled);
+
+    MTL::BlendFactor                                      sourceRGBBlendFactor() const;
+    void                                                  setSourceRGBBlendFactor(MTL::BlendFactor sourceRGBBlendFactor);
+
+    MTL::BlendFactor                                      destinationRGBBlendFactor() const;
+    void                                                  setDestinationRGBBlendFactor(MTL::BlendFactor destinationRGBBlendFactor);
+
+    MTL::BlendOperation                                   rgbBlendOperation() const;
+    void                                                  setRgbBlendOperation(MTL::BlendOperation rgbBlendOperation);
+
+    MTL::BlendFactor                                      sourceAlphaBlendFactor() const;
+    void                                                  setSourceAlphaBlendFactor(MTL::BlendFactor sourceAlphaBlendFactor);
+
+    MTL::BlendFactor                                      destinationAlphaBlendFactor() const;
+    void                                                  setDestinationAlphaBlendFactor(MTL::BlendFactor destinationAlphaBlendFactor);
+
+    MTL::BlendOperation                                   alphaBlendOperation() const;
+    void                                                  setAlphaBlendOperation(MTL::BlendOperation alphaBlendOperation);
+
+    MTL::ColorWriteMask                                   writeMask() const;
+    void                                                  setWriteMask(MTL::ColorWriteMask writeMask);
+};
+
+class RenderPipelineReflection : public NS::Referencing<RenderPipelineReflection> // prototypes only; derives from NS::Referencing rather than NS::Copying
+{
+public:
+    static class RenderPipelineReflection* alloc();
+
+    class RenderPipelineReflection*        init();
+    // The three accessors below are const getters returning NS::Array*; no setters are declared.
+    NS::Array*                             vertexArguments() const;
+
+    NS::Array*                             fragmentArguments() const;
+
+    NS::Array*                             tileArguments() const;
+};
+
+class RenderPipelineDescriptor : public NS::Copying<RenderPipelineDescriptor> // prototypes only; every member below is declared without a body
+{
+public:
+    static class RenderPipelineDescriptor*              alloc();
+
+    class RenderPipelineDescriptor*                     init();
+
+    NS::String*                                         label() const;
+    void                                                setLabel(const NS::String* label);
+
+    class Function*                                     vertexFunction() const;
+    void                                                setVertexFunction(const class Function* vertexFunction);
+
+    class Function*                                     fragmentFunction() const;
+    void                                                setFragmentFunction(const class Function* fragmentFunction);
+
+    class VertexDescriptor*                             vertexDescriptor() const;
+    void                                                setVertexDescriptor(const class VertexDescriptor* vertexDescriptor);
+
+    NS::UInteger                                        sampleCount() const;
+    void                                                setSampleCount(NS::UInteger sampleCount);
+
+    NS::UInteger                                        rasterSampleCount() const;
+    void                                                setRasterSampleCount(NS::UInteger rasterSampleCount);
+
+    bool                                                alphaToCoverageEnabled() const;
+    void                                                setAlphaToCoverageEnabled(bool alphaToCoverageEnabled);
+
+    bool                                                alphaToOneEnabled() const;
+    void                                                setAlphaToOneEnabled(bool alphaToOneEnabled);
+
+    bool                                                rasterizationEnabled() const;
+    void                                                setRasterizationEnabled(bool rasterizationEnabled);
+
+    NS::UInteger                                        maxVertexAmplificationCount() const;
+    void                                                setMaxVertexAmplificationCount(NS::UInteger maxVertexAmplificationCount);
+    // colorAttachments is getter-only: no matching setter is declared in this class.
+    class RenderPipelineColorAttachmentDescriptorArray* colorAttachments() const;
+
+    MTL::PixelFormat                                    depthAttachmentPixelFormat() const;
+    void                                                setDepthAttachmentPixelFormat(MTL::PixelFormat depthAttachmentPixelFormat);
+
+    MTL::PixelFormat                                    stencilAttachmentPixelFormat() const;
+    void                                                setStencilAttachmentPixelFormat(MTL::PixelFormat stencilAttachmentPixelFormat);
+
+    MTL::PrimitiveTopologyClass                         inputPrimitiveTopology() const;
+    void                                                setInputPrimitiveTopology(MTL::PrimitiveTopologyClass inputPrimitiveTopology);
+
+    MTL::TessellationPartitionMode                      tessellationPartitionMode() const;
+    void                                                setTessellationPartitionMode(MTL::TessellationPartitionMode tessellationPartitionMode);
+
+    NS::UInteger                                        maxTessellationFactor() const;
+    void                                                setMaxTessellationFactor(NS::UInteger maxTessellationFactor);
+
+    bool                                                tessellationFactorScaleEnabled() const;
+    void                                                setTessellationFactorScaleEnabled(bool tessellationFactorScaleEnabled);
+
+    MTL::TessellationFactorFormat                       tessellationFactorFormat() const;
+    void                                                setTessellationFactorFormat(MTL::TessellationFactorFormat tessellationFactorFormat);
+
+    MTL::TessellationControlPointIndexType              tessellationControlPointIndexType() const;
+    void                                                setTessellationControlPointIndexType(MTL::TessellationControlPointIndexType tessellationControlPointIndexType);
+
+    MTL::TessellationFactorStepFunction                 tessellationFactorStepFunction() const;
+    void                                                setTessellationFactorStepFunction(MTL::TessellationFactorStepFunction tessellationFactorStepFunction);
+
+    MTL::Winding                                        tessellationOutputWindingOrder() const;
+    void                                                setTessellationOutputWindingOrder(MTL::Winding tessellationOutputWindingOrder);
+    // vertexBuffers and fragmentBuffers are getter-only: no matching setters are declared in this class.
+    class PipelineBufferDescriptorArray*                vertexBuffers() const;
+
+    class PipelineBufferDescriptorArray*                fragmentBuffers() const;
+
+    bool                                                supportIndirectCommandBuffers() const;
+    void                                                setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers);
+
+    NS::Array*                                          binaryArchives() const;
+    void                                                setBinaryArchives(const NS::Array* binaryArchives);
+
+    NS::Array*                                          vertexPreloadedLibraries() const;
+    void                                                setVertexPreloadedLibraries(const NS::Array* vertexPreloadedLibraries);
+
+    NS::Array*                                          fragmentPreloadedLibraries() const;
+    void                                                setFragmentPreloadedLibraries(const NS::Array* fragmentPreloadedLibraries);
+
+    class LinkedFunctions*                              vertexLinkedFunctions() const;
+    void                                                setVertexLinkedFunctions(const class LinkedFunctions* vertexLinkedFunctions);
+
+    class LinkedFunctions*                              fragmentLinkedFunctions() const;
+    void                                                setFragmentLinkedFunctions(const class LinkedFunctions* fragmentLinkedFunctions);
+
+    bool                                                supportAddingVertexBinaryFunctions() const;
+    void                                                setSupportAddingVertexBinaryFunctions(bool supportAddingVertexBinaryFunctions);
+
+    bool                                                supportAddingFragmentBinaryFunctions() const;
+    void                                                setSupportAddingFragmentBinaryFunctions(bool supportAddingFragmentBinaryFunctions);
+
+    NS::UInteger                                        maxVertexCallStackDepth() const;
+    void                                                setMaxVertexCallStackDepth(NS::UInteger maxVertexCallStackDepth);
+
+    NS::UInteger                                        maxFragmentCallStackDepth() const;
+    void                                                setMaxFragmentCallStackDepth(NS::UInteger maxFragmentCallStackDepth);
+    // reset() is the only non-accessor member: it is non-const, takes no arguments and returns void.
+    void                                                reset();
+};
+
+class RenderPipelineFunctionsDescriptor : public NS::Copying<RenderPipelineFunctionsDescriptor> // declaration only: alloc/init plus three NS::Array getter/setter pairs
+{
+public:
+    static class RenderPipelineFunctionsDescriptor* alloc();
+    // init() returns a pointer of this same wrapper type.
+    class RenderPipelineFunctionsDescriptor*        init();
+    // vertexAdditionalBinaryFunctions: const getter returning NS::Array*, setter taking const NS::Array*.
+    NS::Array*                                      vertexAdditionalBinaryFunctions() const;
+    void                                            setVertexAdditionalBinaryFunctions(const NS::Array* vertexAdditionalBinaryFunctions);
+    // fragmentAdditionalBinaryFunctions: same getter/setter shape as above.
+    NS::Array*                                      fragmentAdditionalBinaryFunctions() const;
+    void                                            setFragmentAdditionalBinaryFunctions(const NS::Array* fragmentAdditionalBinaryFunctions);
+    // tileAdditionalBinaryFunctions: same getter/setter shape as above.
+    NS::Array*                                      tileAdditionalBinaryFunctions() const;
+    void                                            setTileAdditionalBinaryFunctions(const NS::Array* tileAdditionalBinaryFunctions);
+};
+
+class RenderPipelineState : public NS::Referencing<RenderPipelineState> // declaration only: const getters (no setters, no alloc/init) plus five non-const methods
+{
+public:
+    NS::String*                      label() const;
+
+    class Device*                    device() const;
+
+    NS::UInteger                     maxTotalThreadsPerThreadgroup() const;
+
+    bool                             threadgroupSizeMatchesTileSize() const;
+
+    NS::UInteger                     imageblockSampleLength() const;
+    // Takes an MTL::Size by value; unlike the getters around it, this method is not declared const.
+    NS::UInteger                     imageblockMemoryLength(MTL::Size imageblockDimensions);
+
+    bool                             supportIndirectCommandBuffers() const;
+    // The three methods below each take an MTL::RenderStages value as their second argument.
+    class FunctionHandle*            functionHandle(const class Function* function, MTL::RenderStages stage);
+    // NOTE(review): the `new` prefix presumably means the caller owns the returned object -- confirm against metal-cpp ownership rules.
+    class VisibleFunctionTable*      newVisibleFunctionTable(const class VisibleFunctionTableDescriptor* descriptor, MTL::RenderStages stage);
+
+    class IntersectionFunctionTable* newIntersectionFunctionTable(const class IntersectionFunctionTableDescriptor* descriptor, MTL::RenderStages stage);
+    // Returns a RenderPipelineState pointer; `error` is an NS::Error** out-parameter.
+    class RenderPipelineState*       newRenderPipelineState(const class RenderPipelineFunctionsDescriptor* additionalBinaryFunctions, NS::Error** error);
+};
+
+class RenderPipelineColorAttachmentDescriptorArray : public NS::Referencing<RenderPipelineColorAttachmentDescriptorArray> // declaration only: alloc/init plus indexed get/set of color attachment descriptors
+{
+public:
+    static class RenderPipelineColorAttachmentDescriptorArray* alloc();
+
+    class RenderPipelineColorAttachmentDescriptorArray*        init();
+    // Indexed read: returns the descriptor pointer for attachmentIndex (non-const method).
+    class RenderPipelineColorAttachmentDescriptor*             object(NS::UInteger attachmentIndex);
+    // Indexed write: note the argument order is (descriptor, index).
+    void                                                       setObject(const class RenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex);
+};
+
+class TileRenderPipelineColorAttachmentDescriptor : public NS::Copying<TileRenderPipelineColorAttachmentDescriptor> // declaration only: alloc/init plus a single pixelFormat property
+{
+public:
+    static class TileRenderPipelineColorAttachmentDescriptor* alloc();
+
+    class TileRenderPipelineColorAttachmentDescriptor*        init();
+    // pixelFormat: const getter returning MTL::PixelFormat, setter taking it by value.
+    MTL::PixelFormat                                          pixelFormat() const;
+    void                                                      setPixelFormat(MTL::PixelFormat pixelFormat);
+};
+
+class TileRenderPipelineColorAttachmentDescriptorArray : public NS::Referencing<TileRenderPipelineColorAttachmentDescriptorArray> // declaration only: alloc/init plus indexed get/set of tile color attachment descriptors
+{
+public:
+    static class TileRenderPipelineColorAttachmentDescriptorArray* alloc();
+
+    class TileRenderPipelineColorAttachmentDescriptorArray*        init();
+    // Indexed read: returns the descriptor pointer for attachmentIndex (non-const method).
+    class TileRenderPipelineColorAttachmentDescriptor*             object(NS::UInteger attachmentIndex);
+    // Indexed write: note the argument order is (descriptor, index).
+    void                                                           setObject(const class TileRenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex);
+};
+
+class TileRenderPipelineDescriptor : public NS::Copying<TileRenderPipelineDescriptor> // declaration only: alloc/init, property accessors, and reset()
+{
+public:
+    static class TileRenderPipelineDescriptor*              alloc();
+
+    class TileRenderPipelineDescriptor*                     init();
+    // label: NS::String* getter / setter pair.
+    NS::String*                                             label() const;
+    void                                                    setLabel(const NS::String* label);
+    // tileFunction: Function* getter / setter pair.
+    class Function*                                         tileFunction() const;
+    void                                                    setTileFunction(const class Function* tileFunction);
+    // rasterSampleCount: NS::UInteger getter / setter pair.
+    NS::UInteger                                            rasterSampleCount() const;
+    void                                                    setRasterSampleCount(NS::UInteger rasterSampleCount);
+    // colorAttachments: getter only; no setter is declared for the array itself.
+    class TileRenderPipelineColorAttachmentDescriptorArray* colorAttachments() const;
+    // threadgroupSizeMatchesTileSize: bool getter / setter pair.
+    bool                                                    threadgroupSizeMatchesTileSize() const;
+    void                                                    setThreadgroupSizeMatchesTileSize(bool threadgroupSizeMatchesTileSize);
+    // tileBuffers: getter only; no setter is declared for the array itself.
+    class PipelineBufferDescriptorArray*                    tileBuffers() const;
+    // maxTotalThreadsPerThreadgroup: NS::UInteger getter / setter pair.
+    NS::UInteger                                            maxTotalThreadsPerThreadgroup() const;
+    void                                                    setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup);
+    // binaryArchives: NS::Array* getter / setter pair.
+    NS::Array*                                              binaryArchives() const;
+    void                                                    setBinaryArchives(const NS::Array* binaryArchives);
+    // preloadedLibraries: NS::Array* getter / setter pair.
+    NS::Array*                                              preloadedLibraries() const;
+    void                                                    setPreloadedLibraries(const NS::Array* preloadedLibraries);
+    // linkedFunctions: LinkedFunctions* getter / setter pair.
+    class LinkedFunctions*                                  linkedFunctions() const;
+    void                                                    setLinkedFunctions(const class LinkedFunctions* linkedFunctions);
+    // supportAddingBinaryFunctions: bool getter / setter pair.
+    bool                                                    supportAddingBinaryFunctions() const;
+    void                                                    setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions);
+    // maxCallStackDepth: NS::UInteger getter / setter pair.
+    NS::UInteger                                            maxCallStackDepth() const;
+    void                                                    setMaxCallStackDepth(NS::UInteger maxCallStackDepth);
+    // reset(): takes no arguments and returns nothing; NOTE(review): presumably restores default property values -- confirm in Apple's MTLTileRenderPipelineDescriptor docs.
+    void                                                    reset();
+};
+
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with the class handle from _MTL_PRIVATE_CLS(MTLRenderPipelineColorAttachmentDescriptor).
+_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptor* MTL::RenderPipelineColorAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPipelineColorAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPipelineColorAttachmentDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<> and returns its result typed as this wrapper class.
+_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptor* MTL::RenderPipelineColorAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPipelineColorAttachmentDescriptor>();
+}
+
+// property: pixelFormat -- getter sends the pixelFormat selector and returns the reply as MTL::PixelFormat.
+_MTL_INLINE MTL::PixelFormat MTL::RenderPipelineColorAttachmentDescriptor::pixelFormat() const
+{
+    return Object::sendMessage<MTL::PixelFormat>(this, _MTL_PRIVATE_SEL(pixelFormat));
+}
+// setter: sends setPixelFormat_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setPixelFormat(MTL::PixelFormat pixelFormat)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPixelFormat_), pixelFormat);
+}
+
+// property: blendingEnabled -- getter sends the isBlendingEnabled selector (note the `is` prefix, which differs from the C++ method name) and returns bool.
+_MTL_INLINE bool MTL::RenderPipelineColorAttachmentDescriptor::blendingEnabled() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isBlendingEnabled));
+}
+// setter: sends setBlendingEnabled_ with the new flag as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setBlendingEnabled(bool blendingEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBlendingEnabled_), blendingEnabled);
+}
+
+// property: sourceRGBBlendFactor -- getter sends the sourceRGBBlendFactor selector and returns the reply as MTL::BlendFactor.
+_MTL_INLINE MTL::BlendFactor MTL::RenderPipelineColorAttachmentDescriptor::sourceRGBBlendFactor() const
+{
+    return Object::sendMessage<MTL::BlendFactor>(this, _MTL_PRIVATE_SEL(sourceRGBBlendFactor));
+}
+// setter: sends setSourceRGBBlendFactor_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setSourceRGBBlendFactor(MTL::BlendFactor sourceRGBBlendFactor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSourceRGBBlendFactor_), sourceRGBBlendFactor);
+}
+
+// property: destinationRGBBlendFactor -- getter sends the destinationRGBBlendFactor selector and returns the reply as MTL::BlendFactor.
+_MTL_INLINE MTL::BlendFactor MTL::RenderPipelineColorAttachmentDescriptor::destinationRGBBlendFactor() const
+{
+    return Object::sendMessage<MTL::BlendFactor>(this, _MTL_PRIVATE_SEL(destinationRGBBlendFactor));
+}
+// setter: sends setDestinationRGBBlendFactor_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setDestinationRGBBlendFactor(MTL::BlendFactor destinationRGBBlendFactor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDestinationRGBBlendFactor_), destinationRGBBlendFactor);
+}
+
+// property: rgbBlendOperation -- getter sends the rgbBlendOperation selector and returns the reply as MTL::BlendOperation.
+_MTL_INLINE MTL::BlendOperation MTL::RenderPipelineColorAttachmentDescriptor::rgbBlendOperation() const
+{
+    return Object::sendMessage<MTL::BlendOperation>(this, _MTL_PRIVATE_SEL(rgbBlendOperation));
+}
+// setter: sends setRgbBlendOperation_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setRgbBlendOperation(MTL::BlendOperation rgbBlendOperation)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRgbBlendOperation_), rgbBlendOperation);
+}
+
+// property: sourceAlphaBlendFactor -- getter sends the sourceAlphaBlendFactor selector and returns the reply as MTL::BlendFactor.
+_MTL_INLINE MTL::BlendFactor MTL::RenderPipelineColorAttachmentDescriptor::sourceAlphaBlendFactor() const
+{
+    return Object::sendMessage<MTL::BlendFactor>(this, _MTL_PRIVATE_SEL(sourceAlphaBlendFactor));
+}
+// setter: sends setSourceAlphaBlendFactor_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setSourceAlphaBlendFactor(MTL::BlendFactor sourceAlphaBlendFactor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSourceAlphaBlendFactor_), sourceAlphaBlendFactor);
+}
+
+// property: destinationAlphaBlendFactor -- getter sends the destinationAlphaBlendFactor selector and returns the reply as MTL::BlendFactor.
+_MTL_INLINE MTL::BlendFactor MTL::RenderPipelineColorAttachmentDescriptor::destinationAlphaBlendFactor() const
+{
+    return Object::sendMessage<MTL::BlendFactor>(this, _MTL_PRIVATE_SEL(destinationAlphaBlendFactor));
+}
+// setter: sends setDestinationAlphaBlendFactor_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setDestinationAlphaBlendFactor(MTL::BlendFactor destinationAlphaBlendFactor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDestinationAlphaBlendFactor_), destinationAlphaBlendFactor);
+}
+
+// property: alphaBlendOperation -- getter sends the alphaBlendOperation selector and returns the reply as MTL::BlendOperation.
+_MTL_INLINE MTL::BlendOperation MTL::RenderPipelineColorAttachmentDescriptor::alphaBlendOperation() const
+{
+    return Object::sendMessage<MTL::BlendOperation>(this, _MTL_PRIVATE_SEL(alphaBlendOperation));
+}
+// setter: sends setAlphaBlendOperation_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setAlphaBlendOperation(MTL::BlendOperation alphaBlendOperation)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAlphaBlendOperation_), alphaBlendOperation);
+}
+
+// property: writeMask -- getter sends the writeMask selector and returns the reply as MTL::ColorWriteMask.
+_MTL_INLINE MTL::ColorWriteMask MTL::RenderPipelineColorAttachmentDescriptor::writeMask() const
+{
+    return Object::sendMessage<MTL::ColorWriteMask>(this, _MTL_PRIVATE_SEL(writeMask));
+}
+// setter: sends setWriteMask_ with the new mask as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptor::setWriteMask(MTL::ColorWriteMask writeMask)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setWriteMask_), writeMask);
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with the class handle from _MTL_PRIVATE_CLS(MTLRenderPipelineReflection).
+_MTL_INLINE MTL::RenderPipelineReflection* MTL::RenderPipelineReflection::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPipelineReflection>(_MTL_PRIVATE_CLS(MTLRenderPipelineReflection));
+}
+
+// method: init -- forwards to NS::Object::init<> and returns its result typed as this wrapper class.
+_MTL_INLINE MTL::RenderPipelineReflection* MTL::RenderPipelineReflection::init()
+{
+    return NS::Object::init<MTL::RenderPipelineReflection>();
+}
+
+// property: vertexArguments -- getter only in this chunk; sends the vertexArguments selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::vertexArguments() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(vertexArguments));
+}
+
+// property: fragmentArguments -- getter only in this chunk; sends the fragmentArguments selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::fragmentArguments() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(fragmentArguments));
+}
+
+// property: tileArguments -- getter only in this chunk; sends the tileArguments selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineReflection::tileArguments() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(tileArguments));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with the class handle from _MTL_PRIVATE_CLS(MTLRenderPipelineDescriptor).
+_MTL_INLINE MTL::RenderPipelineDescriptor* MTL::RenderPipelineDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPipelineDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPipelineDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<> and returns its result typed as this wrapper class.
+_MTL_INLINE MTL::RenderPipelineDescriptor* MTL::RenderPipelineDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPipelineDescriptor>();
+}
+
+// property: label -- getter sends the label selector and returns the reply as NS::String*.
+_MTL_INLINE NS::String* MTL::RenderPipelineDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+// setter: sends setLabel_ with the string pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: vertexFunction -- getter sends the vertexFunction selector and returns the reply as MTL::Function*.
+_MTL_INLINE MTL::Function* MTL::RenderPipelineDescriptor::vertexFunction() const
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(vertexFunction));
+}
+// setter: sends setVertexFunction_ with the function pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexFunction(const MTL::Function* vertexFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexFunction_), vertexFunction);
+}
+
+// property: fragmentFunction -- getter sends the fragmentFunction selector and returns the reply as MTL::Function*.
+_MTL_INLINE MTL::Function* MTL::RenderPipelineDescriptor::fragmentFunction() const
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(fragmentFunction));
+}
+// setter: sends setFragmentFunction_ with the function pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setFragmentFunction(const MTL::Function* fragmentFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentFunction_), fragmentFunction);
+}
+
+// property: vertexDescriptor -- getter sends the vertexDescriptor selector and returns the reply as MTL::VertexDescriptor*.
+_MTL_INLINE MTL::VertexDescriptor* MTL::RenderPipelineDescriptor::vertexDescriptor() const
+{
+    return Object::sendMessage<MTL::VertexDescriptor*>(this, _MTL_PRIVATE_SEL(vertexDescriptor));
+}
+// setter: sends setVertexDescriptor_ with the descriptor pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexDescriptor(const MTL::VertexDescriptor* vertexDescriptor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexDescriptor_), vertexDescriptor);
+}
+
+// property: sampleCount -- getter sends the sampleCount selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::sampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+// setter: sends setSampleCount_ with the new count as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setSampleCount(NS::UInteger sampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount);
+}
+
+// property: rasterSampleCount -- getter sends the rasterSampleCount selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::rasterSampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(rasterSampleCount));
+}
+// setter: sends setRasterSampleCount_ with the new count as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setRasterSampleCount(NS::UInteger rasterSampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRasterSampleCount_), rasterSampleCount);
+}
+
+// property: alphaToCoverageEnabled -- getter sends the isAlphaToCoverageEnabled selector (note the `is` prefix, which differs from the C++ method name) and returns bool.
+_MTL_INLINE bool MTL::RenderPipelineDescriptor::alphaToCoverageEnabled() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isAlphaToCoverageEnabled));
+}
+// setter: sends setAlphaToCoverageEnabled_ with the new flag as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setAlphaToCoverageEnabled(bool alphaToCoverageEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAlphaToCoverageEnabled_), alphaToCoverageEnabled);
+}
+
+// property: alphaToOneEnabled -- getter sends the isAlphaToOneEnabled selector (note the `is` prefix, which differs from the C++ method name) and returns bool.
+_MTL_INLINE bool MTL::RenderPipelineDescriptor::alphaToOneEnabled() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isAlphaToOneEnabled));
+}
+// setter: sends setAlphaToOneEnabled_ with the new flag as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setAlphaToOneEnabled(bool alphaToOneEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAlphaToOneEnabled_), alphaToOneEnabled);
+}
+
+// property: rasterizationEnabled -- getter sends the isRasterizationEnabled selector (note the `is` prefix, which differs from the C++ method name) and returns bool.
+_MTL_INLINE bool MTL::RenderPipelineDescriptor::rasterizationEnabled() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isRasterizationEnabled));
+}
+// setter: sends setRasterizationEnabled_ with the new flag as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setRasterizationEnabled(bool rasterizationEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRasterizationEnabled_), rasterizationEnabled);
+}
+
+// property: maxVertexAmplificationCount -- getter sends the maxVertexAmplificationCount selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxVertexAmplificationCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxVertexAmplificationCount));
+}
+// setter: sends setMaxVertexAmplificationCount_ with the new count as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxVertexAmplificationCount(NS::UInteger maxVertexAmplificationCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxVertexAmplificationCount_), maxVertexAmplificationCount);
+}
+
+// property: colorAttachments -- getter only (no setter follows); sends the colorAttachments selector and returns the attachment-descriptor array pointer.
+_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptorArray* MTL::RenderPipelineDescriptor::colorAttachments() const
+{
+    return Object::sendMessage<MTL::RenderPipelineColorAttachmentDescriptorArray*>(this, _MTL_PRIVATE_SEL(colorAttachments));
+}
+
+// property: depthAttachmentPixelFormat -- getter sends the depthAttachmentPixelFormat selector and returns the reply as MTL::PixelFormat.
+_MTL_INLINE MTL::PixelFormat MTL::RenderPipelineDescriptor::depthAttachmentPixelFormat() const
+{
+    return Object::sendMessage<MTL::PixelFormat>(this, _MTL_PRIVATE_SEL(depthAttachmentPixelFormat));
+}
+// setter: sends setDepthAttachmentPixelFormat_ with the new format as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setDepthAttachmentPixelFormat(MTL::PixelFormat depthAttachmentPixelFormat)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepthAttachmentPixelFormat_), depthAttachmentPixelFormat);
+}
+
+// property: stencilAttachmentPixelFormat -- getter sends the stencilAttachmentPixelFormat selector and returns the reply as MTL::PixelFormat.
+_MTL_INLINE MTL::PixelFormat MTL::RenderPipelineDescriptor::stencilAttachmentPixelFormat() const
+{
+    return Object::sendMessage<MTL::PixelFormat>(this, _MTL_PRIVATE_SEL(stencilAttachmentPixelFormat));
+}
+// setter: sends setStencilAttachmentPixelFormat_ with the new format as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setStencilAttachmentPixelFormat(MTL::PixelFormat stencilAttachmentPixelFormat)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStencilAttachmentPixelFormat_), stencilAttachmentPixelFormat);
+}
+
+// property: inputPrimitiveTopology -- getter sends the inputPrimitiveTopology selector and returns the reply as MTL::PrimitiveTopologyClass.
+_MTL_INLINE MTL::PrimitiveTopologyClass MTL::RenderPipelineDescriptor::inputPrimitiveTopology() const
+{
+    return Object::sendMessage<MTL::PrimitiveTopologyClass>(this, _MTL_PRIVATE_SEL(inputPrimitiveTopology));
+}
+// setter: sends setInputPrimitiveTopology_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setInputPrimitiveTopology(MTL::PrimitiveTopologyClass inputPrimitiveTopology)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setInputPrimitiveTopology_), inputPrimitiveTopology);
+}
+
+// property: tessellationPartitionMode -- getter sends the tessellationPartitionMode selector and returns the reply as MTL::TessellationPartitionMode.
+_MTL_INLINE MTL::TessellationPartitionMode MTL::RenderPipelineDescriptor::tessellationPartitionMode() const
+{
+    return Object::sendMessage<MTL::TessellationPartitionMode>(this, _MTL_PRIVATE_SEL(tessellationPartitionMode));
+}
+// setter: sends setTessellationPartitionMode_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationPartitionMode(MTL::TessellationPartitionMode tessellationPartitionMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationPartitionMode_), tessellationPartitionMode);
+}
+
+// property: maxTessellationFactor -- getter sends the maxTessellationFactor selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxTessellationFactor() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxTessellationFactor));
+}
+// setter: sends setMaxTessellationFactor_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxTessellationFactor(NS::UInteger maxTessellationFactor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxTessellationFactor_), maxTessellationFactor);
+}
+
+// property: tessellationFactorScaleEnabled -- getter sends the isTessellationFactorScaleEnabled selector (note the `is` prefix, which differs from the C++ method name) and returns bool.
+_MTL_INLINE bool MTL::RenderPipelineDescriptor::tessellationFactorScaleEnabled() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isTessellationFactorScaleEnabled));
+}
+// setter: sends setTessellationFactorScaleEnabled_ with the new flag as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationFactorScaleEnabled(bool tessellationFactorScaleEnabled)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationFactorScaleEnabled_), tessellationFactorScaleEnabled);
+}
+
+// property: tessellationFactorFormat -- getter sends the tessellationFactorFormat selector and returns the reply as MTL::TessellationFactorFormat.
+_MTL_INLINE MTL::TessellationFactorFormat MTL::RenderPipelineDescriptor::tessellationFactorFormat() const
+{
+    return Object::sendMessage<MTL::TessellationFactorFormat>(this, _MTL_PRIVATE_SEL(tessellationFactorFormat));
+}
+// setter: sends setTessellationFactorFormat_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationFactorFormat(MTL::TessellationFactorFormat tessellationFactorFormat)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationFactorFormat_), tessellationFactorFormat);
+}
+
+// property: tessellationControlPointIndexType -- getter sends the tessellationControlPointIndexType selector and returns the reply as MTL::TessellationControlPointIndexType.
+_MTL_INLINE MTL::TessellationControlPointIndexType MTL::RenderPipelineDescriptor::tessellationControlPointIndexType() const
+{
+    return Object::sendMessage<MTL::TessellationControlPointIndexType>(this, _MTL_PRIVATE_SEL(tessellationControlPointIndexType));
+}
+// setter: sends setTessellationControlPointIndexType_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationControlPointIndexType(MTL::TessellationControlPointIndexType tessellationControlPointIndexType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationControlPointIndexType_), tessellationControlPointIndexType);
+}
+
+// property: tessellationFactorStepFunction -- getter sends the tessellationFactorStepFunction selector and returns the reply as MTL::TessellationFactorStepFunction.
+_MTL_INLINE MTL::TessellationFactorStepFunction MTL::RenderPipelineDescriptor::tessellationFactorStepFunction() const
+{
+    return Object::sendMessage<MTL::TessellationFactorStepFunction>(this, _MTL_PRIVATE_SEL(tessellationFactorStepFunction));
+}
+// setter: sends setTessellationFactorStepFunction_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationFactorStepFunction(MTL::TessellationFactorStepFunction tessellationFactorStepFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationFactorStepFunction_), tessellationFactorStepFunction);
+}
+
+// property: tessellationOutputWindingOrder -- getter sends the tessellationOutputWindingOrder selector and returns the reply as MTL::Winding.
+_MTL_INLINE MTL::Winding MTL::RenderPipelineDescriptor::tessellationOutputWindingOrder() const
+{
+    return Object::sendMessage<MTL::Winding>(this, _MTL_PRIVATE_SEL(tessellationOutputWindingOrder));
+}
+// setter: sends setTessellationOutputWindingOrder_ with the new value as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setTessellationOutputWindingOrder(MTL::Winding tessellationOutputWindingOrder)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTessellationOutputWindingOrder_), tessellationOutputWindingOrder);
+}
+
+// property: vertexBuffers -- getter only (no setter follows); sends the vertexBuffers selector and returns a MTL::PipelineBufferDescriptorArray*.
+_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::RenderPipelineDescriptor::vertexBuffers() const
+{
+    return Object::sendMessage<MTL::PipelineBufferDescriptorArray*>(this, _MTL_PRIVATE_SEL(vertexBuffers));
+}
+
+// property: fragmentBuffers -- getter only (no setter follows); sends the fragmentBuffers selector and returns a MTL::PipelineBufferDescriptorArray*.
+_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::RenderPipelineDescriptor::fragmentBuffers() const
+{
+    return Object::sendMessage<MTL::PipelineBufferDescriptorArray*>(this, _MTL_PRIVATE_SEL(fragmentBuffers));
+}
+
+// property: supportIndirectCommandBuffers -- getter goes through Object::sendMessageSafe rather than sendMessage. NOTE(review): presumably this tolerates a receiver that does not implement the selector -- confirm in NS::Object.
+_MTL_INLINE bool MTL::RenderPipelineDescriptor::supportIndirectCommandBuffers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers));
+}
+// setter: uses plain sendMessage with setSupportIndirectCommandBuffers_ and the new flag; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setSupportIndirectCommandBuffers(bool supportIndirectCommandBuffers)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportIndirectCommandBuffers_), supportIndirectCommandBuffers);
+}
+
+// property: binaryArchives -- getter sends the binaryArchives selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineDescriptor::binaryArchives() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(binaryArchives));
+}
+// setter: sends setBinaryArchives_ with the array pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setBinaryArchives(const NS::Array* binaryArchives)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives);
+}
+
+// property: vertexPreloadedLibraries -- getter sends the vertexPreloadedLibraries selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineDescriptor::vertexPreloadedLibraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(vertexPreloadedLibraries));
+}
+// setter: sends setVertexPreloadedLibraries_ with the array pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexPreloadedLibraries(const NS::Array* vertexPreloadedLibraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexPreloadedLibraries_), vertexPreloadedLibraries);
+}
+
+// property: fragmentPreloadedLibraries -- getter sends the fragmentPreloadedLibraries selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineDescriptor::fragmentPreloadedLibraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(fragmentPreloadedLibraries));
+}
+// setter: sends setFragmentPreloadedLibraries_ with the array pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setFragmentPreloadedLibraries(const NS::Array* fragmentPreloadedLibraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentPreloadedLibraries_), fragmentPreloadedLibraries);
+}
+
+// property: vertexLinkedFunctions -- getter sends the vertexLinkedFunctions selector and returns the reply as MTL::LinkedFunctions*.
+_MTL_INLINE MTL::LinkedFunctions* MTL::RenderPipelineDescriptor::vertexLinkedFunctions() const
+{
+    return Object::sendMessage<MTL::LinkedFunctions*>(this, _MTL_PRIVATE_SEL(vertexLinkedFunctions));
+}
+// setter: sends setVertexLinkedFunctions_ with the pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setVertexLinkedFunctions(const MTL::LinkedFunctions* vertexLinkedFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexLinkedFunctions_), vertexLinkedFunctions);
+}
+
+// property: fragmentLinkedFunctions -- getter sends the fragmentLinkedFunctions selector and returns the reply as MTL::LinkedFunctions*.
+_MTL_INLINE MTL::LinkedFunctions* MTL::RenderPipelineDescriptor::fragmentLinkedFunctions() const
+{
+    return Object::sendMessage<MTL::LinkedFunctions*>(this, _MTL_PRIVATE_SEL(fragmentLinkedFunctions));
+}
+// setter: sends setFragmentLinkedFunctions_ with the pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setFragmentLinkedFunctions(const MTL::LinkedFunctions* fragmentLinkedFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentLinkedFunctions_), fragmentLinkedFunctions);
+}
+
+// property: supportAddingVertexBinaryFunctions -- getter goes through Object::sendMessageSafe rather than sendMessage. NOTE(review): presumably this tolerates a receiver that does not implement the selector -- confirm in NS::Object.
+_MTL_INLINE bool MTL::RenderPipelineDescriptor::supportAddingVertexBinaryFunctions() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportAddingVertexBinaryFunctions));
+}
+// setter: uses plain sendMessage with setSupportAddingVertexBinaryFunctions_ and the new flag; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setSupportAddingVertexBinaryFunctions(bool supportAddingVertexBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportAddingVertexBinaryFunctions_), supportAddingVertexBinaryFunctions);
+}
+
+// property: supportAddingFragmentBinaryFunctions -- getter goes through Object::sendMessageSafe rather than sendMessage. NOTE(review): presumably this tolerates a receiver that does not implement the selector -- confirm in NS::Object.
+_MTL_INLINE bool MTL::RenderPipelineDescriptor::supportAddingFragmentBinaryFunctions() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportAddingFragmentBinaryFunctions));
+}
+// setter: uses plain sendMessage with setSupportAddingFragmentBinaryFunctions_ and the new flag; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setSupportAddingFragmentBinaryFunctions(bool supportAddingFragmentBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportAddingFragmentBinaryFunctions_), supportAddingFragmentBinaryFunctions);
+}
+
+// property: maxVertexCallStackDepth -- getter sends the maxVertexCallStackDepth selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxVertexCallStackDepth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxVertexCallStackDepth));
+}
+// setter: sends setMaxVertexCallStackDepth_ with the new depth as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxVertexCallStackDepth(NS::UInteger maxVertexCallStackDepth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxVertexCallStackDepth_), maxVertexCallStackDepth);
+}
+
+// property: maxFragmentCallStackDepth -- getter sends the maxFragmentCallStackDepth selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineDescriptor::maxFragmentCallStackDepth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxFragmentCallStackDepth));
+}
+// setter: sends setMaxFragmentCallStackDepth_ with the new depth as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::setMaxFragmentCallStackDepth(NS::UInteger maxFragmentCallStackDepth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxFragmentCallStackDepth_), maxFragmentCallStackDepth);
+}
+
+// method: reset -- sends the argument-less reset selector and returns nothing. NOTE(review): presumably restores default property values -- confirm in Apple's MTLRenderPipelineDescriptor docs.
+_MTL_INLINE void MTL::RenderPipelineDescriptor::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
+
+// static method: alloc -- forwards to NS::Object::alloc<> with the class handle from _MTL_PRIVATE_CLS(MTLRenderPipelineFunctionsDescriptor).
+_MTL_INLINE MTL::RenderPipelineFunctionsDescriptor* MTL::RenderPipelineFunctionsDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPipelineFunctionsDescriptor>(_MTL_PRIVATE_CLS(MTLRenderPipelineFunctionsDescriptor));
+}
+
+// method: init -- forwards to NS::Object::init<> and returns its result typed as this wrapper class.
+_MTL_INLINE MTL::RenderPipelineFunctionsDescriptor* MTL::RenderPipelineFunctionsDescriptor::init()
+{
+    return NS::Object::init<MTL::RenderPipelineFunctionsDescriptor>();
+}
+
+// property: vertexAdditionalBinaryFunctions -- getter sends the vertexAdditionalBinaryFunctions selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineFunctionsDescriptor::vertexAdditionalBinaryFunctions() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(vertexAdditionalBinaryFunctions));
+}
+// setter: sends setVertexAdditionalBinaryFunctions_ with the array pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineFunctionsDescriptor::setVertexAdditionalBinaryFunctions(const NS::Array* vertexAdditionalBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setVertexAdditionalBinaryFunctions_), vertexAdditionalBinaryFunctions);
+}
+
+// property: fragmentAdditionalBinaryFunctions -- getter sends the fragmentAdditionalBinaryFunctions selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineFunctionsDescriptor::fragmentAdditionalBinaryFunctions() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(fragmentAdditionalBinaryFunctions));
+}
+// setter: sends setFragmentAdditionalBinaryFunctions_ with the array pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineFunctionsDescriptor::setFragmentAdditionalBinaryFunctions(const NS::Array* fragmentAdditionalBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFragmentAdditionalBinaryFunctions_), fragmentAdditionalBinaryFunctions);
+}
+
+// property: tileAdditionalBinaryFunctions -- getter sends the tileAdditionalBinaryFunctions selector and returns the reply as NS::Array*.
+_MTL_INLINE NS::Array* MTL::RenderPipelineFunctionsDescriptor::tileAdditionalBinaryFunctions() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(tileAdditionalBinaryFunctions));
+}
+// setter: sends setTileAdditionalBinaryFunctions_ with the array pointer as its only argument; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineFunctionsDescriptor::setTileAdditionalBinaryFunctions(const NS::Array* tileAdditionalBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileAdditionalBinaryFunctions_), tileAdditionalBinaryFunctions);
+}
+
+// property: label -- read-only on this class (no setter is declared); sends the label selector and returns the reply as NS::String*.
+_MTL_INLINE NS::String* MTL::RenderPipelineState::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: device -- read-only; sends the device selector and returns the reply as MTL::Device*.
+_MTL_INLINE MTL::Device* MTL::RenderPipelineState::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: maxTotalThreadsPerThreadgroup -- read-only; sends the maxTotalThreadsPerThreadgroup selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineState::maxTotalThreadsPerThreadgroup() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup));
+}
+
+// property: threadgroupSizeMatchesTileSize -- read-only bool; the selector has the same name as the method (no `is` prefix here).
+_MTL_INLINE bool MTL::RenderPipelineState::threadgroupSizeMatchesTileSize() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(threadgroupSizeMatchesTileSize));
+}
+
+// property: imageblockSampleLength -- read-only; sends the imageblockSampleLength selector and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineState::imageblockSampleLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(imageblockSampleLength));
+}
+
+// method: imageblockMemoryLengthForDimensions: -- passes the MTL::Size by value as the single argument and returns the reply as NS::UInteger; the C++ name drops the "ForDimensions" suffix.
+_MTL_INLINE NS::UInteger MTL::RenderPipelineState::imageblockMemoryLength(MTL::Size imageblockDimensions)
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(imageblockMemoryLengthForDimensions_), imageblockDimensions);
+}
+
+// property: supportIndirectCommandBuffers -- read-only bool fetched via Object::sendMessageSafe rather than sendMessage. NOTE(review): presumably this tolerates a receiver that does not implement the selector -- confirm in NS::Object.
+_MTL_INLINE bool MTL::RenderPipelineState::supportIndirectCommandBuffers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportIndirectCommandBuffers));
+}
+
+// method: functionHandleWithFunction:stage: -- forwards (function, stage) in that order and returns the reply as MTL::FunctionHandle*.
+_MTL_INLINE MTL::FunctionHandle* MTL::RenderPipelineState::functionHandle(const MTL::Function* function, MTL::RenderStages stage)
+{
+    return Object::sendMessage<MTL::FunctionHandle*>(this, _MTL_PRIVATE_SEL(functionHandleWithFunction_stage_), function, stage);
+}
+
+// method: newVisibleFunctionTableWithDescriptor:stage: -- forwards (descriptor, stage) and returns a MTL::VisibleFunctionTable*. NOTE(review): `new` prefix presumably means the caller owns the result -- confirm.
+_MTL_INLINE MTL::VisibleFunctionTable* MTL::RenderPipelineState::newVisibleFunctionTable(const MTL::VisibleFunctionTableDescriptor* descriptor, MTL::RenderStages stage)
+{
+    return Object::sendMessage<MTL::VisibleFunctionTable*>(this, _MTL_PRIVATE_SEL(newVisibleFunctionTableWithDescriptor_stage_), descriptor, stage);
+}
+
+// method: newIntersectionFunctionTableWithDescriptor:stage: -- forwards descriptor and stage unchanged and returns the reply as MTL::IntersectionFunctionTable*.
+_MTL_INLINE MTL::IntersectionFunctionTable* MTL::RenderPipelineState::newIntersectionFunctionTable(const MTL::IntersectionFunctionTableDescriptor* descriptor, MTL::RenderStages stage)
+{
+    return Object::sendMessage<MTL::IntersectionFunctionTable*>(this, _MTL_PRIVATE_SEL(newIntersectionFunctionTableWithDescriptor_stage_), descriptor, stage);
+}
+
+// method: newRenderPipelineStateWithAdditionalBinaryFunctions:error: -- forwards the descriptor and the NS::Error** out-pointer unchanged and returns the reply as MTL::RenderPipelineState*.
+_MTL_INLINE MTL::RenderPipelineState* MTL::RenderPipelineState::newRenderPipelineState(const MTL::RenderPipelineFunctionsDescriptor* additionalBinaryFunctions, NS::Error** error)
+{
+    return Object::sendMessage<MTL::RenderPipelineState*>(this, _MTL_PRIVATE_SEL(newRenderPipelineStateWithAdditionalBinaryFunctions_error_), additionalBinaryFunctions, error);
+}
+
+// static method: alloc -- calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLRenderPipelineColorAttachmentDescriptorArray) and returns the result typed as this wrapper class.
+_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptorArray* MTL::RenderPipelineColorAttachmentDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::RenderPipelineColorAttachmentDescriptorArray>(_MTL_PRIVATE_CLS(MTLRenderPipelineColorAttachmentDescriptorArray));
+}
+
+// method: init -- takes no arguments; returns the result of NS::Object::init typed as MTL::RenderPipelineColorAttachmentDescriptorArray*.
+_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptorArray* MTL::RenderPipelineColorAttachmentDescriptorArray::init()
+{
+    return NS::Object::init<MTL::RenderPipelineColorAttachmentDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: -- forwards attachmentIndex and returns the reply as MTL::RenderPipelineColorAttachmentDescriptor*; no index checking happens in this wrapper.
+_MTL_INLINE MTL::RenderPipelineColorAttachmentDescriptor* MTL::RenderPipelineColorAttachmentDescriptorArray::object(NS::UInteger attachmentIndex)
+{
+    return Object::sendMessage<MTL::RenderPipelineColorAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex);
+}
+
+// method: setObject:atIndexedSubscript: -- forwards attachment and attachmentIndex in that order; returns nothing.
+_MTL_INLINE void MTL::RenderPipelineColorAttachmentDescriptorArray::setObject(const MTL::RenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex);
+}
+
+// static method: alloc -- calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLTileRenderPipelineColorAttachmentDescriptor) and returns the result typed as this wrapper class.
+_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptor* MTL::TileRenderPipelineColorAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::TileRenderPipelineColorAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLTileRenderPipelineColorAttachmentDescriptor));
+}
+
+// method: init -- takes no arguments; returns the result of NS::Object::init typed as MTL::TileRenderPipelineColorAttachmentDescriptor*.
+_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptor* MTL::TileRenderPipelineColorAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::TileRenderPipelineColorAttachmentDescriptor>();
+}
+
+// property: pixelFormat -- the getter sends pixelFormat and returns the reply as MTL::PixelFormat; the setter below sends setPixelFormat_ with the new value and returns nothing.
+_MTL_INLINE MTL::PixelFormat MTL::TileRenderPipelineColorAttachmentDescriptor::pixelFormat() const
+{
+    return Object::sendMessage<MTL::PixelFormat>(this, _MTL_PRIVATE_SEL(pixelFormat));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineColorAttachmentDescriptor::setPixelFormat(MTL::PixelFormat pixelFormat)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPixelFormat_), pixelFormat);
+}
+
+// static method: alloc -- calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLTileRenderPipelineColorAttachmentDescriptorArray) and returns the result typed as this wrapper class.
+_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptorArray* MTL::TileRenderPipelineColorAttachmentDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::TileRenderPipelineColorAttachmentDescriptorArray>(_MTL_PRIVATE_CLS(MTLTileRenderPipelineColorAttachmentDescriptorArray));
+}
+
+// method: init -- takes no arguments; returns the result of NS::Object::init typed as MTL::TileRenderPipelineColorAttachmentDescriptorArray*.
+_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptorArray* MTL::TileRenderPipelineColorAttachmentDescriptorArray::init()
+{
+    return NS::Object::init<MTL::TileRenderPipelineColorAttachmentDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: -- forwards attachmentIndex and returns the reply as MTL::TileRenderPipelineColorAttachmentDescriptor*; no index checking happens in this wrapper.
+_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptor* MTL::TileRenderPipelineColorAttachmentDescriptorArray::object(NS::UInteger attachmentIndex)
+{
+    return Object::sendMessage<MTL::TileRenderPipelineColorAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex);
+}
+
+// method: setObject:atIndexedSubscript: -- forwards attachment and attachmentIndex in that order; returns nothing.
+_MTL_INLINE void MTL::TileRenderPipelineColorAttachmentDescriptorArray::setObject(const MTL::TileRenderPipelineColorAttachmentDescriptor* attachment, NS::UInteger attachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex);
+}
+
+// static method: alloc -- calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLTileRenderPipelineDescriptor) and returns the result typed as MTL::TileRenderPipelineDescriptor*.
+_MTL_INLINE MTL::TileRenderPipelineDescriptor* MTL::TileRenderPipelineDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::TileRenderPipelineDescriptor>(_MTL_PRIVATE_CLS(MTLTileRenderPipelineDescriptor));
+}
+
+// method: init -- takes no arguments; returns the result of NS::Object::init typed as MTL::TileRenderPipelineDescriptor*.
+_MTL_INLINE MTL::TileRenderPipelineDescriptor* MTL::TileRenderPipelineDescriptor::init()
+{
+    return NS::Object::init<MTL::TileRenderPipelineDescriptor>();
+}
+
+// property: label -- the getter sends label and returns the reply as NS::String*; the setter below sends setLabel_ with the given string pointer and returns nothing.
+_MTL_INLINE NS::String* MTL::TileRenderPipelineDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: tileFunction -- the getter sends tileFunction and returns the reply as MTL::Function*; the setter below sends setTileFunction_ with the given pointer and returns nothing.
+_MTL_INLINE MTL::Function* MTL::TileRenderPipelineDescriptor::tileFunction() const
+{
+    return Object::sendMessage<MTL::Function*>(this, _MTL_PRIVATE_SEL(tileFunction));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setTileFunction(const MTL::Function* tileFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTileFunction_), tileFunction);
+}
+
+// property: rasterSampleCount -- the getter sends rasterSampleCount and returns the reply as NS::UInteger; the setter below sends setRasterSampleCount_ with the new value and returns nothing.
+_MTL_INLINE NS::UInteger MTL::TileRenderPipelineDescriptor::rasterSampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(rasterSampleCount));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setRasterSampleCount(NS::UInteger rasterSampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRasterSampleCount_), rasterSampleCount);
+}
+
+// property: colorAttachments -- sends the colorAttachments selector and returns the reply as MTL::TileRenderPipelineColorAttachmentDescriptorArray*; no setter is defined in this file section.
+_MTL_INLINE MTL::TileRenderPipelineColorAttachmentDescriptorArray* MTL::TileRenderPipelineDescriptor::colorAttachments() const
+{
+    return Object::sendMessage<MTL::TileRenderPipelineColorAttachmentDescriptorArray*>(this, _MTL_PRIVATE_SEL(colorAttachments));
+}
+
+// property: threadgroupSizeMatchesTileSize -- the getter sends threadgroupSizeMatchesTileSize and returns the reply as bool; the setter below sends setThreadgroupSizeMatchesTileSize_ with the new flag and returns nothing.
+_MTL_INLINE bool MTL::TileRenderPipelineDescriptor::threadgroupSizeMatchesTileSize() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(threadgroupSizeMatchesTileSize));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setThreadgroupSizeMatchesTileSize(bool threadgroupSizeMatchesTileSize)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setThreadgroupSizeMatchesTileSize_), threadgroupSizeMatchesTileSize);
+}
+
+// property: tileBuffers -- sends the tileBuffers selector and returns the reply as MTL::PipelineBufferDescriptorArray*; no setter is defined in this file section.
+_MTL_INLINE MTL::PipelineBufferDescriptorArray* MTL::TileRenderPipelineDescriptor::tileBuffers() const
+{
+    return Object::sendMessage<MTL::PipelineBufferDescriptorArray*>(this, _MTL_PRIVATE_SEL(tileBuffers));
+}
+
+// property: maxTotalThreadsPerThreadgroup -- the getter sends maxTotalThreadsPerThreadgroup and returns the reply as NS::UInteger; the setter below sends setMaxTotalThreadsPerThreadgroup_ with the new value and returns nothing.
+_MTL_INLINE NS::UInteger MTL::TileRenderPipelineDescriptor::maxTotalThreadsPerThreadgroup() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxTotalThreadsPerThreadgroup));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setMaxTotalThreadsPerThreadgroup(NS::UInteger maxTotalThreadsPerThreadgroup)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxTotalThreadsPerThreadgroup_), maxTotalThreadsPerThreadgroup);
+}
+
+// property: binaryArchives -- the getter sends binaryArchives and returns the reply as NS::Array*; the setter below sends setBinaryArchives_ with the given array pointer and returns nothing.
+_MTL_INLINE NS::Array* MTL::TileRenderPipelineDescriptor::binaryArchives() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(binaryArchives));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setBinaryArchives(const NS::Array* binaryArchives)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBinaryArchives_), binaryArchives);
+}
+
+// property: preloadedLibraries -- the getter sends preloadedLibraries and returns the reply as NS::Array*; the setter below sends setPreloadedLibraries_ with the given array pointer and returns nothing.
+_MTL_INLINE NS::Array* MTL::TileRenderPipelineDescriptor::preloadedLibraries() const
+{
+    return Object::sendMessage<NS::Array*>(this, _MTL_PRIVATE_SEL(preloadedLibraries));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setPreloadedLibraries(const NS::Array* preloadedLibraries)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPreloadedLibraries_), preloadedLibraries);
+}
+
+// property: linkedFunctions -- the getter sends linkedFunctions and returns the reply as MTL::LinkedFunctions*; the setter below sends setLinkedFunctions_ with the given pointer and returns nothing.
+_MTL_INLINE MTL::LinkedFunctions* MTL::TileRenderPipelineDescriptor::linkedFunctions() const
+{
+    return Object::sendMessage<MTL::LinkedFunctions*>(this, _MTL_PRIVATE_SEL(linkedFunctions));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setLinkedFunctions(const MTL::LinkedFunctions* linkedFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLinkedFunctions_), linkedFunctions);
+}
+
+// property: supportAddingBinaryFunctions -- the getter uses sendMessageSafe (NOTE(review): presumably guards against receivers lacking the selector -- confirm in NS::Object); the setter below uses plain sendMessage with setSupportAddingBinaryFunctions_.
+_MTL_INLINE bool MTL::TileRenderPipelineDescriptor::supportAddingBinaryFunctions() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportAddingBinaryFunctions));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setSupportAddingBinaryFunctions(bool supportAddingBinaryFunctions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportAddingBinaryFunctions_), supportAddingBinaryFunctions);
+}
+
+// property: maxCallStackDepth -- the getter sends maxCallStackDepth and returns the reply as NS::UInteger; the setter below sends setMaxCallStackDepth_ with the new value and returns nothing.
+_MTL_INLINE NS::UInteger MTL::TileRenderPipelineDescriptor::maxCallStackDepth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxCallStackDepth));
+}
+
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::setMaxCallStackDepth(NS::UInteger maxCallStackDepth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxCallStackDepth_), maxCallStackDepth);
+}
+
+// method: reset -- sends the argument-less reset selector to this descriptor; returns nothing.
+_MTL_INLINE void MTL::TileRenderPipelineDescriptor::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
diff --git a/metal-cpp/Metal/MTLResource.hpp b/metal-cpp/Metal/MTLResource.hpp
new file mode 100644
index 0000000..f3e268b
--- /dev/null
+++ b/metal-cpp/Metal/MTLResource.hpp
@@ -0,0 +1,178 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLResource.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLResource.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, PurgeableState) { // explicit values run 1..4; no enumerator with value 0 is declared here
+    PurgeableStateKeepCurrent = 1,
+    PurgeableStateNonVolatile = 2,
+    PurgeableStateVolatile = 3,
+    PurgeableStateEmpty = 4,
+};
+
+_MTL_ENUM(NS::UInteger, CPUCacheMode) { // values equal the ResourceCPUCacheMode* entries of ResourceOptions below
+    CPUCacheModeDefaultCache = 0,
+    CPUCacheModeWriteCombined = 1,
+};
+
+_MTL_ENUM(NS::UInteger, StorageMode) { // ResourceStorageMode* below equals these values multiplied by 16
+    StorageModeShared = 0,
+    StorageModeManaged = 1,
+    StorageModePrivate = 2,
+    StorageModeMemoryless = 3,
+};
+
+_MTL_ENUM(NS::UInteger, HazardTrackingMode) { // ResourceHazardTrackingMode* below equals these values multiplied by 256
+    HazardTrackingModeDefault = 0,
+    HazardTrackingModeUntracked = 1,
+    HazardTrackingModeTracked = 2,
+};
+
+_MTL_OPTIONS(NS::UInteger, ResourceOptions) { // several names share the value 0 and two share the value 1 (aliases)
+    ResourceStorageModeShared = 0,
+    ResourceHazardTrackingModeDefault = 0,
+    ResourceCPUCacheModeDefaultCache = 0,
+    ResourceOptionCPUCacheModeDefault = 0, // same value as ResourceCPUCacheModeDefaultCache
+    ResourceCPUCacheModeWriteCombined = 1,
+    ResourceOptionCPUCacheModeWriteCombined = 1, // same value as ResourceCPUCacheModeWriteCombined
+    ResourceStorageModeManaged = 16, // StorageModeManaged (1) << 4
+    ResourceStorageModePrivate = 32, // StorageModePrivate (2) << 4
+    ResourceStorageModeMemoryless = 48, // StorageModeMemoryless (3) << 4
+    ResourceHazardTrackingModeUntracked = 256, // HazardTrackingModeUntracked (1) << 8
+    ResourceHazardTrackingModeTracked = 512, // HazardTrackingModeTracked (2) << 8
+};
+
+class Resource : public NS::Referencing<Resource> // declarations only; the inline definitions follow the namespace and forward to Object::sendMessage
+{
+public:
+    NS::String*             label() const;
+    void                    setLabel(const NS::String* label);
+
+    class Device*           device() const;
+
+    MTL::CPUCacheMode       cpuCacheMode() const;
+
+    MTL::StorageMode        storageMode() const;
+
+    MTL::HazardTrackingMode hazardTrackingMode() const;
+
+    MTL::ResourceOptions    resourceOptions() const;
+
+    MTL::PurgeableState     setPurgeableState(MTL::PurgeableState state); // returns a PurgeableState as well as taking one
+
+    class Heap*             heap() const;
+
+    NS::UInteger            heapOffset() const;
+
+    NS::UInteger            allocatedSize() const;
+
+    void                    makeAliasable();
+
+    bool                    isAliasable(); // NOTE(review): not const-qualified, unlike the other query members of this class
+};
+
+}
+
+// property: label -- the getter sends label and returns the reply as NS::String*; the setter below sends setLabel_ with the given string pointer and returns nothing.
+_MTL_INLINE NS::String* MTL::Resource::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::Resource::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: device -- sends the device selector to this resource and returns the reply as MTL::Device*.
+_MTL_INLINE MTL::Device* MTL::Resource::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: cpuCacheMode -- sends the cpuCacheMode selector to this resource and returns the reply as MTL::CPUCacheMode.
+_MTL_INLINE MTL::CPUCacheMode MTL::Resource::cpuCacheMode() const
+{
+    return Object::sendMessage<MTL::CPUCacheMode>(this, _MTL_PRIVATE_SEL(cpuCacheMode));
+}
+
+// property: storageMode -- sends the storageMode selector to this resource and returns the reply as MTL::StorageMode.
+_MTL_INLINE MTL::StorageMode MTL::Resource::storageMode() const
+{
+    return Object::sendMessage<MTL::StorageMode>(this, _MTL_PRIVATE_SEL(storageMode));
+}
+
+// property: hazardTrackingMode -- sends the hazardTrackingMode selector to this resource and returns the reply as MTL::HazardTrackingMode.
+_MTL_INLINE MTL::HazardTrackingMode MTL::Resource::hazardTrackingMode() const
+{
+    return Object::sendMessage<MTL::HazardTrackingMode>(this, _MTL_PRIVATE_SEL(hazardTrackingMode));
+}
+
+// property: resourceOptions -- sends the resourceOptions selector to this resource and returns the reply as MTL::ResourceOptions.
+_MTL_INLINE MTL::ResourceOptions MTL::Resource::resourceOptions() const
+{
+    return Object::sendMessage<MTL::ResourceOptions>(this, _MTL_PRIVATE_SEL(resourceOptions));
+}
+
+// method: setPurgeableState: -- forwards state and returns the reply as MTL::PurgeableState (NOTE(review): presumably the state before the change -- confirm against Apple's MTLResource documentation).
+_MTL_INLINE MTL::PurgeableState MTL::Resource::setPurgeableState(MTL::PurgeableState state)
+{
+    return Object::sendMessage<MTL::PurgeableState>(this, _MTL_PRIVATE_SEL(setPurgeableState_), state);
+}
+
+// property: heap -- sends the heap selector to this resource and returns the reply as MTL::Heap*.
+_MTL_INLINE MTL::Heap* MTL::Resource::heap() const
+{
+    return Object::sendMessage<MTL::Heap*>(this, _MTL_PRIVATE_SEL(heap));
+}
+
+// property: heapOffset -- sends the heapOffset selector to this resource and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Resource::heapOffset() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(heapOffset));
+}
+
+// property: allocatedSize -- sends the allocatedSize selector to this resource and returns the reply as NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Resource::allocatedSize() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(allocatedSize));
+}
+
+// method: makeAliasable -- sends the argument-less makeAliasable selector to this resource; returns nothing.
+_MTL_INLINE void MTL::Resource::makeAliasable()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(makeAliasable));
+}
+
+// method: isAliasable -- sends the isAliasable selector and returns the reply as bool; note this member is non-const although it only performs a query send.
+_MTL_INLINE bool MTL::Resource::isAliasable()
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isAliasable));
+}
diff --git a/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp b/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp
new file mode 100644
index 0000000..b683ac1
--- /dev/null
+++ b/metal-cpp/Metal/MTLResourceStateCommandEncoder.hpp
@@ -0,0 +1,94 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLResourceStateCommandEncoder.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLCommandEncoder.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, SparseTextureMappingMode) { // passed as the mode argument of every updateTextureMapping(s) overload below
+    SparseTextureMappingModeMap = 0,
+    SparseTextureMappingModeUnmap = 1,
+};
+
+struct MapIndirectArguments // eight uint32_t fields: region origin (x, y, z), region size (w, h, d), mip level, slice id; tagged _MTL_PACKED at the closing brace
+{
+    uint32_t regionOriginX;
+    uint32_t regionOriginY;
+    uint32_t regionOriginZ;
+    uint32_t regionSizeWidth;
+    uint32_t regionSizeHeight;
+    uint32_t regionSizeDepth;
+    uint32_t mipMapLevel;
+    uint32_t sliceId;
+} _MTL_PACKED;
+
+class ResourceStateCommandEncoder : public NS::Referencing<ResourceStateCommandEncoder, CommandEncoder> // declarations only; inline definitions follow the namespace
+{
+public:
+    void updateTextureMappings(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region* regions, const NS::UInteger* mipLevels, const NS::UInteger* slices, NS::UInteger numRegions);
+
+    void updateTextureMapping(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region region, const NS::UInteger mipLevel, const NS::UInteger slice);
+
+    void updateTextureMapping(const class Texture* texture, const MTL::SparseTextureMappingMode mode, const class Buffer* indirectBuffer, NS::UInteger indirectBufferOffset); // overload taking a buffer and offset instead of an explicit region
+
+    void updateFence(const class Fence* fence);
+
+    void waitForFence(const class Fence* fence);
+};
+
+}
+
+// method: updateTextureMappings:mode:regions:mipLevels:slices:numRegions: -- forwards all six arguments in declaration order; the three pointer arguments and numRegions are passed through without inspection.
+_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMappings(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region* regions, const NS::UInteger* mipLevels, const NS::UInteger* slices, NS::UInteger numRegions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateTextureMappings_mode_regions_mipLevels_slices_numRegions_), texture, mode, regions, mipLevels, slices, numRegions);
+}
+
+// method: updateTextureMapping:mode:region:mipLevel:slice: -- single-region overload; forwards texture, mode, region (by value), mipLevel and slice in that order and returns nothing.
+_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMapping(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Region region, const NS::UInteger mipLevel, const NS::UInteger slice)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateTextureMapping_mode_region_mipLevel_slice_), texture, mode, region, mipLevel, slice);
+}
+
+// method: updateTextureMapping:mode:indirectBuffer:indirectBufferOffset: -- indirect overload; forwards texture, mode, indirectBuffer and indirectBufferOffset in that order and returns nothing.
+_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateTextureMapping(const MTL::Texture* texture, const MTL::SparseTextureMappingMode mode, const MTL::Buffer* indirectBuffer, NS::UInteger indirectBufferOffset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateTextureMapping_mode_indirectBuffer_indirectBufferOffset_), texture, mode, indirectBuffer, indirectBufferOffset);
+}
+
+// method: updateFence: -- forwards the fence pointer to this encoder via the updateFence_ selector; returns nothing.
+_MTL_INLINE void MTL::ResourceStateCommandEncoder::updateFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(updateFence_), fence);
+}
+
+// method: waitForFence: -- forwards the fence pointer to this encoder via the waitForFence_ selector; returns nothing.
+_MTL_INLINE void MTL::ResourceStateCommandEncoder::waitForFence(const MTL::Fence* fence)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(waitForFence_), fence);
+}
diff --git a/metal-cpp/Metal/MTLResourceStatePass.hpp b/metal-cpp/Metal/MTLResourceStatePass.hpp
new file mode 100644
index 0000000..fc770f6
--- /dev/null
+++ b/metal-cpp/Metal/MTLResourceStatePass.hpp
@@ -0,0 +1,165 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLResourceStatePass.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+namespace MTL
+{
+class ResourceStatePassSampleBufferAttachmentDescriptor : public NS::Copying<ResourceStatePassSampleBufferAttachmentDescriptor> // one sample buffer plus start/end sample indices, each with getter and setter
+{
+public:
+    static class ResourceStatePassSampleBufferAttachmentDescriptor* alloc();
+
+    class ResourceStatePassSampleBufferAttachmentDescriptor*        init();
+
+    class CounterSampleBuffer*                                      sampleBuffer() const;
+    void                                                            setSampleBuffer(const class CounterSampleBuffer* sampleBuffer);
+
+    NS::UInteger                                                    startOfEncoderSampleIndex() const;
+    void                                                            setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex);
+
+    NS::UInteger                                                    endOfEncoderSampleIndex() const;
+    void                                                            setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex);
+};
+
+class ResourceStatePassSampleBufferAttachmentDescriptorArray : public NS::Referencing<ResourceStatePassSampleBufferAttachmentDescriptorArray> // indexed get/set of the descriptor class declared above
+{
+public:
+    static class ResourceStatePassSampleBufferAttachmentDescriptorArray* alloc();
+
+    class ResourceStatePassSampleBufferAttachmentDescriptorArray*        init();
+
+    class ResourceStatePassSampleBufferAttachmentDescriptor*             object(NS::UInteger attachmentIndex);
+
+    void                                                                 setObject(const class ResourceStatePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex);
+};
+
+class ResourceStatePassDescriptor : public NS::Copying<ResourceStatePassDescriptor> // exposes the attachment array through a getter only; no setter is declared
+{
+public:
+    static class ResourceStatePassDescriptor*                     alloc();
+
+    class ResourceStatePassDescriptor*                            init();
+
+    static class ResourceStatePassDescriptor*                     resourceStatePassDescriptor();
+
+    class ResourceStatePassSampleBufferAttachmentDescriptorArray* sampleBufferAttachments() const;
+};
+
+}
+
+// static method: alloc -- calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptor) and returns the result typed as this wrapper class.
+_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptor* MTL::ResourceStatePassSampleBufferAttachmentDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::ResourceStatePassSampleBufferAttachmentDescriptor>(_MTL_PRIVATE_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptor));
+}
+
+// method: init -- takes no arguments; returns the result of NS::Object::init typed as MTL::ResourceStatePassSampleBufferAttachmentDescriptor*.
+_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptor* MTL::ResourceStatePassSampleBufferAttachmentDescriptor::init()
+{
+    return NS::Object::init<MTL::ResourceStatePassSampleBufferAttachmentDescriptor>();
+}
+
+// property: sampleBuffer -- the getter sends sampleBuffer and returns the reply as MTL::CounterSampleBuffer*; the setter below sends setSampleBuffer_ with the given pointer and returns nothing.
+_MTL_INLINE MTL::CounterSampleBuffer* MTL::ResourceStatePassSampleBufferAttachmentDescriptor::sampleBuffer() const
+{
+    return Object::sendMessage<MTL::CounterSampleBuffer*>(this, _MTL_PRIVATE_SEL(sampleBuffer));
+}
+
+_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptor::setSampleBuffer(const MTL::CounterSampleBuffer* sampleBuffer)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleBuffer_), sampleBuffer);
+}
+
+// property: startOfEncoderSampleIndex -- the getter sends startOfEncoderSampleIndex and returns the reply as NS::UInteger; the setter below sends setStartOfEncoderSampleIndex_ with the new value and returns nothing.
+_MTL_INLINE NS::UInteger MTL::ResourceStatePassSampleBufferAttachmentDescriptor::startOfEncoderSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(startOfEncoderSampleIndex));
+}
+
+_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptor::setStartOfEncoderSampleIndex(NS::UInteger startOfEncoderSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStartOfEncoderSampleIndex_), startOfEncoderSampleIndex);
+}
+
+// property: endOfEncoderSampleIndex -- the getter sends endOfEncoderSampleIndex and returns the reply as NS::UInteger; the setter below sends setEndOfEncoderSampleIndex_ with the new value and returns nothing.
+_MTL_INLINE NS::UInteger MTL::ResourceStatePassSampleBufferAttachmentDescriptor::endOfEncoderSampleIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(endOfEncoderSampleIndex));
+}
+
+_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptor::setEndOfEncoderSampleIndex(NS::UInteger endOfEncoderSampleIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setEndOfEncoderSampleIndex_), endOfEncoderSampleIndex);
+}
+
+// static method: alloc -- calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptorArray) and returns the result typed as this wrapper class.
+_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray* MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray>(_MTL_PRIVATE_CLS(MTLResourceStatePassSampleBufferAttachmentDescriptorArray));
+}
+
+// method: init -- takes no arguments; returns the result of NS::Object::init typed as MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray*.
+_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray* MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::init()
+{
+    return NS::Object::init<MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: -- forwards attachmentIndex and returns the reply as MTL::ResourceStatePassSampleBufferAttachmentDescriptor*; no index checking happens in this wrapper.
+_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptor* MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::object(NS::UInteger attachmentIndex)
+{
+    return Object::sendMessage<MTL::ResourceStatePassSampleBufferAttachmentDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), attachmentIndex);
+}
+
+// method: setObject:atIndexedSubscript: -- forwards attachment and attachmentIndex in that order; returns nothing.
+_MTL_INLINE void MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray::setObject(const MTL::ResourceStatePassSampleBufferAttachmentDescriptor* attachment, NS::UInteger attachmentIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attachment, attachmentIndex);
+}
+
+// static method: alloc -- calls NS::Object::alloc with _MTL_PRIVATE_CLS(MTLResourceStatePassDescriptor) and returns the result typed as MTL::ResourceStatePassDescriptor*.
+_MTL_INLINE MTL::ResourceStatePassDescriptor* MTL::ResourceStatePassDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::ResourceStatePassDescriptor>(_MTL_PRIVATE_CLS(MTLResourceStatePassDescriptor));
+}
+
+// method: init -- takes no arguments; returns the result of NS::Object::init typed as MTL::ResourceStatePassDescriptor*.
+_MTL_INLINE MTL::ResourceStatePassDescriptor* MTL::ResourceStatePassDescriptor::init()
+{
+    return NS::Object::init<MTL::ResourceStatePassDescriptor>();
+}
+
+// static method: resourceStatePassDescriptor -- the message receiver is the class obtained from _MTL_PRIVATE_CLS(MTLResourceStatePassDescriptor), not an instance; the reply is returned as MTL::ResourceStatePassDescriptor*.
+_MTL_INLINE MTL::ResourceStatePassDescriptor* MTL::ResourceStatePassDescriptor::resourceStatePassDescriptor()
+{
+    return Object::sendMessage<MTL::ResourceStatePassDescriptor*>(_MTL_PRIVATE_CLS(MTLResourceStatePassDescriptor), _MTL_PRIVATE_SEL(resourceStatePassDescriptor));
+}
+
+// property: sampleBufferAttachments -- sends the sampleBufferAttachments selector and returns the reply as MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray*; getter only.
+_MTL_INLINE MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray* MTL::ResourceStatePassDescriptor::sampleBufferAttachments() const
+{
+    return Object::sendMessage<MTL::ResourceStatePassSampleBufferAttachmentDescriptorArray*>(this, _MTL_PRIVATE_SEL(sampleBufferAttachments));
+}
diff --git a/metal-cpp/Metal/MTLSampler.hpp b/metal-cpp/Metal/MTLSampler.hpp
new file mode 100644
index 0000000..50e7915
--- /dev/null
+++ b/metal-cpp/Metal/MTLSampler.hpp
@@ -0,0 +1,310 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLSampler.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLDepthStencil.hpp"
+#include "MTLSampler.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, SamplerMinMagFilter) { // type of SamplerDescriptor::minFilter and ::magFilter
+    SamplerMinMagFilterNearest = 0,
+    SamplerMinMagFilterLinear = 1,
+};
+
+_MTL_ENUM(NS::UInteger, SamplerMipFilter) { // type of SamplerDescriptor::mipFilter
+    SamplerMipFilterNotMipmapped = 0,
+    SamplerMipFilterNearest = 1,
+    SamplerMipFilterLinear = 2,
+};
+
+_MTL_ENUM(NS::UInteger, SamplerAddressMode) { // type of SamplerDescriptor::sAddressMode, ::tAddressMode and ::rAddressMode
+    SamplerAddressModeClampToEdge = 0,
+    SamplerAddressModeMirrorClampToEdge = 1,
+    SamplerAddressModeRepeat = 2,
+    SamplerAddressModeMirrorRepeat = 3,
+    SamplerAddressModeClampToZero = 4,
+    SamplerAddressModeClampToBorderColor = 5,
+};
+
+_MTL_ENUM(NS::UInteger, SamplerBorderColor) { // type of SamplerDescriptor::borderColor
+    SamplerBorderColorTransparentBlack = 0,
+    SamplerBorderColorOpaqueBlack = 1,
+    SamplerBorderColorOpaqueWhite = 2,
+};
+
+class SamplerDescriptor : public NS::Copying<SamplerDescriptor> // declaration only; every member is defined inline later in this header
+{
+public:
+    static class SamplerDescriptor* alloc(); // defined via NS::Object::alloc with the MTLSamplerDescriptor class
+
+    class SamplerDescriptor*        init(); // defined via NS::Object::init
+
+    MTL::SamplerMinMagFilter        minFilter() const; // each getter below is paired with a setXxx() setter of the same type
+    void                            setMinFilter(MTL::SamplerMinMagFilter minFilter);
+
+    MTL::SamplerMinMagFilter        magFilter() const;
+    void                            setMagFilter(MTL::SamplerMinMagFilter magFilter);
+
+    MTL::SamplerMipFilter           mipFilter() const;
+    void                            setMipFilter(MTL::SamplerMipFilter mipFilter);
+
+    NS::UInteger                    maxAnisotropy() const;
+    void                            setMaxAnisotropy(NS::UInteger maxAnisotropy);
+
+    MTL::SamplerAddressMode         sAddressMode() const;
+    void                            setSAddressMode(MTL::SamplerAddressMode sAddressMode);
+
+    MTL::SamplerAddressMode         tAddressMode() const;
+    void                            setTAddressMode(MTL::SamplerAddressMode tAddressMode);
+
+    MTL::SamplerAddressMode         rAddressMode() const;
+    void                            setRAddressMode(MTL::SamplerAddressMode rAddressMode);
+
+    MTL::SamplerBorderColor         borderColor() const;
+    void                            setBorderColor(MTL::SamplerBorderColor borderColor);
+
+    bool                            normalizedCoordinates() const;
+    void                            setNormalizedCoordinates(bool normalizedCoordinates);
+
+    float                           lodMinClamp() const;
+    void                            setLodMinClamp(float lodMinClamp);
+
+    float                           lodMaxClamp() const;
+    void                            setLodMaxClamp(float lodMaxClamp);
+
+    bool                            lodAverage() const;
+    void                            setLodAverage(bool lodAverage);
+
+    MTL::CompareFunction            compareFunction() const; // MTL::CompareFunction is not declared in this header (MTLDepthStencil.hpp is included above)
+    void                            setCompareFunction(MTL::CompareFunction compareFunction);
+
+    bool                            supportArgumentBuffers() const; // the inline definition of this getter uses Object::sendMessageSafe rather than sendMessage
+    void                            setSupportArgumentBuffers(bool supportArgumentBuffers);
+
+    NS::String*                     label() const;
+    void                            setLabel(const NS::String* label);
+};
+
+class SamplerState : public NS::Referencing<SamplerState> // exposes two const getters only; no alloc/init or setters are declared
+{
+public:
+    NS::String*   label() const;
+
+    class Device* device() const;
+};
+
+}
+
+// static method: alloc - forwards to NS::Object::alloc with the MTLSamplerDescriptor class
+_MTL_INLINE MTL::SamplerDescriptor* MTL::SamplerDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::SamplerDescriptor>(_MTL_PRIVATE_CLS(MTLSamplerDescriptor));
+}
+
+// method: init - forwards to NS::Object::init, with the result typed as SamplerDescriptor
+_MTL_INLINE MTL::SamplerDescriptor* MTL::SamplerDescriptor::init()
+{
+    return NS::Object::init<MTL::SamplerDescriptor>();
+}
+
+// property: minFilter - getter forwards the minFilter selector; the setter after it forwards setMinFilter_ with the new value
+_MTL_INLINE MTL::SamplerMinMagFilter MTL::SamplerDescriptor::minFilter() const
+{
+    return Object::sendMessage<MTL::SamplerMinMagFilter>(this, _MTL_PRIVATE_SEL(minFilter));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setMinFilter(MTL::SamplerMinMagFilter minFilter)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMinFilter_), minFilter);
+}
+
+// property: magFilter - getter forwards the magFilter selector; the setter after it forwards setMagFilter_ with the new value
+_MTL_INLINE MTL::SamplerMinMagFilter MTL::SamplerDescriptor::magFilter() const
+{
+    return Object::sendMessage<MTL::SamplerMinMagFilter>(this, _MTL_PRIVATE_SEL(magFilter));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setMagFilter(MTL::SamplerMinMagFilter magFilter)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMagFilter_), magFilter);
+}
+
+// property: mipFilter - getter forwards the mipFilter selector; the setter after it forwards setMipFilter_ with the new value
+_MTL_INLINE MTL::SamplerMipFilter MTL::SamplerDescriptor::mipFilter() const
+{
+    return Object::sendMessage<MTL::SamplerMipFilter>(this, _MTL_PRIVATE_SEL(mipFilter));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setMipFilter(MTL::SamplerMipFilter mipFilter)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMipFilter_), mipFilter);
+}
+
+// property: maxAnisotropy - getter forwards the maxAnisotropy selector; the setter after it forwards setMaxAnisotropy_ with the new value
+_MTL_INLINE NS::UInteger MTL::SamplerDescriptor::maxAnisotropy() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(maxAnisotropy));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setMaxAnisotropy(NS::UInteger maxAnisotropy)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMaxAnisotropy_), maxAnisotropy);
+}
+
+// property: sAddressMode - getter forwards the sAddressMode selector; the setter after it forwards setSAddressMode_ with the new value
+_MTL_INLINE MTL::SamplerAddressMode MTL::SamplerDescriptor::sAddressMode() const
+{
+    return Object::sendMessage<MTL::SamplerAddressMode>(this, _MTL_PRIVATE_SEL(sAddressMode));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setSAddressMode(MTL::SamplerAddressMode sAddressMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSAddressMode_), sAddressMode);
+}
+
+// property: tAddressMode - getter forwards the tAddressMode selector; the setter after it forwards setTAddressMode_ with the new value
+_MTL_INLINE MTL::SamplerAddressMode MTL::SamplerDescriptor::tAddressMode() const
+{
+    return Object::sendMessage<MTL::SamplerAddressMode>(this, _MTL_PRIVATE_SEL(tAddressMode));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setTAddressMode(MTL::SamplerAddressMode tAddressMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTAddressMode_), tAddressMode);
+}
+
+// property: rAddressMode - getter forwards the rAddressMode selector; the setter after it forwards setRAddressMode_ with the new value
+_MTL_INLINE MTL::SamplerAddressMode MTL::SamplerDescriptor::rAddressMode() const
+{
+    return Object::sendMessage<MTL::SamplerAddressMode>(this, _MTL_PRIVATE_SEL(rAddressMode));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setRAddressMode(MTL::SamplerAddressMode rAddressMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setRAddressMode_), rAddressMode);
+}
+
+// property: borderColor - getter forwards the borderColor selector; the setter after it forwards setBorderColor_ with the new value
+_MTL_INLINE MTL::SamplerBorderColor MTL::SamplerDescriptor::borderColor() const
+{
+    return Object::sendMessage<MTL::SamplerBorderColor>(this, _MTL_PRIVATE_SEL(borderColor));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setBorderColor(MTL::SamplerBorderColor borderColor)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBorderColor_), borderColor);
+}
+
+// property: normalizedCoordinates - bool getter forwards the normalizedCoordinates selector; the setter after it forwards setNormalizedCoordinates_
+_MTL_INLINE bool MTL::SamplerDescriptor::normalizedCoordinates() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(normalizedCoordinates));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setNormalizedCoordinates(bool normalizedCoordinates)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setNormalizedCoordinates_), normalizedCoordinates);
+}
+
+// property: lodMinClamp - float getter forwards the lodMinClamp selector; the setter after it forwards setLodMinClamp_
+_MTL_INLINE float MTL::SamplerDescriptor::lodMinClamp() const
+{
+    return Object::sendMessage<float>(this, _MTL_PRIVATE_SEL(lodMinClamp));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setLodMinClamp(float lodMinClamp)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLodMinClamp_), lodMinClamp);
+}
+
+// property: lodMaxClamp - float getter forwards the lodMaxClamp selector; the setter after it forwards setLodMaxClamp_
+_MTL_INLINE float MTL::SamplerDescriptor::lodMaxClamp() const
+{
+    return Object::sendMessage<float>(this, _MTL_PRIVATE_SEL(lodMaxClamp));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setLodMaxClamp(float lodMaxClamp)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLodMaxClamp_), lodMaxClamp);
+}
+
+// property: lodAverage - bool getter forwards the lodAverage selector; the setter after it forwards setLodAverage_
+_MTL_INLINE bool MTL::SamplerDescriptor::lodAverage() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(lodAverage));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setLodAverage(bool lodAverage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLodAverage_), lodAverage);
+}
+
+// property: compareFunction - getter forwards the compareFunction selector; the setter after it forwards setCompareFunction_ with the new value
+_MTL_INLINE MTL::CompareFunction MTL::SamplerDescriptor::compareFunction() const
+{
+    return Object::sendMessage<MTL::CompareFunction>(this, _MTL_PRIVATE_SEL(compareFunction));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setCompareFunction(MTL::CompareFunction compareFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCompareFunction_), compareFunction);
+}
+
+// property: supportArgumentBuffers - NOTE(review): this getter uses Object::sendMessageSafe while the other SamplerDescriptor getters and its own setter use sendMessage; sendMessageSafe is defined elsewhere - confirm the difference is intended
+_MTL_INLINE bool MTL::SamplerDescriptor::supportArgumentBuffers() const
+{
+    return Object::sendMessageSafe<bool>(this, _MTL_PRIVATE_SEL(supportArgumentBuffers));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setSupportArgumentBuffers(bool supportArgumentBuffers)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSupportArgumentBuffers_), supportArgumentBuffers);
+}
+
+// property: label (SamplerDescriptor) - getter forwards the label selector; the setter after it forwards setLabel_ with the new NS::String pointer
+_MTL_INLINE NS::String* MTL::SamplerDescriptor::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+_MTL_INLINE void MTL::SamplerDescriptor::setLabel(const NS::String* label)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setLabel_), label);
+}
+
+// property: label (SamplerState) - getter only; forwards the label selector via Object::sendMessage
+_MTL_INLINE NS::String* MTL::SamplerState::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// property: device (SamplerState) - getter only; forwards the device selector and returns the result as MTL::Device*
+_MTL_INLINE MTL::Device* MTL::SamplerState::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
diff --git a/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp b/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp
new file mode 100644
index 0000000..879244e
--- /dev/null
+++ b/metal-cpp/Metal/MTLStageInputOutputDescriptor.hpp
@@ -0,0 +1,381 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLStageInputOutputDescriptor.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLStageInputOutputDescriptor.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, AttributeFormat) { // type of AttributeDescriptor::format; the numbering is not contiguous (see the note at 45)
+    AttributeFormatInvalid = 0,
+    AttributeFormatUChar2 = 1,
+    AttributeFormatUChar3 = 2,
+    AttributeFormatUChar4 = 3,
+    AttributeFormatChar2 = 4,
+    AttributeFormatChar3 = 5,
+    AttributeFormatChar4 = 6,
+    AttributeFormatUChar2Normalized = 7,
+    AttributeFormatUChar3Normalized = 8,
+    AttributeFormatUChar4Normalized = 9,
+    AttributeFormatChar2Normalized = 10,
+    AttributeFormatChar3Normalized = 11,
+    AttributeFormatChar4Normalized = 12,
+    AttributeFormatUShort2 = 13,
+    AttributeFormatUShort3 = 14,
+    AttributeFormatUShort4 = 15,
+    AttributeFormatShort2 = 16,
+    AttributeFormatShort3 = 17,
+    AttributeFormatShort4 = 18,
+    AttributeFormatUShort2Normalized = 19,
+    AttributeFormatUShort3Normalized = 20,
+    AttributeFormatUShort4Normalized = 21,
+    AttributeFormatShort2Normalized = 22,
+    AttributeFormatShort3Normalized = 23,
+    AttributeFormatShort4Normalized = 24,
+    AttributeFormatHalf2 = 25,
+    AttributeFormatHalf3 = 26,
+    AttributeFormatHalf4 = 27,
+    AttributeFormatFloat = 28,
+    AttributeFormatFloat2 = 29,
+    AttributeFormatFloat3 = 30,
+    AttributeFormatFloat4 = 31,
+    AttributeFormatInt = 32,
+    AttributeFormatInt2 = 33,
+    AttributeFormatInt3 = 34,
+    AttributeFormatInt4 = 35,
+    AttributeFormatUInt = 36,
+    AttributeFormatUInt2 = 37,
+    AttributeFormatUInt3 = 38,
+    AttributeFormatUInt4 = 39,
+    AttributeFormatInt1010102Normalized = 40,
+    AttributeFormatUInt1010102Normalized = 41,
+    AttributeFormatUChar4Normalized_BGRA = 42,
+    AttributeFormatUChar = 45, // values 43 and 44 are not defined in this enum
+    AttributeFormatChar = 46,
+    AttributeFormatUCharNormalized = 47,
+    AttributeFormatCharNormalized = 48,
+    AttributeFormatUShort = 49,
+    AttributeFormatShort = 50,
+    AttributeFormatUShortNormalized = 51,
+    AttributeFormatShortNormalized = 52,
+    AttributeFormatHalf = 53,
+};
+
+_MTL_ENUM(NS::UInteger, IndexType) { // type of StageInputOutputDescriptor::indexType
+    IndexTypeUInt16 = 0,
+    IndexTypeUInt32 = 1,
+};
+
+_MTL_ENUM(NS::UInteger, StepFunction) { // type of BufferLayoutDescriptor::stepFunction
+    StepFunctionConstant = 0,
+    StepFunctionPerVertex = 1,
+    StepFunctionPerInstance = 2,
+    StepFunctionPerPatch = 3,
+    StepFunctionPerPatchControlPoint = 4,
+    StepFunctionThreadPositionInGridX = 5,
+    StepFunctionThreadPositionInGridY = 6,
+    StepFunctionThreadPositionInGridXIndexed = 7,
+    StepFunctionThreadPositionInGridYIndexed = 8,
+};
+
+class BufferLayoutDescriptor : public NS::Copying<BufferLayoutDescriptor> // three getter/setter pairs: stride, stepFunction, stepRate
+{
+public:
+    static class BufferLayoutDescriptor* alloc();
+
+    class BufferLayoutDescriptor*        init();
+
+    NS::UInteger                         stride() const;
+    void                                 setStride(NS::UInteger stride);
+
+    MTL::StepFunction                    stepFunction() const;
+    void                                 setStepFunction(MTL::StepFunction stepFunction);
+
+    NS::UInteger                         stepRate() const;
+    void                                 setStepRate(NS::UInteger stepRate);
+};
+
+class BufferLayoutDescriptorArray : public NS::Referencing<BufferLayoutDescriptorArray> // indexed access to BufferLayoutDescriptor elements via object()/setObject()
+{
+public:
+    static class BufferLayoutDescriptorArray* alloc();
+
+    class BufferLayoutDescriptorArray*        init();
+
+    class BufferLayoutDescriptor*             object(NS::UInteger index); // defined with the objectAtIndexedSubscript_ selector
+
+    void                                      setObject(const class BufferLayoutDescriptor* bufferDesc, NS::UInteger index); // defined with setObject_atIndexedSubscript_
+};
+
+class AttributeDescriptor : public NS::Copying<AttributeDescriptor> // three getter/setter pairs: format, offset, bufferIndex
+{
+public:
+    static class AttributeDescriptor* alloc();
+
+    class AttributeDescriptor*        init();
+
+    MTL::AttributeFormat              format() const;
+    void                              setFormat(MTL::AttributeFormat format);
+
+    NS::UInteger                      offset() const;
+    void                              setOffset(NS::UInteger offset);
+
+    NS::UInteger                      bufferIndex() const;
+    void                              setBufferIndex(NS::UInteger bufferIndex);
+};
+
+class AttributeDescriptorArray : public NS::Referencing<AttributeDescriptorArray> // indexed access to AttributeDescriptor elements via object()/setObject()
+{
+public:
+    static class AttributeDescriptorArray* alloc();
+
+    class AttributeDescriptorArray*        init();
+
+    class AttributeDescriptor*             object(NS::UInteger index); // defined with the objectAtIndexedSubscript_ selector
+
+    void                                   setObject(const class AttributeDescriptor* attributeDesc, NS::UInteger index); // defined with setObject_atIndexedSubscript_
+};
+
+class StageInputOutputDescriptor : public NS::Copying<StageInputOutputDescriptor> // exposes the layouts/attributes arrays plus the index type and index buffer index
+{
+public:
+    static class StageInputOutputDescriptor* alloc();
+
+    class StageInputOutputDescriptor*        init();
+
+    static class StageInputOutputDescriptor* stageInputOutputDescriptor(); // static: its definition sends the selector to the class, not an instance
+
+    class BufferLayoutDescriptorArray*       layouts() const; // getter only; no setter is declared
+
+    class AttributeDescriptorArray*          attributes() const; // getter only; no setter is declared
+
+    MTL::IndexType                           indexType() const;
+    void                                     setIndexType(MTL::IndexType indexType);
+
+    NS::UInteger                             indexBufferIndex() const;
+    void                                     setIndexBufferIndex(NS::UInteger indexBufferIndex);
+
+    void                                     reset();
+};
+
+}
+
+// static method: alloc - forwards to NS::Object::alloc with the MTLBufferLayoutDescriptor class
+_MTL_INLINE MTL::BufferLayoutDescriptor* MTL::BufferLayoutDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::BufferLayoutDescriptor>(_MTL_PRIVATE_CLS(MTLBufferLayoutDescriptor));
+}
+
+// method: init - forwards to NS::Object::init, with the result typed as BufferLayoutDescriptor
+_MTL_INLINE MTL::BufferLayoutDescriptor* MTL::BufferLayoutDescriptor::init()
+{
+    return NS::Object::init<MTL::BufferLayoutDescriptor>();
+}
+
+// property: stride - getter forwards the stride selector; the setter after it forwards setStride_ with the new value
+_MTL_INLINE NS::UInteger MTL::BufferLayoutDescriptor::stride() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(stride));
+}
+
+_MTL_INLINE void MTL::BufferLayoutDescriptor::setStride(NS::UInteger stride)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStride_), stride);
+}
+
+// property: stepFunction - getter forwards the stepFunction selector; the setter after it forwards setStepFunction_ with the new value
+_MTL_INLINE MTL::StepFunction MTL::BufferLayoutDescriptor::stepFunction() const
+{
+    return Object::sendMessage<MTL::StepFunction>(this, _MTL_PRIVATE_SEL(stepFunction));
+}
+
+_MTL_INLINE void MTL::BufferLayoutDescriptor::setStepFunction(MTL::StepFunction stepFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStepFunction_), stepFunction);
+}
+
+// property: stepRate - getter forwards the stepRate selector; the setter after it forwards setStepRate_ with the new value
+_MTL_INLINE NS::UInteger MTL::BufferLayoutDescriptor::stepRate() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(stepRate));
+}
+
+_MTL_INLINE void MTL::BufferLayoutDescriptor::setStepRate(NS::UInteger stepRate)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStepRate_), stepRate);
+}
+
+// static method: alloc - forwards to NS::Object::alloc with the MTLBufferLayoutDescriptorArray class
+_MTL_INLINE MTL::BufferLayoutDescriptorArray* MTL::BufferLayoutDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::BufferLayoutDescriptorArray>(_MTL_PRIVATE_CLS(MTLBufferLayoutDescriptorArray));
+}
+
+// method: init - forwards to NS::Object::init, with the result typed as BufferLayoutDescriptorArray
+_MTL_INLINE MTL::BufferLayoutDescriptorArray* MTL::BufferLayoutDescriptorArray::init()
+{
+    return NS::Object::init<MTL::BufferLayoutDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: - forwards objectAtIndexedSubscript_ with index and returns the resulting BufferLayoutDescriptor pointer
+_MTL_INLINE MTL::BufferLayoutDescriptor* MTL::BufferLayoutDescriptorArray::object(NS::UInteger index)
+{
+    return Object::sendMessage<MTL::BufferLayoutDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index);
+}
+
+// method: setObject:atIndexedSubscript: - forwards setObject_atIndexedSubscript_ with (bufferDesc, index); returns nothing
+_MTL_INLINE void MTL::BufferLayoutDescriptorArray::setObject(const MTL::BufferLayoutDescriptor* bufferDesc, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), bufferDesc, index);
+}
+
+// static method: alloc - forwards to NS::Object::alloc with the MTLAttributeDescriptor class
+_MTL_INLINE MTL::AttributeDescriptor* MTL::AttributeDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::AttributeDescriptor>(_MTL_PRIVATE_CLS(MTLAttributeDescriptor));
+}
+
+// method: init - forwards to NS::Object::init, with the result typed as AttributeDescriptor
+_MTL_INLINE MTL::AttributeDescriptor* MTL::AttributeDescriptor::init()
+{
+    return NS::Object::init<MTL::AttributeDescriptor>();
+}
+
+// property: format - getter forwards the format selector; the setter after it forwards setFormat_ with the new value
+_MTL_INLINE MTL::AttributeFormat MTL::AttributeDescriptor::format() const
+{
+    return Object::sendMessage<MTL::AttributeFormat>(this, _MTL_PRIVATE_SEL(format));
+}
+
+_MTL_INLINE void MTL::AttributeDescriptor::setFormat(MTL::AttributeFormat format)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFormat_), format);
+}
+
+// property: offset - getter forwards the offset selector; the setter after it forwards setOffset_ with the new value
+_MTL_INLINE NS::UInteger MTL::AttributeDescriptor::offset() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(offset));
+}
+
+_MTL_INLINE void MTL::AttributeDescriptor::setOffset(NS::UInteger offset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOffset_), offset);
+}
+
+// property: bufferIndex - getter forwards the bufferIndex selector; the setter after it forwards setBufferIndex_ with the new value
+_MTL_INLINE NS::UInteger MTL::AttributeDescriptor::bufferIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferIndex));
+}
+
+_MTL_INLINE void MTL::AttributeDescriptor::setBufferIndex(NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBufferIndex_), bufferIndex);
+}
+
+// static method: alloc - forwards to NS::Object::alloc with the MTLAttributeDescriptorArray class
+_MTL_INLINE MTL::AttributeDescriptorArray* MTL::AttributeDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::AttributeDescriptorArray>(_MTL_PRIVATE_CLS(MTLAttributeDescriptorArray));
+}
+
+// method: init - forwards to NS::Object::init, with the result typed as AttributeDescriptorArray
+_MTL_INLINE MTL::AttributeDescriptorArray* MTL::AttributeDescriptorArray::init()
+{
+    return NS::Object::init<MTL::AttributeDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: - forwards objectAtIndexedSubscript_ with index and returns the resulting AttributeDescriptor pointer
+_MTL_INLINE MTL::AttributeDescriptor* MTL::AttributeDescriptorArray::object(NS::UInteger index)
+{
+    return Object::sendMessage<MTL::AttributeDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index);
+}
+
+// method: setObject:atIndexedSubscript: - forwards setObject_atIndexedSubscript_ with (attributeDesc, index); returns nothing
+_MTL_INLINE void MTL::AttributeDescriptorArray::setObject(const MTL::AttributeDescriptor* attributeDesc, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attributeDesc, index);
+}
+
+// static method: alloc - forwards to NS::Object::alloc with the MTLStageInputOutputDescriptor class
+_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::StageInputOutputDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::StageInputOutputDescriptor>(_MTL_PRIVATE_CLS(MTLStageInputOutputDescriptor));
+}
+
+// method: init - forwards to NS::Object::init, with the result typed as StageInputOutputDescriptor
+_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::StageInputOutputDescriptor::init()
+{
+    return NS::Object::init<MTL::StageInputOutputDescriptor>();
+}
+
+// static method: stageInputOutputDescriptor - the receiver is the MTLStageInputOutputDescriptor class (not an instance)
+_MTL_INLINE MTL::StageInputOutputDescriptor* MTL::StageInputOutputDescriptor::stageInputOutputDescriptor()
+{
+    return Object::sendMessage<MTL::StageInputOutputDescriptor*>(_MTL_PRIVATE_CLS(MTLStageInputOutputDescriptor), _MTL_PRIVATE_SEL(stageInputOutputDescriptor));
+}
+
+// property: layouts - getter only; forwards the layouts selector and returns a BufferLayoutDescriptorArray pointer
+_MTL_INLINE MTL::BufferLayoutDescriptorArray* MTL::StageInputOutputDescriptor::layouts() const
+{
+    return Object::sendMessage<MTL::BufferLayoutDescriptorArray*>(this, _MTL_PRIVATE_SEL(layouts));
+}
+
+// property: attributes - getter only; forwards the attributes selector and returns an AttributeDescriptorArray pointer
+_MTL_INLINE MTL::AttributeDescriptorArray* MTL::StageInputOutputDescriptor::attributes() const
+{
+    return Object::sendMessage<MTL::AttributeDescriptorArray*>(this, _MTL_PRIVATE_SEL(attributes));
+}
+
+// property: indexType - getter forwards the indexType selector; the setter after it forwards setIndexType_ with the new value
+_MTL_INLINE MTL::IndexType MTL::StageInputOutputDescriptor::indexType() const
+{
+    return Object::sendMessage<MTL::IndexType>(this, _MTL_PRIVATE_SEL(indexType));
+}
+
+_MTL_INLINE void MTL::StageInputOutputDescriptor::setIndexType(MTL::IndexType indexType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexType_), indexType);
+}
+
+// property: indexBufferIndex - getter forwards the indexBufferIndex selector; the setter after it forwards setIndexBufferIndex_ with the new value
+_MTL_INLINE NS::UInteger MTL::StageInputOutputDescriptor::indexBufferIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(indexBufferIndex));
+}
+
+_MTL_INLINE void MTL::StageInputOutputDescriptor::setIndexBufferIndex(NS::UInteger indexBufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setIndexBufferIndex_), indexBufferIndex);
+}
+
+// method: reset - forwards the reset selector with no arguments; returns nothing
+_MTL_INLINE void MTL::StageInputOutputDescriptor::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
diff --git a/metal-cpp/Metal/MTLTexture.hpp b/metal-cpp/Metal/MTLTexture.hpp
new file mode 100644
index 0000000..ea2ba49
--- /dev/null
+++ b/metal-cpp/Metal/MTLTexture.hpp
@@ -0,0 +1,658 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLTexture.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLPixelFormat.hpp"
+#include "MTLResource.hpp"
+#include "MTLTexture.hpp"
+#include "MTLTypes.hpp"
+#include <IOSurface/IOSurfaceRef.h>
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, TextureType) { // type of TextureDescriptor::textureType and Texture::textureType
+    TextureType1D = 0,
+    TextureType1DArray = 1,
+    TextureType2D = 2,
+    TextureType2DArray = 3,
+    TextureType2DMultisample = 4,
+    TextureTypeCube = 5,
+    TextureTypeCubeArray = 6,
+    TextureType3D = 7,
+    TextureType2DMultisampleArray = 8,
+    TextureTypeTextureBuffer = 9,
+};
+
+_MTL_ENUM(uint8_t, TextureSwizzle) { // member type of TextureSwizzleChannels; uint8_t is presumably the underlying type (the _MTL_ENUM macro is defined elsewhere - confirm)
+    TextureSwizzleZero = 0,
+    TextureSwizzleOne = 1,
+    TextureSwizzleRed = 2,
+    TextureSwizzleGreen = 3,
+    TextureSwizzleBlue = 4,
+    TextureSwizzleAlpha = 5,
+};
+
+struct TextureSwizzleChannels // four TextureSwizzle members in the order red, green, blue, alpha; tagged _MTL_PACKED
+{
+    MTL::TextureSwizzle red;
+    MTL::TextureSwizzle green;
+    MTL::TextureSwizzle blue;
+    MTL::TextureSwizzle alpha;
+} _MTL_PACKED;
+
+class SharedTextureHandle : public NS::Referencing<SharedTextureHandle> // alloc/init plus two const getters; no setters are declared
+{
+public:
+    static class SharedTextureHandle* alloc();
+
+    class SharedTextureHandle*        init();
+
+    class Device*                     device() const;
+
+    NS::String*                       label() const;
+};
+
+struct SharedTextureHandlePrivate // empty struct: declares no members; tagged _MTL_PACKED
+{
+} _MTL_PACKED;
+
+_MTL_OPTIONS(NS::UInteger, TextureUsage) { // declared with _MTL_OPTIONS (not _MTL_ENUM); the non-zero values are distinct single bits
+    TextureUsageUnknown = 0,
+    TextureUsageShaderRead = 1,
+    TextureUsageShaderWrite = 2,
+    TextureUsageRenderTarget = 4,
+    TextureUsagePixelFormatView = 16, // bit value 8 is not defined in this enum
+};
+
+_MTL_ENUM(NS::Integer, TextureCompressionType) { // first macro argument is the signed NS::Integer, unlike the NS::UInteger enums above
+    TextureCompressionTypeLossless = 0,
+    TextureCompressionTypeLossy = 1,
+};
+
+class TextureDescriptor : public NS::Copying<TextureDescriptor> // three static factory declarations followed by getter/setter pairs
+{
+public:
+    static class TextureDescriptor* alloc();
+
+    class TextureDescriptor*        init();
+
+    static class TextureDescriptor* texture2DDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, NS::UInteger height, bool mipmapped); // static factory
+
+    static class TextureDescriptor* textureCubeDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger size, bool mipmapped); // static factory; takes a single size argument
+
+    static class TextureDescriptor* textureBufferDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, MTL::ResourceOptions resourceOptions, MTL::TextureUsage usage); // static factory
+
+    MTL::TextureType                textureType() const; // each getter below is paired with a setXxx() setter of the same type
+    void                            setTextureType(MTL::TextureType textureType);
+
+    MTL::PixelFormat                pixelFormat() const;
+    void                            setPixelFormat(MTL::PixelFormat pixelFormat);
+
+    NS::UInteger                    width() const;
+    void                            setWidth(NS::UInteger width);
+
+    NS::UInteger                    height() const;
+    void                            setHeight(NS::UInteger height);
+
+    NS::UInteger                    depth() const;
+    void                            setDepth(NS::UInteger depth);
+
+    NS::UInteger                    mipmapLevelCount() const;
+    void                            setMipmapLevelCount(NS::UInteger mipmapLevelCount);
+
+    NS::UInteger                    sampleCount() const;
+    void                            setSampleCount(NS::UInteger sampleCount);
+
+    NS::UInteger                    arrayLength() const;
+    void                            setArrayLength(NS::UInteger arrayLength);
+
+    MTL::ResourceOptions            resourceOptions() const;
+    void                            setResourceOptions(MTL::ResourceOptions resourceOptions);
+
+    MTL::CPUCacheMode               cpuCacheMode() const;
+    void                            setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode);
+
+    MTL::StorageMode                storageMode() const;
+    void                            setStorageMode(MTL::StorageMode storageMode);
+
+    MTL::HazardTrackingMode         hazardTrackingMode() const;
+    void                            setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode);
+
+    MTL::TextureUsage               usage() const;
+    void                            setUsage(MTL::TextureUsage usage);
+
+    bool                            allowGPUOptimizedContents() const;
+    void                            setAllowGPUOptimizedContents(bool allowGPUOptimizedContents);
+
+    MTL::TextureSwizzleChannels     swizzle() const; // passed and returned by value
+    void                            setSwizzle(MTL::TextureSwizzleChannels swizzle);
+};
+
+class Texture : public NS::Referencing<Texture, Resource> // second template argument is Resource; no alloc/init or setters are declared here
+{
+public:
+    class Resource*             rootResource() const;
+
+    class Texture*              parentTexture() const;
+
+    NS::UInteger                parentRelativeLevel() const;
+
+    NS::UInteger                parentRelativeSlice() const;
+
+    class Buffer*               buffer() const;
+
+    NS::UInteger                bufferOffset() const;
+
+    NS::UInteger                bufferBytesPerRow() const;
+
+    IOSurfaceRef                iosurface() const; // IOSurfaceRef comes from <IOSurface/IOSurfaceRef.h>, included above
+
+    NS::UInteger                iosurfacePlane() const;
+
+    MTL::TextureType            textureType() const;
+
+    MTL::PixelFormat            pixelFormat() const;
+
+    NS::UInteger                width() const;
+
+    NS::UInteger                height() const;
+
+    NS::UInteger                depth() const;
+
+    NS::UInteger                mipmapLevelCount() const;
+
+    NS::UInteger                sampleCount() const;
+
+    NS::UInteger                arrayLength() const;
+
+    MTL::TextureUsage           usage() const;
+
+    bool                        shareable() const;
+
+    bool                        framebufferOnly() const;
+
+    NS::UInteger                firstMipmapInTail() const;
+
+    NS::UInteger                tailSizeInBytes() const;
+
+    bool                        isSparse() const;
+
+    bool                        allowGPUOptimizedContents() const;
+
+    void                        getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger bytesPerImage, MTL::Region region, NS::UInteger level, NS::UInteger slice); // NOTE(review): pixelBytes is const void* although the name suggests a destination buffer - confirm against the definition
+
+    void                        replaceRegion(MTL::Region region, NS::UInteger level, NS::UInteger slice, const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger bytesPerImage);
+
+    void                        getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, MTL::Region region, NS::UInteger level); // overload without bytesPerImage/slice; NOTE(review): same const void* question as the overload above
+
+    void                        replaceRegion(MTL::Region region, NS::UInteger level, const void* pixelBytes, NS::UInteger bytesPerRow); // overload without slice/bytesPerImage
+
+    class Texture*              newTextureView(MTL::PixelFormat pixelFormat); // first of three newTextureView overloads
+
+    class Texture*              newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange);
+
+    class SharedTextureHandle*  newSharedTextureHandle();
+
+    class Texture*              remoteStorageTexture() const;
+
+    class Texture*              newRemoteTextureViewForDevice(const class Device* device);
+
+    MTL::TextureSwizzleChannels swizzle() const;
+
+    class Texture*              newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange, MTL::TextureSwizzleChannels swizzle); // overload that additionally takes swizzle channels
+};
+
+}
+
+// static method: alloc - hands the MTLSharedTextureHandle class token (_MTL_PRIVATE_CLS) to NS::Object::alloc and returns its result.
+_MTL_INLINE MTL::SharedTextureHandle* MTL::SharedTextureHandle::alloc()
+{
+    return NS::Object::alloc<MTL::SharedTextureHandle>(_MTL_PRIVATE_CLS(MTLSharedTextureHandle));
+}
+
+// method: init - delegates to NS::Object::init, with the result typed as SharedTextureHandle.
+_MTL_INLINE MTL::SharedTextureHandle* MTL::SharedTextureHandle::init()
+{
+    return NS::Object::init<MTL::SharedTextureHandle>();
+}
+
+// property: device - getter; passes the `device` selector token and `this` to Object::sendMessage and returns the MTL::Device* result.
+_MTL_INLINE MTL::Device* MTL::SharedTextureHandle::device() const
+{
+    return Object::sendMessage<MTL::Device*>(this, _MTL_PRIVATE_SEL(device));
+}
+
+// property: label - getter; same pattern as device, with an NS::String* result.
+_MTL_INLINE NS::String* MTL::SharedTextureHandle::label() const
+{
+    return Object::sendMessage<NS::String*>(this, _MTL_PRIVATE_SEL(label));
+}
+
+// static method: alloc - hands the MTLTextureDescriptor class token (_MTL_PRIVATE_CLS) to NS::Object::alloc and returns its result.
+_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::TextureDescriptor>(_MTL_PRIVATE_CLS(MTLTextureDescriptor));
+}
+
+// method: init - delegates to NS::Object::init, with the result typed as TextureDescriptor.
+_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::init()
+{
+    return NS::Object::init<MTL::TextureDescriptor>();
+}
+
+// static method: texture2DDescriptorWithPixelFormat:width:height:mipmapped: - the receiver is the MTLTextureDescriptor class token, not `this`; the four arguments are forwarded in declaration order.
+_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, NS::UInteger height, bool mipmapped)
+{
+    return Object::sendMessage<MTL::TextureDescriptor*>(_MTL_PRIVATE_CLS(MTLTextureDescriptor), _MTL_PRIVATE_SEL(texture2DDescriptorWithPixelFormat_width_height_mipmapped_), pixelFormat, width, height, mipmapped);
+}
+
+// static method: textureCubeDescriptorWithPixelFormat:size:mipmapped: - class-token receiver; a single `size` argument is forwarded instead of separate width/height.
+_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger size, bool mipmapped)
+{
+    return Object::sendMessage<MTL::TextureDescriptor*>(_MTL_PRIVATE_CLS(MTLTextureDescriptor), _MTL_PRIVATE_SEL(textureCubeDescriptorWithPixelFormat_size_mipmapped_), pixelFormat, size, mipmapped);
+}
+
+// static method: textureBufferDescriptorWithPixelFormat:width:resourceOptions:usage: - class-token receiver; forwards pixel format, width, resource options and usage in that order.
+_MTL_INLINE MTL::TextureDescriptor* MTL::TextureDescriptor::textureBufferDescriptor(MTL::PixelFormat pixelFormat, NS::UInteger width, MTL::ResourceOptions resourceOptions, MTL::TextureUsage usage)
+{
+    return Object::sendMessage<MTL::TextureDescriptor*>(_MTL_PRIVATE_CLS(MTLTextureDescriptor), _MTL_PRIVATE_SEL(textureBufferDescriptorWithPixelFormat_width_resourceOptions_usage_), pixelFormat, width, resourceOptions, usage);
+}
+
+// property: textureType - getter, then setter. Pattern used by every pair below: the getter returns Object::sendMessage<T>(this, <selector>); the setter passes the new value with the set..._ selector token and returns void.
+_MTL_INLINE MTL::TextureType MTL::TextureDescriptor::textureType() const
+{
+    return Object::sendMessage<MTL::TextureType>(this, _MTL_PRIVATE_SEL(textureType));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setTextureType(MTL::TextureType textureType)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setTextureType_), textureType);
+}
+
+// property: pixelFormat - getter (MTL::PixelFormat), followed by its setter (selector token setPixelFormat_).
+_MTL_INLINE MTL::PixelFormat MTL::TextureDescriptor::pixelFormat() const
+{
+    return Object::sendMessage<MTL::PixelFormat>(this, _MTL_PRIVATE_SEL(pixelFormat));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setPixelFormat(MTL::PixelFormat pixelFormat)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setPixelFormat_), pixelFormat);
+}
+
+// property: width - getter (NS::UInteger), followed by its setter (selector token setWidth_).
+_MTL_INLINE NS::UInteger MTL::TextureDescriptor::width() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(width));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setWidth(NS::UInteger width)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setWidth_), width);
+}
+
+// property: height - getter (NS::UInteger), followed by its setter (selector token setHeight_).
+_MTL_INLINE NS::UInteger MTL::TextureDescriptor::height() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(height));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setHeight(NS::UInteger height)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setHeight_), height);
+}
+
+// property: depth - getter (NS::UInteger), followed by its setter (selector token setDepth_).
+_MTL_INLINE NS::UInteger MTL::TextureDescriptor::depth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(depth));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setDepth(NS::UInteger depth)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setDepth_), depth);
+}
+
+// property: mipmapLevelCount - getter (NS::UInteger), followed by its setter (selector token setMipmapLevelCount_).
+_MTL_INLINE NS::UInteger MTL::TextureDescriptor::mipmapLevelCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(mipmapLevelCount));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setMipmapLevelCount(NS::UInteger mipmapLevelCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setMipmapLevelCount_), mipmapLevelCount);
+}
+
+// property: sampleCount - getter (NS::UInteger), followed by its setter (selector token setSampleCount_).
+_MTL_INLINE NS::UInteger MTL::TextureDescriptor::sampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setSampleCount(NS::UInteger sampleCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSampleCount_), sampleCount);
+}
+
+// property: arrayLength - getter (NS::UInteger), followed by its setter (selector token setArrayLength_).
+_MTL_INLINE NS::UInteger MTL::TextureDescriptor::arrayLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(arrayLength));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setArrayLength(NS::UInteger arrayLength)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setArrayLength_), arrayLength);
+}
+
+// property: resourceOptions - getter (MTL::ResourceOptions), followed by its setter (selector token setResourceOptions_).
+_MTL_INLINE MTL::ResourceOptions MTL::TextureDescriptor::resourceOptions() const
+{
+    return Object::sendMessage<MTL::ResourceOptions>(this, _MTL_PRIVATE_SEL(resourceOptions));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setResourceOptions(MTL::ResourceOptions resourceOptions)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setResourceOptions_), resourceOptions);
+}
+
+// property: cpuCacheMode - getter (MTL::CPUCacheMode), followed by its setter (selector token setCpuCacheMode_).
+_MTL_INLINE MTL::CPUCacheMode MTL::TextureDescriptor::cpuCacheMode() const
+{
+    return Object::sendMessage<MTL::CPUCacheMode>(this, _MTL_PRIVATE_SEL(cpuCacheMode));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setCpuCacheMode(MTL::CPUCacheMode cpuCacheMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setCpuCacheMode_), cpuCacheMode);
+}
+
+// property: storageMode - getter (MTL::StorageMode), followed by its setter (selector token setStorageMode_).
+_MTL_INLINE MTL::StorageMode MTL::TextureDescriptor::storageMode() const
+{
+    return Object::sendMessage<MTL::StorageMode>(this, _MTL_PRIVATE_SEL(storageMode));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setStorageMode(MTL::StorageMode storageMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStorageMode_), storageMode);
+}
+
+// property: hazardTrackingMode - getter (MTL::HazardTrackingMode), followed by its setter (selector token setHazardTrackingMode_).
+_MTL_INLINE MTL::HazardTrackingMode MTL::TextureDescriptor::hazardTrackingMode() const
+{
+    return Object::sendMessage<MTL::HazardTrackingMode>(this, _MTL_PRIVATE_SEL(hazardTrackingMode));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setHazardTrackingMode(MTL::HazardTrackingMode hazardTrackingMode)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setHazardTrackingMode_), hazardTrackingMode);
+}
+
+// property: usage - getter (MTL::TextureUsage), followed by its setter (selector token setUsage_).
+_MTL_INLINE MTL::TextureUsage MTL::TextureDescriptor::usage() const
+{
+    return Object::sendMessage<MTL::TextureUsage>(this, _MTL_PRIVATE_SEL(usage));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setUsage(MTL::TextureUsage usage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setUsage_), usage);
+}
+
+// property: allowGPUOptimizedContents - getter (bool), followed by its setter (selector token setAllowGPUOptimizedContents_).
+_MTL_INLINE bool MTL::TextureDescriptor::allowGPUOptimizedContents() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(allowGPUOptimizedContents));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setAllowGPUOptimizedContents(bool allowGPUOptimizedContents)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setAllowGPUOptimizedContents_), allowGPUOptimizedContents);
+}
+
+// property: swizzle - getter returns MTL::TextureSwizzleChannels by value; the setter takes it by value (selector token setSwizzle_).
+_MTL_INLINE MTL::TextureSwizzleChannels MTL::TextureDescriptor::swizzle() const
+{
+    return Object::sendMessage<MTL::TextureSwizzleChannels>(this, _MTL_PRIVATE_SEL(swizzle));
+}
+
+_MTL_INLINE void MTL::TextureDescriptor::setSwizzle(MTL::TextureSwizzleChannels swizzle)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setSwizzle_), swizzle);
+}
+
+// property: rootResource - getter. Every getter in this run has the same shape: it returns Object::sendMessage<T>(this, <selector token>) with T equal to the declared return type.
+_MTL_INLINE MTL::Resource* MTL::Texture::rootResource() const
+{
+    return Object::sendMessage<MTL::Resource*>(this, _MTL_PRIVATE_SEL(rootResource));
+}
+
+// property: parentTexture - getter; result type MTL::Texture*.
+_MTL_INLINE MTL::Texture* MTL::Texture::parentTexture() const
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(parentTexture));
+}
+
+// property: parentRelativeLevel - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::parentRelativeLevel() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(parentRelativeLevel));
+}
+
+// property: parentRelativeSlice - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::parentRelativeSlice() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(parentRelativeSlice));
+}
+
+// property: buffer - getter; result type MTL::Buffer*.
+_MTL_INLINE MTL::Buffer* MTL::Texture::buffer() const
+{
+    return Object::sendMessage<MTL::Buffer*>(this, _MTL_PRIVATE_SEL(buffer));
+}
+
+// property: bufferOffset - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::bufferOffset() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferOffset));
+}
+
+// property: bufferBytesPerRow - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::bufferBytesPerRow() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferBytesPerRow));
+}
+
+// property: iosurface - getter; result type IOSurfaceRef (a type declared outside this header's visible part).
+_MTL_INLINE IOSurfaceRef MTL::Texture::iosurface() const
+{
+    return Object::sendMessage<IOSurfaceRef>(this, _MTL_PRIVATE_SEL(iosurface));
+}
+
+// property: iosurfacePlane - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::iosurfacePlane() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(iosurfacePlane));
+}
+
+// property: textureType - getter; result type MTL::TextureType.
+_MTL_INLINE MTL::TextureType MTL::Texture::textureType() const
+{
+    return Object::sendMessage<MTL::TextureType>(this, _MTL_PRIVATE_SEL(textureType));
+}
+
+// property: pixelFormat - getter; result type MTL::PixelFormat.
+_MTL_INLINE MTL::PixelFormat MTL::Texture::pixelFormat() const
+{
+    return Object::sendMessage<MTL::PixelFormat>(this, _MTL_PRIVATE_SEL(pixelFormat));
+}
+
+// property: width - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::width() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(width));
+}
+
+// property: height - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::height() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(height));
+}
+
+// property: depth - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::depth() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(depth));
+}
+
+// property: mipmapLevelCount - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::mipmapLevelCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(mipmapLevelCount));
+}
+
+// property: sampleCount - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::sampleCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(sampleCount));
+}
+
+// property: arrayLength - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::arrayLength() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(arrayLength));
+}
+
+// property: usage - getter; result type MTL::TextureUsage.
+_MTL_INLINE MTL::TextureUsage MTL::Texture::usage() const
+{
+    return Object::sendMessage<MTL::TextureUsage>(this, _MTL_PRIVATE_SEL(usage));
+}
+
+// property: shareable - getter; note the selector token is isShareable, which differs from the C++ method name.
+_MTL_INLINE bool MTL::Texture::shareable() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isShareable));
+}
+
+// property: framebufferOnly - getter; note the selector token is isFramebufferOnly, which differs from the C++ method name.
+_MTL_INLINE bool MTL::Texture::framebufferOnly() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isFramebufferOnly));
+}
+
+// property: firstMipmapInTail - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::firstMipmapInTail() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(firstMipmapInTail));
+}
+
+// property: tailSizeInBytes - getter; result type NS::UInteger.
+_MTL_INLINE NS::UInteger MTL::Texture::tailSizeInBytes() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(tailSizeInBytes));
+}
+
+// property: isSparse - getter; here the C++ method name and the selector token are both isSparse.
+_MTL_INLINE bool MTL::Texture::isSparse() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(isSparse));
+}
+
+// property: allowGPUOptimizedContents - getter; result type bool.
+_MTL_INLINE bool MTL::Texture::allowGPUOptimizedContents() const
+{
+    return Object::sendMessage<bool>(this, _MTL_PRIVATE_SEL(allowGPUOptimizedContents));
+}
+
+// method: getBytes:bytesPerRow:bytesPerImage:fromRegion:mipmapLevel:slice: - forwards all six arguments unchanged. NOTE(review): pixelBytes is const although the selector name suggests the buffer is written to - confirm.
+_MTL_INLINE void MTL::Texture::getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger bytesPerImage, MTL::Region region, NS::UInteger level, NS::UInteger slice)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(getBytes_bytesPerRow_bytesPerImage_fromRegion_mipmapLevel_slice_), pixelBytes, bytesPerRow, bytesPerImage, region, level, slice);
+}
+
+// method: replaceRegion:mipmapLevel:slice:withBytes:bytesPerRow:bytesPerImage: - forwards region, level, slice, source pointer and both strides in declaration order.
+_MTL_INLINE void MTL::Texture::replaceRegion(MTL::Region region, NS::UInteger level, NS::UInteger slice, const void* pixelBytes, NS::UInteger bytesPerRow, NS::UInteger bytesPerImage)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(replaceRegion_mipmapLevel_slice_withBytes_bytesPerRow_bytesPerImage_), region, level, slice, pixelBytes, bytesPerRow, bytesPerImage);
+}
+
+// method: getBytes:bytesPerRow:fromRegion:mipmapLevel: - four-argument overload without bytesPerImage and slice; the same const-pointer caveat applies (confirm).
+_MTL_INLINE void MTL::Texture::getBytes(const void* pixelBytes, NS::UInteger bytesPerRow, MTL::Region region, NS::UInteger level)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(getBytes_bytesPerRow_fromRegion_mipmapLevel_), pixelBytes, bytesPerRow, region, level);
+}
+
+// method: replaceRegion:mipmapLevel:withBytes:bytesPerRow: - four-argument overload without slice and bytesPerImage.
+_MTL_INLINE void MTL::Texture::replaceRegion(MTL::Region region, NS::UInteger level, const void* pixelBytes, NS::UInteger bytesPerRow)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(replaceRegion_mipmapLevel_withBytes_bytesPerRow_), region, level, pixelBytes, bytesPerRow);
+}
+
+// method: newTextureViewWithPixelFormat: - returns the MTL::Texture* result. NOTE(review): presumably the `new` prefix means the caller owns the returned object - confirm against metal-cpp's ownership rules.
+_MTL_INLINE MTL::Texture* MTL::Texture::newTextureView(MTL::PixelFormat pixelFormat)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureViewWithPixelFormat_), pixelFormat);
+}
+
+// method: newTextureViewWithPixelFormat:textureType:levels:slices: - overload that also forwards the texture type and the two NS::Range arguments.
+_MTL_INLINE MTL::Texture* MTL::Texture::newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_), pixelFormat, textureType, levelRange, sliceRange);
+}
+
+// method: newSharedTextureHandle - takes no arguments; returns the MTL::SharedTextureHandle* result.
+_MTL_INLINE MTL::SharedTextureHandle* MTL::Texture::newSharedTextureHandle()
+{
+    return Object::sendMessage<MTL::SharedTextureHandle*>(this, _MTL_PRIVATE_SEL(newSharedTextureHandle));
+}
+
+// property: remoteStorageTexture - const getter; result type MTL::Texture*.
+_MTL_INLINE MTL::Texture* MTL::Texture::remoteStorageTexture() const
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(remoteStorageTexture));
+}
+
+// method: newRemoteTextureViewForDevice: - forwards the device pointer; returns the MTL::Texture* result.
+_MTL_INLINE MTL::Texture* MTL::Texture::newRemoteTextureViewForDevice(const MTL::Device* device)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newRemoteTextureViewForDevice_), device);
+}
+
+// property: swizzle - const getter; returns MTL::TextureSwizzleChannels by value.
+_MTL_INLINE MTL::TextureSwizzleChannels MTL::Texture::swizzle() const
+{
+    return Object::sendMessage<MTL::TextureSwizzleChannels>(this, _MTL_PRIVATE_SEL(swizzle));
+}
+
+// method: newTextureViewWithPixelFormat:textureType:levels:slices:swizzle: - five-argument overload; identical to the four-argument one plus a trailing swizzle value.
+_MTL_INLINE MTL::Texture* MTL::Texture::newTextureView(MTL::PixelFormat pixelFormat, MTL::TextureType textureType, NS::Range levelRange, NS::Range sliceRange, MTL::TextureSwizzleChannels swizzle)
+{
+    return Object::sendMessage<MTL::Texture*>(this, _MTL_PRIVATE_SEL(newTextureViewWithPixelFormat_textureType_levels_slices_swizzle_), pixelFormat, textureType, levelRange, sliceRange, swizzle);
+}
diff --git a/metal-cpp/Metal/MTLTypes.hpp b/metal-cpp/Metal/MTLTypes.hpp
new file mode 100644
index 0000000..768c4d9
--- /dev/null
+++ b/metal-cpp/Metal/MTLTypes.hpp
@@ -0,0 +1,163 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLTypes.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLTypes.hpp"
+
+namespace MTL
+{
+struct Origin // Three NS::UInteger components x, y, z; the struct is tagged with _MTL_PACKED.
+{
+    Origin() = default; // defaulted; no member initializers are supplied
+    // Component-wise constructor; its out-of-line definition copies each argument into the matching member.
+    Origin(NS::UInteger x, NS::UInteger y, NS::UInteger z);
+    // Static factory; its definition returns Origin(x, y, z).
+    static Origin Make(NS::UInteger x, NS::UInteger y, NS::UInteger z);
+
+    NS::UInteger  x;
+    NS::UInteger  y;
+    NS::UInteger  z;
+} _MTL_PACKED;
+
+struct Size // Three NS::UInteger extents width, height, depth; the struct is tagged with _MTL_PACKED.
+{
+    Size() = default; // defaulted; no member initializers are supplied
+    // Component-wise constructor; its out-of-line definition copies each argument into the matching member.
+    Size(NS::UInteger width, NS::UInteger height, NS::UInteger depth);
+    // Static factory; its definition returns Size(width, height, depth).
+    static Size  Make(NS::UInteger width, NS::UInteger height, NS::UInteger depth);
+
+    NS::UInteger width;
+    NS::UInteger height;
+    NS::UInteger depth;
+} _MTL_PACKED;
+
+struct Region // An MTL::Origin plus an MTL::Size; the struct is tagged with _MTL_PACKED.
+{
+    Region() = default; // defaulted; no member initializers are supplied
+    // 1D form: the definition sets origin to (x, 0, 0) and size to (width, 1, 1).
+    Region(NS::UInteger x, NS::UInteger width);
+    // 2D form: the definition sets origin to (x, y, 0) and size to (width, height, 1).
+    Region(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height);
+    // 3D form: all six values are stored as given.
+    Region(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth);
+    // Make1D / Make2D / Make3D: static factories whose definitions return the constructor with the same parameter list.
+    static Region Make1D(NS::UInteger x, NS::UInteger width);
+
+    static Region Make2D(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height);
+
+    static Region Make3D(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth);
+
+    MTL::Origin   origin;
+    MTL::Size     size;
+} _MTL_PACKED;
+
+struct SamplePosition; // forward declaration so the alias below can name the type before its definition
+
+using Coordinate2D = SamplePosition; // Coordinate2D is a second name for the same type, not a distinct struct
+
+struct SamplePosition // Two float components x and y; the struct is tagged with _MTL_PACKED.
+{
+    SamplePosition() = default; // defaulted; no member initializers are supplied
+    // Component-wise constructor; its definition stores _x in x and _y in y.
+    SamplePosition(float _x, float _y);
+    // Static factory; its definition returns SamplePosition(x, y).
+    static SamplePosition Make(float x, float y);
+
+    float                 x;
+    float                 y;
+} _MTL_PACKED;
+
+}
+
+_MTL_INLINE MTL::Origin::Origin(NS::UInteger _x, NS::UInteger _y, NS::UInteger _z)
+    : x(_x)
+    , y(_y)
+    , z(_z)
+{
+}
+
+_MTL_INLINE MTL::Origin MTL::Origin::Make(NS::UInteger x, NS::UInteger y, NS::UInteger z)
+{
+    return Origin(x, y, z);
+}
+
+_MTL_INLINE MTL::Size::Size(NS::UInteger _width, NS::UInteger _height, NS::UInteger _depth)
+    : width(_width)
+    , height(_height)
+    , depth(_depth)
+{
+}
+
+_MTL_INLINE MTL::Size MTL::Size::Make(NS::UInteger width, NS::UInteger height, NS::UInteger depth)
+{
+    return Size(width, height, depth);
+}
+
+_MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger width)
+    : origin(x, 0, 0)
+    , size(width, 1, 1)
+{
+}
+
+_MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height)
+    : origin(x, y, 0)
+    , size(width, height, 1)
+{
+}
+
+_MTL_INLINE MTL::Region::Region(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth)
+    : origin(x, y, z)
+    , size(width, height, depth)
+{
+}
+
+_MTL_INLINE MTL::Region MTL::Region::Make1D(NS::UInteger x, NS::UInteger width)
+{
+    return Region(x, width);
+}
+
+_MTL_INLINE MTL::Region MTL::Region::Make2D(NS::UInteger x, NS::UInteger y, NS::UInteger width, NS::UInteger height)
+{
+    return Region(x, y, width, height);
+}
+
+_MTL_INLINE MTL::Region MTL::Region::Make3D(NS::UInteger x, NS::UInteger y, NS::UInteger z, NS::UInteger width, NS::UInteger height, NS::UInteger depth)
+{
+    return Region(x, y, z, width, height, depth);
+}
+
+_MTL_INLINE MTL::SamplePosition::SamplePosition(float _x, float _y)
+    : x(_x)
+    , y(_y)
+{
+}
+
+_MTL_INLINE MTL::SamplePosition MTL::SamplePosition::Make(float x, float y)
+{
+    return SamplePosition(x, y);
+}
diff --git a/metal-cpp/Metal/MTLVertexDescriptor.hpp b/metal-cpp/Metal/MTLVertexDescriptor.hpp
new file mode 100644
index 0000000..e17f823
--- /dev/null
+++ b/metal-cpp/Metal/MTLVertexDescriptor.hpp
@@ -0,0 +1,344 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLVertexDescriptor.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLVertexDescriptor.hpp"
+
+namespace MTL
+{
+_MTL_ENUM(NS::UInteger, VertexFormat) { // Vertex format identifiers with explicit numeric values; NS::UInteger is presumably the underlying type - confirm against the _MTL_ENUM definition.
+    VertexFormatInvalid = 0,
+    VertexFormatUChar2 = 1,
+    VertexFormatUChar3 = 2,
+    VertexFormatUChar4 = 3,
+    VertexFormatChar2 = 4,
+    VertexFormatChar3 = 5,
+    VertexFormatChar4 = 6,
+    VertexFormatUChar2Normalized = 7,
+    VertexFormatUChar3Normalized = 8,
+    VertexFormatUChar4Normalized = 9,
+    VertexFormatChar2Normalized = 10,
+    VertexFormatChar3Normalized = 11,
+    VertexFormatChar4Normalized = 12,
+    VertexFormatUShort2 = 13,
+    VertexFormatUShort3 = 14,
+    VertexFormatUShort4 = 15,
+    VertexFormatShort2 = 16,
+    VertexFormatShort3 = 17,
+    VertexFormatShort4 = 18,
+    VertexFormatUShort2Normalized = 19,
+    VertexFormatUShort3Normalized = 20,
+    VertexFormatUShort4Normalized = 21,
+    VertexFormatShort2Normalized = 22,
+    VertexFormatShort3Normalized = 23,
+    VertexFormatShort4Normalized = 24,
+    VertexFormatHalf2 = 25,
+    VertexFormatHalf3 = 26,
+    VertexFormatHalf4 = 27,
+    VertexFormatFloat = 28,
+    VertexFormatFloat2 = 29,
+    VertexFormatFloat3 = 30,
+    VertexFormatFloat4 = 31,
+    VertexFormatInt = 32,
+    VertexFormatInt2 = 33,
+    VertexFormatInt3 = 34,
+    VertexFormatInt4 = 35,
+    VertexFormatUInt = 36,
+    VertexFormatUInt2 = 37,
+    VertexFormatUInt3 = 38,
+    VertexFormatUInt4 = 39,
+    VertexFormatInt1010102Normalized = 40,
+    VertexFormatUInt1010102Normalized = 41,
+    VertexFormatUChar4Normalized_BGRA = 42,
+    VertexFormatUChar = 45, // values 43 and 44 are not assigned to any enumerator here
+    VertexFormatChar = 46,
+    VertexFormatUCharNormalized = 47,
+    VertexFormatCharNormalized = 48,
+    VertexFormatUShort = 49,
+    VertexFormatShort = 50,
+    VertexFormatUShortNormalized = 51,
+    VertexFormatShortNormalized = 52,
+    VertexFormatHalf = 53,
+};
+
+_MTL_ENUM(NS::UInteger, VertexStepFunction) { // Five step-function identifiers numbered 0 to 4 with no gaps; used as the type of VertexBufferLayoutDescriptor's stepFunction property.
+    VertexStepFunctionConstant = 0,
+    VertexStepFunctionPerVertex = 1,
+    VertexStepFunctionPerInstance = 2,
+    VertexStepFunctionPerPatch = 3,
+    VertexStepFunctionPerPatchControlPoint = 4,
+};
+
+class VertexBufferLayoutDescriptor : public NS::Copying<VertexBufferLayoutDescriptor> // Descriptor with three read/write properties: stride, stepFunction, stepRate.
+{
+public:
+    static class VertexBufferLayoutDescriptor* alloc();
+
+    class VertexBufferLayoutDescriptor*        init();
+    // Each property below is a const getter paired with a set* method that takes the same type.
+    NS::UInteger                               stride() const;
+    void                                       setStride(NS::UInteger stride);
+
+    MTL::VertexStepFunction                    stepFunction() const;
+    void                                       setStepFunction(MTL::VertexStepFunction stepFunction);
+
+    NS::UInteger                               stepRate() const;
+    void                                       setStepRate(NS::UInteger stepRate);
+};
+
+class VertexBufferLayoutDescriptorArray : public NS::Referencing<VertexBufferLayoutDescriptorArray> // Index-addressed collection of VertexBufferLayoutDescriptor pointers.
+{
+public:
+    static class VertexBufferLayoutDescriptorArray* alloc();
+
+    class VertexBufferLayoutDescriptorArray*        init();
+    // Indexed read; the definition uses the objectAtIndexedSubscript_ selector token.
+    class VertexBufferLayoutDescriptor*             object(NS::UInteger index);
+    // Indexed write; the definition uses the setObject_atIndexedSubscript_ selector token.
+    void                                            setObject(const class VertexBufferLayoutDescriptor* bufferDesc, NS::UInteger index);
+};
+
+class VertexAttributeDescriptor : public NS::Copying<VertexAttributeDescriptor> // Descriptor with three read/write properties: format, offset, bufferIndex.
+{
+public:
+    static class VertexAttributeDescriptor* alloc();
+
+    class VertexAttributeDescriptor*        init();
+    // Each property below is a const getter paired with a set* method that takes the same type.
+    MTL::VertexFormat                       format() const;
+    void                                    setFormat(MTL::VertexFormat format);
+
+    NS::UInteger                            offset() const;
+    void                                    setOffset(NS::UInteger offset);
+
+    NS::UInteger                            bufferIndex() const;
+    void                                    setBufferIndex(NS::UInteger bufferIndex);
+};
+
+class VertexAttributeDescriptorArray : public NS::Referencing<VertexAttributeDescriptorArray> // Index-addressed collection of VertexAttributeDescriptor pointers.
+{
+public:
+    static class VertexAttributeDescriptorArray* alloc();
+
+    class VertexAttributeDescriptorArray*        init();
+    // Indexed read; the definition uses the objectAtIndexedSubscript_ selector token.
+    class VertexAttributeDescriptor*             object(NS::UInteger index);
+    // Indexed write; the definition uses the setObject_atIndexedSubscript_ selector token.
+    void                                         setObject(const class VertexAttributeDescriptor* attributeDesc, NS::UInteger index);
+};
+
+class VertexDescriptor : public NS::Copying<VertexDescriptor> // Top-level vertex descriptor: exposes a layouts array, an attributes array, and reset().
+{
+public:
+    static class VertexDescriptor*           alloc();
+
+    class VertexDescriptor*                  init();
+    // Static factory; its definition sends the vertexDescriptor selector to the MTLVertexDescriptor class token.
+    static class VertexDescriptor*           vertexDescriptor();
+    // Read-only accessors for the two descriptor arrays declared earlier in this header.
+    class VertexBufferLayoutDescriptorArray* layouts() const;
+
+    class VertexAttributeDescriptorArray*    attributes() const;
+    // Takes no arguments and returns nothing; its definition sends the reset selector to this object.
+    void                                     reset();
+};
+
+}
+
+// static method: alloc - hands the MTLVertexBufferLayoutDescriptor class token (_MTL_PRIVATE_CLS) to NS::Object::alloc and returns its result.
+_MTL_INLINE MTL::VertexBufferLayoutDescriptor* MTL::VertexBufferLayoutDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::VertexBufferLayoutDescriptor>(_MTL_PRIVATE_CLS(MTLVertexBufferLayoutDescriptor));
+}
+
+// method: init - delegates to NS::Object::init, with the result typed as VertexBufferLayoutDescriptor.
+_MTL_INLINE MTL::VertexBufferLayoutDescriptor* MTL::VertexBufferLayoutDescriptor::init()
+{
+    return NS::Object::init<MTL::VertexBufferLayoutDescriptor>();
+}
+
+// property: stride - getter (NS::UInteger), followed by its setter (selector token setStride_).
+_MTL_INLINE NS::UInteger MTL::VertexBufferLayoutDescriptor::stride() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(stride));
+}
+
+_MTL_INLINE void MTL::VertexBufferLayoutDescriptor::setStride(NS::UInteger stride)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStride_), stride);
+}
+
+// property: stepFunction - getter (MTL::VertexStepFunction), followed by its setter (selector token setStepFunction_).
+_MTL_INLINE MTL::VertexStepFunction MTL::VertexBufferLayoutDescriptor::stepFunction() const
+{
+    return Object::sendMessage<MTL::VertexStepFunction>(this, _MTL_PRIVATE_SEL(stepFunction));
+}
+
+_MTL_INLINE void MTL::VertexBufferLayoutDescriptor::setStepFunction(MTL::VertexStepFunction stepFunction)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStepFunction_), stepFunction);
+}
+
+// property: stepRate - getter (NS::UInteger), followed by its setter (selector token setStepRate_).
+_MTL_INLINE NS::UInteger MTL::VertexBufferLayoutDescriptor::stepRate() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(stepRate));
+}
+
+_MTL_INLINE void MTL::VertexBufferLayoutDescriptor::setStepRate(NS::UInteger stepRate)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setStepRate_), stepRate);
+}
+
+// static method: alloc - hands the MTLVertexBufferLayoutDescriptorArray class token (_MTL_PRIVATE_CLS) to NS::Object::alloc and returns its result.
+_MTL_INLINE MTL::VertexBufferLayoutDescriptorArray* MTL::VertexBufferLayoutDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::VertexBufferLayoutDescriptorArray>(_MTL_PRIVATE_CLS(MTLVertexBufferLayoutDescriptorArray));
+}
+
+// method: init - delegates to NS::Object::init, with the result typed as VertexBufferLayoutDescriptorArray.
+_MTL_INLINE MTL::VertexBufferLayoutDescriptorArray* MTL::VertexBufferLayoutDescriptorArray::init()
+{
+    return NS::Object::init<MTL::VertexBufferLayoutDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: - forwards `index` and returns the VertexBufferLayoutDescriptor* result; no bounds check is performed in this wrapper.
+_MTL_INLINE MTL::VertexBufferLayoutDescriptor* MTL::VertexBufferLayoutDescriptorArray::object(NS::UInteger index)
+{
+    return Object::sendMessage<MTL::VertexBufferLayoutDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index);
+}
+
+// method: setObject:atIndexedSubscript: - forwards the descriptor pointer first and the index second; returns nothing.
+_MTL_INLINE void MTL::VertexBufferLayoutDescriptorArray::setObject(const MTL::VertexBufferLayoutDescriptor* bufferDesc, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), bufferDesc, index);
+}
+
+// static method: alloc - hands the MTLVertexAttributeDescriptor class token (_MTL_PRIVATE_CLS) to NS::Object::alloc and returns its result.
+_MTL_INLINE MTL::VertexAttributeDescriptor* MTL::VertexAttributeDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::VertexAttributeDescriptor>(_MTL_PRIVATE_CLS(MTLVertexAttributeDescriptor));
+}
+
+// method: init - delegates to NS::Object::init, with the result typed as VertexAttributeDescriptor.
+_MTL_INLINE MTL::VertexAttributeDescriptor* MTL::VertexAttributeDescriptor::init()
+{
+    return NS::Object::init<MTL::VertexAttributeDescriptor>();
+}
+
+// property: format - getter (MTL::VertexFormat), followed by its setter (selector token setFormat_).
+_MTL_INLINE MTL::VertexFormat MTL::VertexAttributeDescriptor::format() const
+{
+    return Object::sendMessage<MTL::VertexFormat>(this, _MTL_PRIVATE_SEL(format));
+}
+
+_MTL_INLINE void MTL::VertexAttributeDescriptor::setFormat(MTL::VertexFormat format)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFormat_), format);
+}
+
+// property: offset - getter (NS::UInteger), followed by its setter (selector token setOffset_).
+_MTL_INLINE NS::UInteger MTL::VertexAttributeDescriptor::offset() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(offset));
+}
+
+_MTL_INLINE void MTL::VertexAttributeDescriptor::setOffset(NS::UInteger offset)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setOffset_), offset);
+}
+
+// property: bufferIndex - getter (NS::UInteger), followed by its setter (selector token setBufferIndex_).
+_MTL_INLINE NS::UInteger MTL::VertexAttributeDescriptor::bufferIndex() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(bufferIndex));
+}
+
+_MTL_INLINE void MTL::VertexAttributeDescriptor::setBufferIndex(NS::UInteger bufferIndex)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setBufferIndex_), bufferIndex);
+}
+
+// static method: alloc - hands the MTLVertexAttributeDescriptorArray class token (_MTL_PRIVATE_CLS) to NS::Object::alloc and returns its result.
+_MTL_INLINE MTL::VertexAttributeDescriptorArray* MTL::VertexAttributeDescriptorArray::alloc()
+{
+    return NS::Object::alloc<MTL::VertexAttributeDescriptorArray>(_MTL_PRIVATE_CLS(MTLVertexAttributeDescriptorArray));
+}
+
+// method: init - delegates to NS::Object::init, with the result typed as VertexAttributeDescriptorArray.
+_MTL_INLINE MTL::VertexAttributeDescriptorArray* MTL::VertexAttributeDescriptorArray::init()
+{
+    return NS::Object::init<MTL::VertexAttributeDescriptorArray>();
+}
+
+// method: objectAtIndexedSubscript: - forwards `index` and returns the VertexAttributeDescriptor* result; no bounds check is performed in this wrapper.
+_MTL_INLINE MTL::VertexAttributeDescriptor* MTL::VertexAttributeDescriptorArray::object(NS::UInteger index)
+{
+    return Object::sendMessage<MTL::VertexAttributeDescriptor*>(this, _MTL_PRIVATE_SEL(objectAtIndexedSubscript_), index);
+}
+
+// method: setObject:atIndexedSubscript: - forwards the descriptor pointer first and the index second; returns nothing.
+_MTL_INLINE void MTL::VertexAttributeDescriptorArray::setObject(const MTL::VertexAttributeDescriptor* attributeDesc, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setObject_atIndexedSubscript_), attributeDesc, index);
+}
+
+// static method: alloc - hands the MTLVertexDescriptor class token (_MTL_PRIVATE_CLS) to NS::Object::alloc and returns its result.
+_MTL_INLINE MTL::VertexDescriptor* MTL::VertexDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::VertexDescriptor>(_MTL_PRIVATE_CLS(MTLVertexDescriptor));
+}
+
+// method: init - delegates to NS::Object::init, with the result typed as VertexDescriptor.
+_MTL_INLINE MTL::VertexDescriptor* MTL::VertexDescriptor::init()
+{
+    return NS::Object::init<MTL::VertexDescriptor>();
+}
+
+// static method: vertexDescriptor - the receiver is the MTLVertexDescriptor class token rather than `this`; no arguments are passed.
+_MTL_INLINE MTL::VertexDescriptor* MTL::VertexDescriptor::vertexDescriptor()
+{
+    return Object::sendMessage<MTL::VertexDescriptor*>(_MTL_PRIVATE_CLS(MTLVertexDescriptor), _MTL_PRIVATE_SEL(vertexDescriptor));
+}
+
+// property: layouts - getter; result type MTL::VertexBufferLayoutDescriptorArray*.
+_MTL_INLINE MTL::VertexBufferLayoutDescriptorArray* MTL::VertexDescriptor::layouts() const
+{
+    return Object::sendMessage<MTL::VertexBufferLayoutDescriptorArray*>(this, _MTL_PRIVATE_SEL(layouts));
+}
+
+// property: attributes - getter; result type MTL::VertexAttributeDescriptorArray*.
+_MTL_INLINE MTL::VertexAttributeDescriptorArray* MTL::VertexDescriptor::attributes() const
+{
+    return Object::sendMessage<MTL::VertexAttributeDescriptorArray*>(this, _MTL_PRIVATE_SEL(attributes));
+}
+
+// method: reset - sends the reset selector to this object with no arguments and returns nothing.
+_MTL_INLINE void MTL::VertexDescriptor::reset()
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(reset));
+}
diff --git a/metal-cpp/Metal/MTLVisibleFunctionTable.hpp b/metal-cpp/Metal/MTLVisibleFunctionTable.hpp
new file mode 100644
index 0000000..b34d3a1
--- /dev/null
+++ b/metal-cpp/Metal/MTLVisibleFunctionTable.hpp
@@ -0,0 +1,96 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/MTLVisibleFunctionTable.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+#include "MTLDefines.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLPrivate.hpp"
+
+#include <Foundation/Foundation.hpp>
+
+#include "MTLFunctionHandle.hpp"
+#include "MTLResource.hpp"
+
+namespace MTL
+{
+class VisibleFunctionTableDescriptor : public NS::Copying<VisibleFunctionTableDescriptor> // descriptor exposing a single functionCount property
+{
+public:
+    static class VisibleFunctionTableDescriptor* alloc();
+
+    class VisibleFunctionTableDescriptor*        init();
+
+    static class VisibleFunctionTableDescriptor* visibleFunctionTableDescriptor(); // class-level factory: the message goes to the class symbol, not an instance
+
+    NS::UInteger                                 functionCount() const;
+    void                                         setFunctionCount(NS::UInteger functionCount);
+};
+
+class VisibleFunctionTable : public NS::Referencing<VisibleFunctionTable, Resource> // table whose entries are set from FunctionHandle pointers
+{
+public:
+    void setFunction(const class FunctionHandle* function, NS::UInteger index); // one entry, addressed by `index`
+
+    void setFunctions(const class FunctionHandle* functions[], NS::Range range); // several entries, addressed by `range`
+};
+
+}
+
+// static method: alloc (delegates to NS::Object::alloc, passing the MTLVisibleFunctionTableDescriptor class symbol)
+_MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::alloc()
+{
+    return NS::Object::alloc<MTL::VisibleFunctionTableDescriptor>(_MTL_PRIVATE_CLS(MTLVisibleFunctionTableDescriptor));
+}
+
+// method: init (delegates to NS::Object::init, with the result typed as MTL::VisibleFunctionTableDescriptor*)
+_MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::init()
+{
+    return NS::Object::init<MTL::VisibleFunctionTableDescriptor>();
+}
+
+// static method: visibleFunctionTableDescriptor (the message is sent to the MTLVisibleFunctionTableDescriptor class symbol rather than to an instance)
+_MTL_INLINE MTL::VisibleFunctionTableDescriptor* MTL::VisibleFunctionTableDescriptor::visibleFunctionTableDescriptor()
+{
+    return Object::sendMessage<MTL::VisibleFunctionTableDescriptor*>(_MTL_PRIVATE_CLS(MTLVisibleFunctionTableDescriptor), _MTL_PRIVATE_SEL(visibleFunctionTableDescriptor));
+}
+
+// property: functionCount (getter; sends the `functionCount` selector symbol to this object and returns the NS::UInteger it yields)
+_MTL_INLINE NS::UInteger MTL::VisibleFunctionTableDescriptor::functionCount() const
+{
+    return Object::sendMessage<NS::UInteger>(this, _MTL_PRIVATE_SEL(functionCount));
+}
+// property: functionCount (setter; forwards the new value through the `setFunctionCount_` selector symbol and returns nothing)
+_MTL_INLINE void MTL::VisibleFunctionTableDescriptor::setFunctionCount(NS::UInteger functionCount)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctionCount_), functionCount);
+}
+
+// method: setFunction:atIndex: (forwards `function` and `index`, in that order, to that selector on this table; returns nothing)
+_MTL_INLINE void MTL::VisibleFunctionTable::setFunction(const MTL::FunctionHandle* function, NS::UInteger index)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunction_atIndex_), function, index);
+}
+
+// method: setFunctions:withRange: (forwards the `functions` array pointer and `range`, in that order, to that selector on this table; returns nothing)
+_MTL_INLINE void MTL::VisibleFunctionTable::setFunctions(const MTL::FunctionHandle* functions[], NS::Range range)
+{
+    Object::sendMessage<void>(this, _MTL_PRIVATE_SEL(setFunctions_withRange_), functions, range);
+}
diff --git a/metal-cpp/Metal/Metal.hpp b/metal-cpp/Metal/Metal.hpp
new file mode 100644
index 0000000..96a3606
--- /dev/null
+++ b/metal-cpp/Metal/Metal.hpp
@@ -0,0 +1,80 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// Metal/Metal.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "MTLAccelerationStructure.hpp"
+#include "MTLAccelerationStructureCommandEncoder.hpp"
+#include "MTLAccelerationStructureTypes.hpp"
+#include "MTLArgument.hpp"
+#include "MTLArgumentEncoder.hpp"
+#include "MTLBinaryArchive.hpp"
+#include "MTLBlitCommandEncoder.hpp"
+#include "MTLBlitPass.hpp"
+#include "MTLBuffer.hpp"
+#include "MTLCaptureManager.hpp"
+#include "MTLCaptureScope.hpp"
+#include "MTLCommandBuffer.hpp"
+#include "MTLCommandEncoder.hpp"
+#include "MTLCommandQueue.hpp"
+#include "MTLComputeCommandEncoder.hpp"
+#include "MTLComputePass.hpp"
+#include "MTLComputePipeline.hpp"
+#include "MTLCounters.hpp"
+#include "MTLDefines.hpp"
+#include "MTLDepthStencil.hpp"
+#include "MTLDevice.hpp"
+#include "MTLDrawable.hpp"
+#include "MTLDynamicLibrary.hpp"
+#include "MTLEvent.hpp"
+#include "MTLFence.hpp"
+#include "MTLFunctionConstantValues.hpp"
+#include "MTLFunctionDescriptor.hpp"
+#include "MTLFunctionHandle.hpp"
+#include "MTLFunctionLog.hpp"
+#include "MTLFunctionStitching.hpp"
+#include "MTLHeaderBridge.hpp"
+#include "MTLHeap.hpp"
+#include "MTLIndirectCommandBuffer.hpp"
+#include "MTLIndirectCommandEncoder.hpp"
+#include "MTLIntersectionFunctionTable.hpp"
+#include "MTLLibrary.hpp"
+#include "MTLLinkedFunctions.hpp"
+#include "MTLParallelRenderCommandEncoder.hpp"
+#include "MTLPipeline.hpp"
+#include "MTLPixelFormat.hpp"
+#include "MTLPrivate.hpp"
+#include "MTLRasterizationRate.hpp"
+#include "MTLRenderCommandEncoder.hpp"
+#include "MTLRenderPass.hpp"
+#include "MTLRenderPipeline.hpp"
+#include "MTLResource.hpp"
+#include "MTLResourceStateCommandEncoder.hpp"
+#include "MTLResourceStatePass.hpp"
+#include "MTLSampler.hpp"
+#include "MTLStageInputOutputDescriptor.hpp"
+#include "MTLTexture.hpp"
+#include "MTLTypes.hpp"
+#include "MTLVertexDescriptor.hpp"
+#include "MTLVisibleFunctionTable.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/QuartzCore/CADefines.hpp b/metal-cpp/QuartzCore/CADefines.hpp
new file mode 100644
index 0000000..81f7eff
--- /dev/null
+++ b/metal-cpp/QuartzCore/CADefines.hpp
@@ -0,0 +1,41 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// QuartzCore/CADefines.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "../Foundation/NSDefines.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Each _CA_* macro below is a plain alias for the _NS_* macro with the same suffix (presumably provided by the NSDefines.hpp include above -- confirm).
+#define _CA_EXPORT _NS_EXPORT
+#define _CA_EXTERN _NS_EXTERN
+#define _CA_INLINE _NS_INLINE
+#define _CA_PACKED _NS_PACKED
+
+#define _CA_CONST(type, name) _NS_CONST(type, name)
+#define _CA_ENUM(type, name) _NS_ENUM(type, name)
+#define _CA_OPTIONS(type, name) _NS_OPTIONS(type, name)
+
+#define _CA_VALIDATE_SIZE(ns, name) _NS_VALIDATE_SIZE(ns, name)
+#define _CA_VALIDATE_ENUM(ns, name) _NS_VALIDATE_ENUM(ns, name)
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/QuartzCore/CAMetalDrawable.hpp b/metal-cpp/QuartzCore/CAMetalDrawable.hpp
new file mode 100644
index 0000000..8d08757
--- /dev/null
+++ b/metal-cpp/QuartzCore/CAMetalDrawable.hpp
@@ -0,0 +1,57 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// QuartzCore/CAMetalDrawable.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "../Metal/MTLDrawable.hpp"
+#include "../Metal/MTLTexture.hpp"
+
+#include "CADefines.hpp"
+#include "CAPrivate.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace CA
+{
+class MetalDrawable : public NS::Referencing<MetalDrawable, MTL::Drawable> // both accessors are defined inline after this namespace closes
+{
+public:
+    class MetalLayer* layer() const;   // elaborated `class` specifier: no other declaration of MetalLayer is visible in this header
+    MTL::Texture*     texture() const; // result of sending the `texture` selector to this drawable
+};
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// property: layer -- sends the `layer` selector declared in CA::Private::Selector (CAPrivate.hpp) to this drawable and returns the result as a CA::MetalLayer*.
+_CA_INLINE CA::MetalLayer* CA::MetalDrawable::layer() const
+{
+    return Object::sendMessage<MetalLayer*>(this, _CA_PRIVATE_SEL(layer));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+// property: texture -- sends the `texture` selector declared in CA::Private::Selector (CAPrivate.hpp) to this drawable and returns the result as an MTL::Texture*.
+_CA_INLINE MTL::Texture* CA::MetalDrawable::texture() const
+{
+    return Object::sendMessage<MTL::Texture*>(this, _CA_PRIVATE_SEL(texture));
+}
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/QuartzCore/CAPrivate.hpp b/metal-cpp/QuartzCore/CAPrivate.hpp
new file mode 100644
index 0000000..e6453ec
--- /dev/null
+++ b/metal-cpp/QuartzCore/CAPrivate.hpp
@@ -0,0 +1,109 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// QuartzCore/CAPrivate.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "CADefines.hpp"
+
+#include <objc/runtime.h>
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#define _CA_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol)
+#define _CA_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor)
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#if defined(CA_PRIVATE_IMPLEMENTATION)
+// Implementation build: the DEF_* macros define storage (class lookup by name, selector registration, weakly imported string constants).
+#define _CA_PRIVATE_VISIBILITY __attribute__((visibility("default")))
+#define _CA_PRIVATE_IMPORT __attribute__((weak_import))
+
+#if __OBJC__
+#define _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol))
+#else
+#define _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol)
+#endif // __OBJC__
+
+#define _CA_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _CA_PRIVATE_VISIBILITY = _CA_PRIVATE_OBJC_LOOKUP_CLASS(symbol);
+#define _CA_PRIVATE_DEF_PRO(symbol)
+#define _CA_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _CA_PRIVATE_VISIBILITY = sel_registerName(symbol);
+#define _CA_PRIVATE_DEF_STR(type, symbol)                \
+    _CA_EXTERN type const CA##symbol _CA_PRIVATE_IMPORT; \
+    type const                       CA::symbol = (nullptr != &CA##symbol) ? CA##symbol : nullptr;
+
+#else
+// Declaration-only build: class and selector storage become extern declarations; the protocol and string macros expand to nothing.
+#define _CA_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol;
+#define _CA_PRIVATE_DEF_PRO(symbol)
+#define _CA_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor;
+#define _CA_PRIVATE_DEF_STR(type, symbol)
+
+#endif // CA_PRIVATE_IMPLEMENTATION
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace CA
+{
+namespace Private
+{
+    namespace Class
+    {
+        // No _CA_PRIVATE_DEF_CLS entries: this header registers no class symbols.
+    } // Class
+} // Private
+} // CA
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace CA
+{
+namespace Private
+{
+    namespace Protocol
+    {
+        // _CA_PRIVATE_DEF_PRO expands to nothing in both build modes, so this entry only records the protocol name.
+        _CA_PRIVATE_DEF_PRO(CAMetalDrawable);
+
+    } // Protocol
+} // Private
+} // CA
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+namespace CA
+{
+namespace Private
+{
+    namespace Selector
+    {
+        // s_klayer / s_ktexture: SEL storage for the "layer" and "texture" selector strings.
+        _CA_PRIVATE_DEF_SEL(layer,
+            "layer");
+        _CA_PRIVATE_DEF_SEL(texture,
+            "texture");
+
+    } // Selector
+} // Private
+} // CA
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/QuartzCore/QuartzCore.hpp b/metal-cpp/QuartzCore/QuartzCore.hpp
new file mode 100644
index 0000000..2e1eb46
--- /dev/null
+++ b/metal-cpp/QuartzCore/QuartzCore.hpp
@@ -0,0 +1,27 @@
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+//
+// QuartzCore/QuartzCore.hpp
+//
+// Copyright 2020-2021 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#pragma once
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+#include "CAMetalDrawable.hpp"
+
+//-------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/metal-cpp/README.md b/metal-cpp/README.md
new file mode 100644
index 0000000..9950715
--- /dev/null
+++ b/metal-cpp/README.md
@@ -0,0 +1,245 @@
+## About
+
+**metal-cpp** is a low overhead and header only C++ interface for Metal that helps developers add Metal functionality to graphics applications that are written in C++ (such as game engines). **metal-cpp** removes the need to create a shim and allows developers to call Metal functions directly from anywhere in their existing C++ code.
+
+
+## Highlights
+
+- Drop in C++ alternative interface to the Metal Objective-C headers.
+- Direct mapping of all Metal Objective-C classes, constants and enums to C++ in the MTL C++ namespace.
+- No measurable overhead compared to calling Metal Objective-C headers, due to inlining of C++ function calls.
+- No usage of wrapper containers that require additional allocations.
+- Requires C++17 due to the usage of `constexpr` in `NS::Object`.
+- Identical header files and function/constant/enum availability for iOS, macOS and tvOS.
+- Backwards compatibility: All `bool MTL::Device::supports...()` functions check if their required selectors exist and automatically return `false` if not.
+- String (`ErrorDomain`) constants are weak linked and automatically set to `nullptr` if not available.
+
+## Memory Allocation Policy
+
+**metal-cpp** follows the object allocation policies of Cocoa and Cocoa Touch. Understanding those rules is especially important when using metal-cpp, as C++ objects are not eligible for automatic reference counting (ARC).
+
+**metal-cpp** objects are reference counted. To help convey and manage object lifecycles, the following conventions are observed:
+
+1. *You own any object returned by methods whose name begins with* `alloc` *,* `new` *,* `copy` *, or* `mutableCopy`. The method returns these objects with a `retainCount` equal to `1`.
+2. *You can take ownership of an object by calling its* ```retain()``` *method*. A received object is normally guaranteed to remain valid within the method it was received in. You use `retain` in two situations: (1) In the implementation of an accessor method (a setter) or to take ownership of an object; and (2) To prevent an object from being deallocated as a side-effect of some other operation.
+3. *When you no longer need it, you must relinquish ownership of an object you own*. You relinquish ownership by calling its `release()` or `autorelease()` method.
+4. *You must not relinquish ownership of an object you do not own*.
+
+When an object's `retainCount` reaches `0`, the object is immediately deallocated. It is illegal to call methods on a deallocated object and it may lead to an application crash.
+
+### AutoreleasePools and Objects
+
+Several methods that create temporary objects in **metal-cpp** add them to an `AutoreleasePool` to help manage their lifetimes. In these situations, after **metal-cpp** creates the object, it adds it to an `AutoreleasePool`, which will release its objects when you release (or drain) it.
+
+By adding temporary objects to an AutoreleasePool, you do not need to explicitly call `release()` to deallocate them. Instead, you can rely on the `AutoreleasePool` to implicitly manage those lifetimes.
+
+If you create an object with a method that does not begin with `alloc`, `new`, `copy`, or `mutableCopy`, the creating method adds the object to an autorelease pool.
+
+The typical scope of an `AutoreleasePool` is one frame of rendering for the main thread of the program. When the thread returns control to the RunLoop (an object responsible for receiving input and events from the windowing system), the pool is *drained*, releasing its objects.
+
+You can create and manage additional `AutoreleasePool`s at smaller scopes to reduce your program's working set, and you are required to do so for any additional threads your program creates.
+
+If an object's lifecycle needs to be extended beyond the `AutoreleasePool`'s scope, you can claim ownership of it (avoiding its release beyond the pool's scope) by calling its `retain()` method before its pool is drained. In these cases, you will be responsible for making the appropriate `release()` call on the object after you no longer need it. 
+
+You can find a more-detailed introduction to the memory management rules here: https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html.
+
+For more details about the application's RunLoop, please find its documentation here: https://developer.apple.com/documentation/foundation/nsrunloop
+
+### Use and debug AutoreleasePools
+
+When you create an autoreleased object and there is no enclosing `AutoreleasePool`, the object is leaked.
+
+To prevent this, you normally create an `AutoreleasePool` in your program's `main` function, and in the entry function for every thread you create. You may also create additional `AutoreleasePool`s to avoid growing your program's high memory watermark when you create several autoreleased objects, such as when rendering.
+
+Use the Environment Variable `OBJC_DEBUG_MISSING_POOLS=YES` to print a runtime warning when an autoreleased object is leaked because no enclosing `AutoreleasePool` is available for its thread.
+
+You can also run `leaks --autoreleasePools` on a memgraph file or a process ID (macOS only) to view a listing of your program's `AutoreleasePool`s and all objects they contain.
+
+### nullptr
+
+Similar to Objective-C, it is legal to call any method, including `retain()` and `release()`, on `nullptr` "objects". While calling methods on `nullptr` still incurs function call overhead, the effective result is equivalent to a NOP.
+
+Conversely, do not assume that because calling a method on a pointer did not result in a crash, that the pointed-to object is valid.
+
+## Adding metal-cpp to a Project
+
+Simply include `Metal/Metal.hpp`. To ensure that the selector and class symbols are linked, add to one of your cpp files:
+
+```cpp
+#define NS_PRIVATE_IMPLEMENTATION
+#define MTL_PRIVATE_IMPLEMENTATION
+
+#include "Metal/Metal.hpp"
+```
+
+If you want to use the QuartzCore wrapper, add:
+
+```cpp
+#define CA_PRIVATE_IMPLEMENTATION
+
+#include "QuartzCore/QuartzCore.hpp"
+```
+
+## Generating a Single Header File
+
+Purely optional: You can generate a single header file that contains all **metal-cpp** headers via:
+
+```shell
+./SingleHeader/MakeSingleHeader.py Foundation/Foundation.hpp QuartzCore/QuartzCore.hpp Metal/Metal.hpp
+```
+
+By default, the generated header file will be written to `./SingleHeader/Metal.hpp`.
+
+## Examples
+
+#### Creating the device
+
+###### Objective-C (with automatic reference counting)
+
+```objc
+id< MTLDevice > device = MTLCreateSystemDefaultDevice();
+
+// ...
+```
+
+###### Objective-C
+
+```objc
+id< MTLDevice > device = MTLCreateSystemDefaultDevice();
+
+// ...
+
+[device release];
+```
+
+###### C++
+
+```cpp
+MTL::Device* pDevice = MTL::CreateSystemDefaultDevice();
+
+// ...
+
+pDevice->release();
+```
+
+#### Metal function calls map directly to C++
+
+###### Objective-C (with automatic reference counting)
+
+```objc
+MTLSamplerDescriptor* samplerDescriptor = [[MTLSamplerDescriptor alloc] init];
+
+[samplerDescriptor setSAddressMode: MTLSamplerAddressModeRepeat];
+[samplerDescriptor setTAddressMode: MTLSamplerAddressModeRepeat];
+[samplerDescriptor setRAddressMode: MTLSamplerAddressModeRepeat];
+[samplerDescriptor setMagFilter: MTLSamplerMinMagFilterLinear];
+[samplerDescriptor setMinFilter: MTLSamplerMinMagFilterLinear];
+[samplerDescriptor setMipFilter: MTLSamplerMipFilterLinear];
+[samplerDescriptor setSupportArgumentBuffers: YES];
+
+id< MTLSamplerState > samplerState = [device newSamplerStateWithDescriptor:samplerDescriptor];
+```
+
+###### Objective-C
+
+```objc
+MTLSamplerDescriptor* samplerDescriptor = [[MTLSamplerDescriptor alloc] init];
+
+[samplerDescriptor setSAddressMode: MTLSamplerAddressModeRepeat];
+[samplerDescriptor setTAddressMode: MTLSamplerAddressModeRepeat];
+[samplerDescriptor setRAddressMode: MTLSamplerAddressModeRepeat];
+[samplerDescriptor setMagFilter: MTLSamplerMinMagFilterLinear];
+[samplerDescriptor setMinFilter: MTLSamplerMinMagFilterLinear];
+[samplerDescriptor setMipFilter: MTLSamplerMipFilterLinear];
+[samplerDescriptor setSupportArgumentBuffers: YES];
+
+id< MTLSamplerState > samplerState = [device newSamplerStateWithDescriptor:samplerDescriptor];
+
+[samplerDescriptor release];
+
+// ...
+
+[samplerState release];
+```
+
+###### C++
+
+```cpp
+MTL::SamplerDescriptor* pSamplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
+
+pSamplerDescriptor->setSAddressMode( MTL::SamplerAddressModeRepeat );
+pSamplerDescriptor->setTAddressMode( MTL::SamplerAddressModeRepeat );
+pSamplerDescriptor->setRAddressMode( MTL::SamplerAddressModeRepeat );
+pSamplerDescriptor->setMagFilter( MTL::SamplerMinMagFilterLinear );
+pSamplerDescriptor->setMinFilter( MTL::SamplerMinMagFilterLinear );
+pSamplerDescriptor->setMipFilter( MTL::SamplerMipFilterLinear );
+pSamplerDescriptor->setSupportArgumentBuffers( true );
+
+MTL::SamplerState* pSamplerState = pDevice->newSamplerState( pSamplerDescriptor );
+
+pSamplerDescriptor->release();
+
+// ...
+
+pSamplerState->release();
+```
+
+#### A subset of bindings for Foundation classes is provided for seamless integration
+
+###### Objective-C (with automatic reference counting)
+
+```objc
+NSAutoreleasePool*  pool   = [[NSAutoreleasePool alloc] init];
+NSString*           string = [NSString stringWithCString: "Hello World" encoding: NSASCIIStringEncoding];
+
+printf( "string = \"%s\"\n", [string cStringUsingEncoding: NSASCIIStringEncoding] );
+```
+
+###### Objective-C
+
+```objc
+NSAutoreleasePool* pool   = [[NSAutoreleasePool alloc] init];
+NSString*          string = [NSString stringWithCString: "Hello World" encoding: NSASCIIStringEncoding];
+								
+printf( "string = \"%s\"\n", [string cStringUsingEncoding: NSASCIIStringEncoding] );
+
+[pool release];
+```
+
+###### C++
+
+```cpp
+NS::AutoreleasePool* pPool   = NS::AutoreleasePool::alloc()->init();
+NS::String*          pString = NS::String::string( "Hello World", NS::ASCIIStringEncoding );
+
+printf( "pString = \"%s\"\n", pString->cString( NS::ASCIIStringEncoding ) );
+
+pPool->release();
+```
+
+####  Interoperability with CoreFoundation
+
+```cpp
+MTL::AccelerationStructureTriangleGeometryDescriptor* pGeoDescriptor  = MTL::AccelerationStructureTriangleGeometryDescriptor::alloc()->init();
+CFTypeRef                                             descriptors[]   = { ( CFTypeRef )( pGeoDescriptor ) };
+NS::Array*                                            pGeoDescriptors = ( NS::Array* )( CFArrayCreate( kCFAllocatorDefault, descriptors, SIZEOF_ARRAY( descriptors), &kCFTypeArrayCallBacks ) );
+
+// ...
+
+pGeoDescriptors->release();
+```
+
+#### Accessing a CAMetalDrawable
+
+```objc
+#import <QuartzCore/CAMetalLayer.h>
+#import <QuartzCore/QuartzCore.hpp>
+
+// ...
+
+CAMetalLayer*         metalLayer         = /* get your layer from your view */;
+id< CAMetalDrawable > metalDrawable      = [metalLayer nextDrawable];
+CA::MetalDrawable*    pMetalCppDrawable  = ( __bridge CA::MetalDrawable* ) metalDrawable;
+
+// ...
+```
diff --git a/metal-cpp/SingleHeader/MakeSingleHeader.py b/metal-cpp/SingleHeader/MakeSingleHeader.py
new file mode 100644
index 0000000..e58e49e
--- /dev/null
+++ b/metal-cpp/SingleHeader/MakeSingleHeader.py
@@ -0,0 +1,271 @@
+#!/usr/bin/python
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+#
+# SingleHeader/MakeSingleHeader.py
+#
+# Copyright 2020-2021 Apple Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+import argparse
+import datetime
+import logging
+import os
+import re
+import subprocess
+import sys
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+class HeaderPrefix( object ):
+	__template 			= ( '//\n'
+							'// {file}\n'
+							'//\n'
+							'// {meta_data}\n'
+							'//\n'
+							'// Copyright 2020-2021 Apple Inc.\n'
+							'//\n'
+							'// Licensed under the Apache License, Version 2.0 (the "License");\n'
+							'// you may not use this file except in compliance with the License.\n'
+							'// You may obtain a copy of the License at\n'
+							'//\n'
+							'//     http://www.apache.org/licenses/LICENSE-2.0\n'
+							'//\n'
+							'// Unless required by applicable law or agreed to in writing, software\n'
+							'// distributed under the License is distributed on an "AS IS" BASIS,\n'
+							'// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n'
+							'// See the License for the specific language governing permissions and\n'
+							'// limitations under the License.\n'
+							'//\n'
+							'\n' )
+
+	__template_commit	=	'Autogenerated from commit {commit}.'
+	__template_date		=	'Autogenerated on %B %d, %Y.'
+
+	def __init__( self, file ):
+		self.__file = file
+
+	def __str__( self ):
+		return self.__template.format( file = self.__file, meta_data = self.__meta_data_string() )
+
+	def __get_commit_hash( self ):
+		git_commit_hash = None
+
+		try:
+			git_dir			= os.path.dirname( os.path.realpath( __file__ ) )
+			proc 			= subprocess.Popen( [ 'git', 'rev-parse', 'HEAD' ], cwd = git_dir, stdout = subprocess.PIPE, stderr = subprocess.PIPE )
+			git_commit_hash = proc.stdout.read().decode( 'utf-8', 'replace' ).strip()
+		except:
+			logging.error( 'Failed to determine git commit hash!' )
+			pass
+
+		return git_commit_hash
+
+	def __get_commit_string( self ):
+		meta_data		= None
+		git_commit_hash	= self.__get_commit_hash()
+
+		if git_commit_hash:
+			meta_data = self.__template_commit.format( commit = git_commit_hash )
+
+		return meta_data
+
+	def __get_date_string( self ):
+		today = datetime.date.today()
+ 
+		return today.strftime( self.__template_date )
+
+	def __meta_data_string( self ):
+		meta_data = self.__get_commit_string()
+
+		if not meta_data:
+			meta_data = self.__get_date_string()
+
+		return meta_data
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+class SingleHeader( object ):
+	__pragma_once = '#pragma once\n\n'
+
+	def __init__( self ):
+		self.__header_paths = list()
+
+	def __str__( self ):
+		return self.process()
+
+	def append( self, header_path ):
+		self.__header_paths.append( header_path )
+
+	def process( self ):
+		out_header 				= self.__pragma_once
+
+		self.__included_headers	= set()
+		self.__base_path 		= list()
+
+		for header_path in self.__header_paths:
+			out_header += self.__process_header( header_path )
+
+		return self.__strip_empty_lines( out_header )
+
+	def __read_header( self, path ):
+		path = os.path.realpath( path )
+
+		try:
+			f = open( path, 'r' )
+		except:
+			raise RuntimeError( 'Failed to open file \"' + path + '\" for read!' )
+
+		return f.read()
+
+	def __strip_pragma_once( self, header ):
+		return re.sub( '\\s*#pragma once\s*\\/\\/-*\\n', '', header )
+
+	def __strip_comments( self, header ):
+		return re.sub( '^//.*\\n', '', header, flags = re.MULTILINE )
+
+	def __strip_empty_lines( self, header ):
+		return re.sub( '\\n\\n+', '\\n\\n', header, flags = re.MULTILINE )
+
+	def __substitute_include_directive( self, match ):
+		header_path = match.group( 'HEADER_PATH' )
+
+		logging.info( '\tSubstituting \"' + header_path + '\"...' )
+
+		return self.__process_header( os.path.join( self.__base_path[-1], header_path ) )
+
+	def __process_include_directives( self, header ):
+		return re.sub( '^\\s*#include\\s\\"(?P<HEADER_PATH>\\S*)\\"', self.__substitute_include_directive, header, flags = re.MULTILINE )
+
+	def __process_foundation_directives( self, header ):
+		if header.find("#include <Foundation/Foundation.hpp>") != -1:
+			logging.info( '\tSubstituting <Foundation/Foundation.hpp>...' )
+			return header.replace("#include <Foundation/Foundation.hpp>", self.__process_header( os.path.join( self.__base_path[-1], "../Foundation/Foundation.hpp" ) ) )
+		return header
+
+
+	def __process_header( self, header_path ):
+		out_header = ''		
+
+		header_path = os.path.realpath( header_path )
+
+		if not header_path in self.__included_headers:
+			logging.info( 'Processing \"' + header_path + '\"...' )
+
+			self.__base_path.append( os.path.dirname( header_path ) )
+			self.__included_headers.add( header_path )
+			
+			out_header = self.__read_header( header_path )
+			out_header = self.__strip_pragma_once( out_header )
+			out_header = self.__strip_comments( out_header )
+			out_header = self.__process_include_directives( out_header )
+			out_header = self.__process_foundation_directives( out_header )
+
+			self.__base_path.pop()
+		else:
+			logging.info( '\tSkipping \"' + header_path + '\"...' )
+
+		return out_header
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+def create_argument_parser():
+	parser 			= argparse.ArgumentParser()
+	base_path 		= os.path.dirname( os.path.realpath( __file__ ) )
+	output_path		= os.path.join( base_path, 'Metal.hpp' )
+
+	parser.add_argument( '-o', '--output',  dest = 'output_path', metavar = 'PATH', default = output_path, help = 'Output path for the single header file.' )
+	parser.add_argument( '-v', '--verbose', action = 'store_true',  help = 'Show verbose output.' )
+	parser.add_argument( dest = 'header_paths', metavar = 'HEADER_FILE', nargs='+', help = 'Input header file.' )
+
+	return parser
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+def parse_arguments():
+	parser	= create_argument_parser()
+	args	= parser.parse_args()
+
+	if args.verbose:
+		logging.getLogger().setLevel( logging.INFO )
+	else:
+		logging.getLogger().setLevel( logging.ERROR )
+
+	return args
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+def make_header( args ):
+	prefix = HeaderPrefix( os.path.basename( args.output_path ) )
+	header = SingleHeader()
+	
+	for header_path in args.header_paths:
+		header.append( header_path )
+
+	return str( prefix ) + str( header )
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+def make_dir( path ):
+	try:
+		if not os.path.exists( path ):
+			os.makedirs( path )
+	except os.error:
+		pass
+	except:
+		raise
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+def write_header( args, content ):
+	path = os.path.realpath( args.output_path )
+
+	logging.info( 'Writing \"' + path + '\"...' )
+
+	make_dir( os.path.dirname( path ) )
+
+	try:
+		f = open( path, 'w' )
+	except:
+		raise RuntimeError( 'Failed to open file \"' + path + '\" for write!' )
+
+	f.write( content )
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+if __name__ == '__main__':
+	result = -1
+	
+	try:
+		if sys.getdefaultencoding().lower() == 'ascii':
+			reload( sys )
+			sys.setdefaultencoding( 'utf-8' )
+
+		args 	= parse_arguments()
+		header 	= make_header( args )
+
+		write_header( args, header )
+
+		result = 0
+
+	except ( KeyboardInterrupt, SystemExit ):
+	 	pass
+	except:
+	 	raise
+
+	sys.exit( result )
+
+#--------------------------------------------------------------------------------------------------------------------------------------------------------------
diff --git a/precision_results/FP32_precision.png b/precision_results/FP32_precision.png
new file mode 100644
index 0000000..9d06dc1
Binary files /dev/null and b/precision_results/FP32_precision.png differ
diff --git a/precision_results/FP64_precision.png b/precision_results/FP64_precision.png
new file mode 100644
index 0000000..20e6e4e
Binary files /dev/null and b/precision_results/FP64_precision.png differ
diff --git a/vkFFT/vkFFT.h b/vkFFT/vkFFT.h
index ac566f1..b86fcc0 100644
--- a/vkFFT/vkFFT.h
+++ b/vkFFT/vkFFT.h
@@ -58,8 +58,17 @@
 #endif
 #elif(VKFFT_BACKEND==4)
 #include <ze_api.h>
+#elif(VKFFT_BACKEND==5)
+#define NS_PRIVATE_IMPLEMENTATION
+#define CA_PRIVATE_IMPLEMENTATION
+#define MTL_PRIVATE_IMPLEMENTATION
+#include "Foundation/Foundation.hpp"
+#include "QuartzCore/QuartzCore.hpp"
+#include "Metal/Metal.hpp"
+#endif
+#ifdef VkFFT_use_FP128_Bluestein_RaderFFT
+#include "fftw3.h"
 #endif
-
 typedef struct {
 	//WHDCN layout
 
@@ -93,6 +102,9 @@ typedef struct {
 	ze_context_handle_t* context;
 	ze_command_queue_handle_t* commandQueue;
 	uint32_t commandQueueID;
+#elif(VKFFT_BACKEND==5)
+	MTL::Device* device;
+	MTL::CommandQueue* queue;
 #endif
 
 	//data parameters:
@@ -141,6 +153,12 @@ typedef struct {
 	void** inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
 	void** outputBuffer;//pointer to device buffer used to read data from if isOutputFormatted is enabled
 	void** kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer** buffer;//pointer to device buffer used for computations
+	MTL::Buffer** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
+	MTL::Buffer** inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
+	MTL::Buffer** outputBuffer;//pointer to device buffer used to write data to if isOutputFormatted is enabled
+	MTL::Buffer** kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
 #endif
 	uint64_t bufferOffset;//specify if VkFFT has to offset the first element position inside the buffer. In bytes. Default 0 
 	uint64_t tempBufferOffset;//specify if VkFFT has to offset the first element position inside the temp buffer. In bytes. Default 0 
@@ -150,6 +168,9 @@ typedef struct {
 	uint64_t specifyOffsetsAtLaunch;//specify if offsets will be selected with launch parameters VkFFTLaunchParams (0 - off, 1 - on). Default 0
 
 	//optional: (default 0 if not stated otherwise)
+#if(VKFFT_BACKEND==0)
+	VkPipelineCache* pipelineCache;//pointer to Vulkan pipeline cache
+#endif
 	uint64_t coalescedMemory;//in bytes, for Nvidia and AMD is equal to 32, Intel is equal 64, scaled for half precision. Gonna work regardles, but if specified by user correctly, the performance will be higher.
 	uint64_t aimThreads;//aim at this many threads per block. Default 128
 	uint64_t numSharedBanks;//how many banks shared memory has. Default 32
@@ -169,7 +190,8 @@ typedef struct {
 	uint64_t disableMergeSequencesR2C; //disable merging of two real sequences to reduce calculations (0 - off, 1 - on)
 	uint64_t normalize; //normalize inverse transform (0 - off, 1 - on)
 	uint64_t disableReorderFourStep; // disables unshuffling of Four step algorithm. Requires tempbuffer allocation (0 - off, 1 - on)
-	uint64_t useLUT; //switches from calculating sincos to using precomputed LUT tables (0 - off, 1 - on). Configured by initialization routine
+	int64_t useLUT; //switches from calculating sincos to using precomputed LUT tables (-1 - off, 0 - auto, 1 - on). Configured by initialization routine
+	int64_t useLUT_4step; //switches from calculating sincos to using precomputed LUT tables for intermediate roots of 1 in the Four-step FFT algorithm. (-1 - off, 0 - auto, 1 - on). Configured by initialization routine
 	uint64_t makeForwardPlanOnly; //generate code only for forward FFT (0 - off, 1 - on)
 	uint64_t makeInversePlanOnly; //generate code only for inverse FFT (0 - off, 1 - on)
 
@@ -183,10 +205,12 @@ typedef struct {
 	uint64_t keepShaderCode;//will keep shader code and print all executed shaders during the plan execution in order (0 - off, 1 - on)
 	uint64_t printMemoryLayout;//will print order of buffers used in shaders (0 - off, 1 - on)
 
-	uint64_t saveApplicationToString;//will save all compiled binaries to VkFFTApplication.saveApplicationString (will be allocated by VkFFT, deallocated with deleteVkFFT call). (0 - off, 1 - on)
+	uint64_t saveApplicationToString;//will save all compiled binaries to VkFFTApplication.saveApplicationString (will be allocated by VkFFT, deallocated with deleteVkFFT call). Currently disabled in Metal backend. (0 - off, 1 - on)
+
+	uint64_t loadApplicationFromString;//will load all binaries from loadApplicationString instead of recompiling them (must be allocated by user, must contain what saveApplicationToString call generated previously in VkFFTApplication.saveApplicationString). Currently disabled in Metal backend. (0 - off, 1 - on). Mutually exclusive with saveApplicationToString
+	void* loadApplicationString;//memory binary array through which user can load VkFFT binaries, must be provided by user if loadApplicationFromString = 1. Use rb/wb flags to load/save.
 
-	uint64_t loadApplicationFromString;//will load all binaries from loadApplicationString instead of recompiling them (must be allocated by user, must contain what saveApplicationToString call generated previously in VkFFTApplication.saveApplicationString). (0 - off, 1 - on). Mutually exclusive with saveApplicationToString
-	void* loadApplicationString;//memory array (uint32_t* for Vulkan, char* for CUDA/HIP/OpenCL) through which user can load VkFFT binaries, must be provided by user if loadApplicationFromString = 1.
+	uint64_t disableSetLocale;//disables all VkFFT attempts to set locale to C - user must ensure that VkFFT has C locale during the plan initialization. This option is needed for multithreading. Default 0.
 
 	//optional Bluestein optimizations: (default 0 if not stated otherwise)
 	uint64_t fixMaxRadixBluestein;//controls the padding of sequences in Bluestein convolution. If specified, padded sequence will be made of up to fixMaxRadixBluestein primes. Default: 2 for CUDA and Vulkan/OpenCL/HIP up to 1048576 combined dimension FFT system, 7 for Vulkan/OpenCL/HIP past after. Min = 2, Max = 13.
@@ -196,6 +220,12 @@ typedef struct {
 	uint64_t* primeSizes; // described in useCustomBluesteinPaddingPattern
 	uint64_t* paddedSizes; // described in useCustomBluesteinPaddingPattern
 
+	uint64_t fixMinRaderPrimeMult;//start direct multiplication Rader's algorithm for radix primes from this number. This means that VkFFT will inline custom Rader kernels if sequence is divisible by these primes. Default is 17, as VkFFT has kernels for 2-13. If you make it less than 13, VkFFT will switch from these kernels to Rader.
+	uint64_t fixMaxRaderPrimeMult;//switch from Mult Rader's algorithm for radix primes from this number. Current limitation for Rader is maxThreadNum/2+1, realistically you would want to switch somewhere on 30-100 range. Default is vendor-specific (currently ~40)
+
+	uint64_t fixMinRaderPrimeFFT;//start FFT convolution version of Rader for radix primes from this number. Better than direct multiplication version for almost all primes (except small ones, like 17-23 on some GPUs). Must be greater than or equal to fixMinRaderPrimeMult. Default 29 on AMD and 17 on other GPUs.
+	uint64_t fixMaxRaderPrimeFFT;//switch to Bluestein's algorithm for radix primes from this number. Switch may happen earlier if prime can't fit in shared memory. Default is 16384, which is bigger than most current GPU's shared memory.
+
 	//optional zero padding control parameters: (default 0 if not stated otherwise)
 	uint64_t performZeropadding[3]; // don't read some data/perform computations if some input sequences are zeropadded for each axis (0 - off, 1 - on)
 	uint64_t fft_zeropad_left[3];//specify start boundary of zero block in the system for each axis
@@ -218,11 +248,13 @@ typedef struct {
 	uint64_t registerBoost4Step; //specify if register file overutilization should be used in big sequences (>2^14), same definition as registerBoost. Default 1
 
 	//not used techniques:
-	uint64_t swapTo3Stage4Step; //specify at which power of 2 to switch from 2 upload to 3 upload 4-step FFT, in case if making max sequence size lower than coalesced sequence helps to combat TLB misses. Default 0 - disabled. Must be at least 17
+	uint64_t swapTo3Stage4Step; //specify at which number to switch from 2 upload to 3 upload 4-step FFT, in case if making max sequence size lower than coalesced sequence helps to combat TLB misses. Default 0 - disabled. Must be at least 131072
 	uint64_t devicePageSize;//in KB, the size of a page on the GPU. Setting to 0 disables local buffer split in pages
 	uint64_t localPageSize;//in KB, the size to split page into if sequence spans multiple devicePageSize pages
 
 	//automatically filled based on device info (still can be reconfigured by user):
+	uint64_t computeCapabilityMajor; // CUDA/HIP compute capability of the device
+	uint64_t computeCapabilityMinor; // CUDA/HIP compute capability of the device
 	uint64_t maxComputeWorkGroupCount[3]; // maxComputeWorkGroupCount from VkPhysicalDeviceLimits
 	uint64_t maxComputeWorkGroupSize[3]; // maxComputeWorkGroupCount from VkPhysicalDeviceLimits
 	uint64_t maxThreadsNum; //max number of threads from VkPhysicalDeviceLimits
@@ -236,6 +268,7 @@ typedef struct {
 	int64_t maxCodeLength; //specify how big can be buffer used for code generation (in char). Default 4000000 chars.
 	int64_t maxTempLength; //specify how big can be buffer used for intermediate string sprintfs be (in char). Default 5000 chars. If code segfaults for some reason - try increasing this number.
 	uint64_t autoCustomBluesteinPaddingPattern; // default value for useCustomBluesteinPaddingPattern
+	uint64_t useRaderUintLUT; // allocate additional LUT to store g_pow
 	uint64_t vendorID; // vendorID 0x10DE - NVIDIA, 0x8086 - Intel, 0x1002 - AMD, etc.
 #if(VKFFT_BACKEND==0)
 	VkDeviceMemory tempBufferDeviceMemory;//Filled at app creation
@@ -249,10 +282,14 @@ typedef struct {
 	hipEvent_t* stream_event;//Filled at app creation
 	uint64_t streamCounter;//Filled at app creation
 	uint64_t streamID;//Filled at app creation
+	int64_t  useStrict32BitAddress; // guarantee 32 bit addresses in bytes instead of number of elements. This results in fewer instructions generated. -1: Disable, 0: Infer based on size, 1: enable. Has no effect with useUint64.
 #elif(VKFFT_BACKEND==3)
 	cl_command_queue* commandQueue;
 #elif(VKFFT_BACKEND==4)
 	ze_command_list_handle_t* commandList;//Filled at app execution
+#elif(VKFFT_BACKEND==5)
+	MTL::CommandBuffer* commandBuffer;//Filled at app execution
+	MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution
 #endif
 } VkFFTConfiguration;//parameters specified at plan creation
 
@@ -293,6 +330,15 @@ typedef struct {
 	void** inputBuffer;//pointer to device buffer used to read data from if isInputFormatted is enabled
 	void** outputBuffer;//pointer to device buffer used to read data from if isOutputFormatted is enabled
 	void** kernel;//pointer to device buffer used to read kernel data from if performConvolution is enabled
+#elif(VKFFT_BACKEND==5)
+	MTL::CommandBuffer* commandBuffer;//commandBuffer to which FFT is appended
+	MTL::ComputeCommandEncoder* commandEncoder;//encoder associated with commandBuffer
+
+	MTL::Buffer** buffer;//pointer to array of buffers (or one buffer) used for computations
+	MTL::Buffer** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same sum size or bigger as buffer (can be split in multiple). Default 0. Setting to non zero value enables manual user allocation
+	MTL::Buffer** inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled
+	MTL::Buffer** outputBuffer;//pointer to array of output buffers (or one buffer) used to write data to if isOutputFormatted is enabled
+	MTL::Buffer** kernel;//pointer to array of kernel buffers (or one buffer) used to read kernel data from if performConvolution is enabled
 #endif
 	//following parameters can be specified during kernels launch, if specifyOffsetsAtLaunch parameter was enabled during the initializeVkFFT call
 	uint64_t bufferOffset;//specify if VkFFT has to offset the first element position inside the buffer. In bytes. Default 0 
@@ -332,7 +378,7 @@ typedef enum VkFFTResult {
 	VKFFT_ERROR_EMPTY_kernelSize = 2011,
 	VKFFT_ERROR_EMPTY_kernel = 2012,
 	VKFFT_ERROR_EMPTY_applicationString = 2013,
-	VKFFT_ERROR_EMPRY_useCustomBluesteinPaddingPattern_arrays = 2014,
+	VKFFT_ERROR_EMPTY_useCustomBluesteinPaddingPattern_arrays = 2014,
 	VKFFT_ERROR_UNSUPPORTED_RADIX = 3001,
 	VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH = 3002,
 	VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2C = 3003,
@@ -394,9 +440,238 @@ typedef enum VkFFTResult {
 	VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST = 4054,
 	VKFFT_ERROR_FAILED_TO_SUBMIT_BARRIER = 4055
 } VkFFTResult;
+
+static inline const char* getVkFFTErrorString(VkFFTResult result)//maps a VkFFTResult code to a string literal spelling the enumerator's name
+{
+	switch (result)
+	{
+	case VKFFT_SUCCESS:
+		return "VKFFT_SUCCESS";
+	case VKFFT_ERROR_MALLOC_FAILED:
+		return "VKFFT_ERROR_MALLOC_FAILED";
+	case VKFFT_ERROR_INSUFFICIENT_CODE_BUFFER:
+		return "VKFFT_ERROR_INSUFFICIENT_CODE_BUFFER";
+	case VKFFT_ERROR_INSUFFICIENT_TEMP_BUFFER:
+		return "VKFFT_ERROR_INSUFFICIENT_TEMP_BUFFER";
+	case VKFFT_ERROR_PLAN_NOT_INITIALIZED:
+		return "VKFFT_ERROR_PLAN_NOT_INITIALIZED";
+	case VKFFT_ERROR_NULL_TEMP_PASSED:
+		return "VKFFT_ERROR_NULL_TEMP_PASSED";
+	case VKFFT_ERROR_INVALID_PHYSICAL_DEVICE:
+		return "VKFFT_ERROR_INVALID_PHYSICAL_DEVICE";
+	case VKFFT_ERROR_INVALID_DEVICE:
+		return "VKFFT_ERROR_INVALID_DEVICE";
+	case VKFFT_ERROR_INVALID_QUEUE:
+		return "VKFFT_ERROR_INVALID_QUEUE";
+	case VKFFT_ERROR_INVALID_COMMAND_POOL:
+		return "VKFFT_ERROR_INVALID_COMMAND_POOL";
+	case VKFFT_ERROR_INVALID_FENCE:
+		return "VKFFT_ERROR_INVALID_FENCE";
+	case VKFFT_ERROR_ONLY_FORWARD_FFT_INITIALIZED:
+		return "VKFFT_ERROR_ONLY_FORWARD_FFT_INITIALIZED";
+	case VKFFT_ERROR_ONLY_INVERSE_FFT_INITIALIZED:
+		return "VKFFT_ERROR_ONLY_INVERSE_FFT_INITIALIZED";
+	case VKFFT_ERROR_INVALID_CONTEXT:
+		return "VKFFT_ERROR_INVALID_CONTEXT";
+	case VKFFT_ERROR_INVALID_PLATFORM:
+		return "VKFFT_ERROR_INVALID_PLATFORM";
+	case VKFFT_ERROR_ENABLED_saveApplicationToString:
+		return "VKFFT_ERROR_ENABLED_saveApplicationToString";
+	case VKFFT_ERROR_EMPTY_FILE:
+		return "VKFFT_ERROR_EMPTY_FILE";
+	case VKFFT_ERROR_EMPTY_FFTdim:
+		return "VKFFT_ERROR_EMPTY_FFTdim";
+	case VKFFT_ERROR_EMPTY_size:
+		return "VKFFT_ERROR_EMPTY_size";
+	case VKFFT_ERROR_EMPTY_bufferSize:
+		return "VKFFT_ERROR_EMPTY_bufferSize";
+	case VKFFT_ERROR_EMPTY_buffer:
+		return "VKFFT_ERROR_EMPTY_buffer";
+	case VKFFT_ERROR_EMPTY_tempBufferSize:
+		return "VKFFT_ERROR_EMPTY_tempBufferSize";
+	case VKFFT_ERROR_EMPTY_tempBuffer:
+		return "VKFFT_ERROR_EMPTY_tempBuffer";
+	case VKFFT_ERROR_EMPTY_inputBufferSize:
+		return "VKFFT_ERROR_EMPTY_inputBufferSize";
+	case VKFFT_ERROR_EMPTY_inputBuffer:
+		return "VKFFT_ERROR_EMPTY_inputBuffer";
+	case VKFFT_ERROR_EMPTY_outputBufferSize:
+		return "VKFFT_ERROR_EMPTY_outputBufferSize";
+	case VKFFT_ERROR_EMPTY_outputBuffer:
+		return "VKFFT_ERROR_EMPTY_outputBuffer";
+	case VKFFT_ERROR_EMPTY_kernelSize:
+		return "VKFFT_ERROR_EMPTY_kernelSize";
+	case VKFFT_ERROR_EMPTY_kernel:
+		return "VKFFT_ERROR_EMPTY_kernel";
+	case VKFFT_ERROR_EMPTY_applicationString:
+		return "VKFFT_ERROR_EMPTY_applicationString";
+	case VKFFT_ERROR_EMPTY_useCustomBluesteinPaddingPattern_arrays:
+		return "VKFFT_ERROR_EMPTY_useCustomBluesteinPaddingPattern_arrays";
+	case VKFFT_ERROR_UNSUPPORTED_RADIX:
+		return "VKFFT_ERROR_UNSUPPORTED_RADIX";
+	case VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH:
+		return "VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH";
+	case VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2C:
+		return "VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2C";
+	case VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT:
+		return "VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_DCT";
+	case VKFFT_ERROR_UNSUPPORTED_FFT_OMIT:
+		return "VKFFT_ERROR_UNSUPPORTED_FFT_OMIT";
+	case VKFFT_ERROR_FAILED_TO_ALLOCATE:
+		return "VKFFT_ERROR_FAILED_TO_ALLOCATE";
+	case VKFFT_ERROR_FAILED_TO_MAP_MEMORY:
+		return "VKFFT_ERROR_FAILED_TO_MAP_MEMORY";
+	case VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS:
+		return "VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS";
+	case VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER:
+		return "VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER";
+	case VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER:
+		return "VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER";
+	case VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE:
+		return "VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE";
+	case VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES:
+		return "VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES";
+	case VKFFT_ERROR_FAILED_TO_RESET_FENCES:
+		return "VKFFT_ERROR_FAILED_TO_RESET_FENCES";
+	case VKFFT_ERROR_FAILED_TO_CREATE_DESCRIPTOR_POOL:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_DESCRIPTOR_POOL";
+	case VKFFT_ERROR_FAILED_TO_CREATE_DESCRIPTOR_SET_LAYOUT:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_DESCRIPTOR_SET_LAYOUT";
+	case VKFFT_ERROR_FAILED_TO_ALLOCATE_DESCRIPTOR_SETS:
+		return "VKFFT_ERROR_FAILED_TO_ALLOCATE_DESCRIPTOR_SETS";
+	case VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE_LAYOUT:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE_LAYOUT";
+	case VKFFT_ERROR_FAILED_SHADER_PREPROCESS:
+		return "VKFFT_ERROR_FAILED_SHADER_PREPROCESS";
+	case VKFFT_ERROR_FAILED_SHADER_PARSE:
+		return "VKFFT_ERROR_FAILED_SHADER_PARSE";
+	case VKFFT_ERROR_FAILED_SHADER_LINK:
+		return "VKFFT_ERROR_FAILED_SHADER_LINK";
+	case VKFFT_ERROR_FAILED_SPIRV_GENERATE:
+		return "VKFFT_ERROR_FAILED_SPIRV_GENERATE";
+	case VKFFT_ERROR_FAILED_TO_CREATE_SHADER_MODULE:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_SHADER_MODULE";
+	case VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_INSTANCE";
+	case VKFFT_ERROR_FAILED_TO_SETUP_DEBUG_MESSENGER:
+		return "VKFFT_ERROR_FAILED_TO_SETUP_DEBUG_MESSENGER";
+	case VKFFT_ERROR_FAILED_TO_FIND_PHYSICAL_DEVICE:
+		return "VKFFT_ERROR_FAILED_TO_FIND_PHYSICAL_DEVICE";
+	case VKFFT_ERROR_FAILED_TO_CREATE_DEVICE:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_DEVICE";
+	case VKFFT_ERROR_FAILED_TO_CREATE_FENCE:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_FENCE";
+	case VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_POOL:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_POOL";
+	case VKFFT_ERROR_FAILED_TO_CREATE_BUFFER:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_BUFFER";
+	case VKFFT_ERROR_FAILED_TO_ALLOCATE_MEMORY:
+		return "VKFFT_ERROR_FAILED_TO_ALLOCATE_MEMORY";
+	case VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY:
+		return "VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY";
+	case VKFFT_ERROR_FAILED_TO_FIND_MEMORY:
+		return "VKFFT_ERROR_FAILED_TO_FIND_MEMORY";
+	case VKFFT_ERROR_FAILED_TO_SYNCHRONIZE:
+		return "VKFFT_ERROR_FAILED_TO_SYNCHRONIZE";
+	case VKFFT_ERROR_FAILED_TO_COPY:
+		return "VKFFT_ERROR_FAILED_TO_COPY";
+	case VKFFT_ERROR_FAILED_TO_CREATE_PROGRAM:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_PROGRAM";
+	case VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM:
+		return "VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM";
+	case VKFFT_ERROR_FAILED_TO_GET_CODE_SIZE:
+		return "VKFFT_ERROR_FAILED_TO_GET_CODE_SIZE";
+	case VKFFT_ERROR_FAILED_TO_GET_CODE:
+		return "VKFFT_ERROR_FAILED_TO_GET_CODE";
+	case VKFFT_ERROR_FAILED_TO_DESTROY_PROGRAM:
+		return "VKFFT_ERROR_FAILED_TO_DESTROY_PROGRAM";
+	case VKFFT_ERROR_FAILED_TO_LOAD_MODULE:
+		return "VKFFT_ERROR_FAILED_TO_LOAD_MODULE";
+	case VKFFT_ERROR_FAILED_TO_GET_FUNCTION:
+		return "VKFFT_ERROR_FAILED_TO_GET_FUNCTION";
+	case VKFFT_ERROR_FAILED_TO_SET_DYNAMIC_SHARED_MEMORY:
+		return "VKFFT_ERROR_FAILED_TO_SET_DYNAMIC_SHARED_MEMORY";
+	case VKFFT_ERROR_FAILED_TO_MODULE_GET_GLOBAL:
+		return "VKFFT_ERROR_FAILED_TO_MODULE_GET_GLOBAL";
+	case VKFFT_ERROR_FAILED_TO_LAUNCH_KERNEL:
+		return "VKFFT_ERROR_FAILED_TO_LAUNCH_KERNEL";
+	case VKFFT_ERROR_FAILED_TO_EVENT_RECORD:
+		return "VKFFT_ERROR_FAILED_TO_EVENT_RECORD";
+	case VKFFT_ERROR_FAILED_TO_ADD_NAME_EXPRESSION:
+		return "VKFFT_ERROR_FAILED_TO_ADD_NAME_EXPRESSION";
+	case VKFFT_ERROR_FAILED_TO_INITIALIZE:
+		return "VKFFT_ERROR_FAILED_TO_INITIALIZE";
+	case VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID:
+		return "VKFFT_ERROR_FAILED_TO_SET_DEVICE_ID";
+	case VKFFT_ERROR_FAILED_TO_GET_DEVICE:
+		return "VKFFT_ERROR_FAILED_TO_GET_DEVICE";
+	case VKFFT_ERROR_FAILED_TO_CREATE_CONTEXT:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_CONTEXT";
+	case VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE";
+	case VKFFT_ERROR_FAILED_TO_SET_KERNEL_ARG:
+		return "VKFFT_ERROR_FAILED_TO_SET_KERNEL_ARG";
+	case VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE";
+	case VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE:
+		return "VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE";
+	case VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES:
+		return "VKFFT_ERROR_FAILED_TO_ENUMERATE_DEVICES";
+	case VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE:
+		return "VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE";
+	case VKFFT_ERROR_FAILED_TO_CREATE_EVENT:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_EVENT";
+	case VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST:
+		return "VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST";
+	case VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST:
+		return "VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST";
+	case VKFFT_ERROR_FAILED_TO_SUBMIT_BARRIER:
+		return "VKFFT_ERROR_FAILED_TO_SUBMIT_BARRIER";
+	}
+	return "Unknown VkFFT error";//reached only when result matched none of the case labels above
+}
+
+
+typedef struct VkFFTRaderContainer VkFFTRaderContainer;//forward typedef: the type can be named without the struct keyword before its body is seen
+
+struct VkFFTRaderContainer {
+	uint64_t prime;//NOTE(review): presumably the prime radix this container describes - confirm at the fill site
+	uint64_t generator;
+	uint64_t multiplier;
+	uint64_t inline_rader_g_pow;
+	uint64_t raderUintLUToffset;
+
+	uint64_t type; //0 - FFT, 1 - Direct multiplication
+
+	uint64_t raderRegisters;
+	uint64_t rader_min_registers;
+
+	//Direct multiplication parameters (no dedicated fields are declared in this section)
+
+	//FFT parameters
+	uint64_t registers_per_thread;
+	uint64_t min_registers_per_thread;
+	uint64_t loc_multipliers[33];
+	uint64_t registers_per_thread_per_radix[33];
+	uint64_t stageRadix[20];
+	uint64_t numStages;
+	uint64_t numSubPrimes;
+	uint64_t stage_rader_generator[20];
+	uint64_t containerFFTDim;
+	uint64_t containerFFTNum;
+	uint64_t subLogicalGroupSizeMax;//how many threads are needed per Rader transform
+	uint64_t RaderKernelOffsetLUT;
+	uint64_t RaderRadixOffsetLUT;
+	uint64_t RaderRadixOffsetLUTiFFT;
+	void* raderFFTkernel;//untyped pointer; NOTE(review): element type, size and ownership are not visible here - confirm at the allocation site
+
+	struct VkFFTRaderContainer* container;//self-referential link to further VkFFTRaderContainer data; NOTE(review): presumably nested containers, count possibly numSubPrimes - confirm
+};
+
 typedef struct {
 	uint64_t size[3];
 	uint64_t localSize[3];
+	uint64_t numSubgroups;
 	uint64_t sourceFFTSize;
 	uint64_t fftDim;
 	uint64_t inverse;
@@ -415,6 +690,8 @@ typedef struct {
 	uint64_t readToRegisters;
 	uint64_t writeFromRegisters;
 	uint64_t LUT;
+	uint64_t LUT_4step;
+	uint64_t raderUintLUT;
 	uint64_t useCoalescedLUTUploadToSM;
 	uint64_t useBluesteinFFT;
 	uint64_t reverseBluesteinMultiUpload;
@@ -481,6 +758,7 @@ typedef struct {
 	uint64_t kernelNumberByteSize;
 	uint64_t maxStageSumLUT;
 	uint64_t unroll;
+	uint64_t swapComputeWorkGroupID;
 	uint64_t convolutionStep;
 	uint64_t symmetricKernel;
 	uint64_t supportAxis;
@@ -491,31 +769,69 @@ typedef struct {
 	uint64_t resolveBankConflictFirstStages;
 	uint64_t sharedStrideBankConflictFirstStages;
 	uint64_t sharedStrideReadWriteConflict;
+
+	uint64_t sharedStrideRaderFFT;
+	uint64_t sharedShiftRaderFFT;
+
 	uint64_t maxSharedStride;
 	uint64_t axisSwapped;
 	uint64_t mergeSequencesR2C;
 
-	uint64_t numBuffersBound[6];
+	uint64_t numBuffersBound[10];
 	uint64_t convolutionBindingID;
 	uint64_t LUTBindingID;
 	uint64_t BluesteinConvolutionBindingID;
 	uint64_t BluesteinMultiplicationBindingID;
 
+	uint64_t useRader;
+	uint64_t numRaderPrimes;
+	uint64_t minRaderFFTThreadNum;
+	VkFFTRaderContainer* raderContainer;
+	VkFFTRaderContainer* currentRaderContainer;
+	uint64_t RaderUintLUTBindingID;
+
+	uint64_t useRaderMult;
+	uint64_t additionalRaderSharedSize;
+	uint64_t RaderKernelOffsetShared[33];
+	uint64_t RaderKernelOffsetLUT[33];
+	uint64_t rader_generator[33];
+	uint64_t fixMinRaderPrimeMult;//start Rader algorithm for primes from this number
+	uint64_t fixMaxRaderPrimeMult;//switch from Rader to Bluestein algorithm for primes from this number
+	uint64_t fixMinRaderPrimeFFT;//start Rader algorithm for primes from this number
+	uint64_t fixMaxRaderPrimeFFT;//switch from Rader to Bluestein algorithm for primes from this number
+
+	uint64_t inline_rader_g_pow;
+	uint64_t inline_rader_kernel;
+
+	uint64_t raderRegisters;
+	uint64_t rader_min_registers;
+
+	uint64_t useRaderFFT;
+
 	uint64_t performOffsetUpdate;
 	uint64_t performBufferSetUpdate;
 	uint64_t useUint64;
+#if(VKFFT_BACKEND==2)
+	int64_t  useStrict32BitAddress;
+#endif
+	uint64_t disableSetLocale;
+
 	char** regIDs;
 	char* disableThreadsStart;
 	char* disableThreadsEnd;
 	char sdataID[50];
 	char inoutID[50];
 	char combinedID[50];
+	char raderIDx[50];
+	char raderIDx2[50];
 	char gl_LocalInvocationID_x[50];
 	char gl_LocalInvocationID_y[50];
 	char gl_LocalInvocationID_z[50];
 	char gl_GlobalInvocationID_x[200];
 	char gl_GlobalInvocationID_y[200];
 	char gl_GlobalInvocationID_z[200];
+	char gl_SubgroupInvocationID[200];
+	char gl_SubgroupID[200];
 	char tshuffle[50];
 	char sharedStride[50];
 	char gl_WorkGroupSize_x[50];
@@ -530,6 +846,7 @@ typedef struct {
 	char temp[50];
 	char w[50];
 	char iw[50];
+	char x0[33][40];
 	char locID[33][40];
 	char* code0;
 	char* output;
@@ -538,11 +855,14 @@ typedef struct {
 	int64_t currentLen;
 	int64_t maxCodeLength;
 	int64_t maxTempLength;
-	const char* oldLocale;
+	char oldLocale[100];
 } VkFFTSpecializationConstantsLayout;
 typedef struct {
 	uint32_t dataUint32[10];
 	uint64_t dataUint64[10];
+#if(VKFFT_BACKEND == 5)
+	MTL::Buffer* dataUintBuffer;
+#endif
 	//specify what can be in layout
 	uint64_t performWorkGroupShift[3];
 	uint64_t workGroupShift[3];
@@ -576,6 +896,8 @@ typedef struct {
 	VkPipeline pipeline;
 	VkDeviceMemory bufferLUTDeviceMemory;
 	VkBuffer bufferLUT;
+	VkDeviceMemory bufferRaderUintLUTDeviceMemory;
+	VkBuffer bufferRaderUintLUT;
 	VkDeviceMemory* bufferBluesteinDeviceMemory;
 	VkDeviceMemory* bufferBluesteinFFTDeviceMemory;
 	VkBuffer* bufferBluestein;
@@ -586,6 +908,7 @@ typedef struct {
 	CUmodule VkFFTModule;
 	CUfunction VkFFTKernel;
 	void* bufferLUT;
+	void* bufferRaderUintLUT;
 	CUdeviceptr consts_addr;
 	void** bufferBluestein;
 	void** bufferBluesteinFFT;
@@ -595,6 +918,7 @@ typedef struct {
 	hipModule_t VkFFTModule;
 	hipFunction_t VkFFTKernel;
 	void* bufferLUT;
+	void* bufferRaderUintLUT;
 	hipDeviceptr_t consts_addr;
 	void** bufferBluestein;
 	void** bufferBluesteinFFT;
@@ -604,6 +928,7 @@ typedef struct {
 	cl_program  program;
 	cl_kernel kernel;
 	cl_mem bufferLUT;
+	cl_mem bufferRaderUintLUT;
 	cl_mem* bufferBluestein;
 	cl_mem* bufferBluesteinFFT;
 #elif(VKFFT_BACKEND==4)
@@ -612,14 +937,25 @@ typedef struct {
 	ze_module_handle_t VkFFTModule;
 	ze_kernel_handle_t VkFFTKernel;
 	void* bufferLUT;
+	void* bufferRaderUintLUT;
 	void** bufferBluestein;
 	void** bufferBluesteinFFT;
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer** inputBuffer;
+	MTL::Buffer** outputBuffer;
+	MTL::Library* library;
+	MTL::ComputePipelineState* pipeline;
+	MTL::Buffer* bufferLUT;
+	MTL::Buffer* bufferRaderUintLUT;
+	MTL::Buffer** bufferBluestein;
+	MTL::Buffer** bufferBluesteinFFT;
 #endif
 
 	void* binary;
 	uint64_t binarySize;
 
 	uint64_t bufferLUTSize;
+	uint64_t bufferRaderUintLUTSize;
 	uint64_t referenceLUT;
 } VkFFTAxis;
 
@@ -645,6 +981,8 @@ typedef struct {
 	//Bluestein buffers reused among plans
 	uint64_t useBluesteinFFT[3];
 #if(VKFFT_BACKEND==0)
+	VkDeviceMemory bufferRaderUintLUTDeviceMemory[3][4];
+	VkBuffer bufferRaderUintLUT[3][4];
 	VkDeviceMemory bufferBluesteinDeviceMemory[3];
 	VkDeviceMemory bufferBluesteinFFTDeviceMemory[3];
 	VkDeviceMemory bufferBluesteinIFFTDeviceMemory[3];
@@ -652,26 +990,42 @@ typedef struct {
 	VkBuffer bufferBluesteinFFT[3];
 	VkBuffer bufferBluesteinIFFT[3];
 #elif(VKFFT_BACKEND==1)
+	void* bufferRaderUintLUT[3][4];
 	void* bufferBluestein[3];
 	void* bufferBluesteinFFT[3];
 	void* bufferBluesteinIFFT[3];
 #elif(VKFFT_BACKEND==2)
+	void* bufferRaderUintLUT[3][4];
 	void* bufferBluestein[3];
 	void* bufferBluesteinFFT[3];
 	void* bufferBluesteinIFFT[3];
 #elif(VKFFT_BACKEND==3)
+	cl_mem bufferRaderUintLUT[3][4];
 	cl_mem bufferBluestein[3];
 	cl_mem bufferBluesteinFFT[3];
 	cl_mem bufferBluesteinIFFT[3];
 #elif(VKFFT_BACKEND==4)
+	void* bufferRaderUintLUT[3][4];
 	void* bufferBluestein[3];
 	void* bufferBluesteinFFT[3];
 	void* bufferBluesteinIFFT[3];
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* bufferRaderUintLUT[3][4];
+	MTL::Buffer* bufferBluestein[3];
+	MTL::Buffer* bufferBluesteinFFT[3];
+	MTL::Buffer* bufferBluesteinIFFT[3];
 #endif
+	uint64_t bufferRaderUintLUTSize[3][4];
 	uint64_t bufferBluesteinSize[3];
 	void* applicationBluesteinString[3];
 	uint64_t applicationBluesteinStringSize[3];
 
+	uint64_t numRaderFFTPrimes;
+	uint64_t rader_primes[30];
+	uint64_t rader_buffer_size[30];
+	void* raderFFTkernel[30];
+	uint64_t applicationStringOffsetRader;
+
 	uint64_t currentApplicationStringPos;
 
 	uint64_t applicationStringSize;//size of saveApplicationString in bytes
@@ -684,13 +1038,13 @@ static inline VkFFTResult VkAppendLine(VkFFTSpecializationConstantsLayout* sc) {
 	if (sc->currentLen + sc->tempLen > sc->maxCodeLength) return VKFFT_ERROR_INSUFFICIENT_CODE_BUFFER;
 	sc->currentLen += sprintf(sc->output + sc->currentLen, "%s", sc->tempStr);
 	return VKFFT_SUCCESS;
-};
+}
 static inline VkFFTResult VkAppendLineFromInput(VkFFTSpecializationConstantsLayout* sc, const char* in) {
 	//appends code line stored in tempStr to generated code
 	if (sc->currentLen + (int64_t)strlen(in) > sc->maxCodeLength) return VKFFT_ERROR_INSUFFICIENT_CODE_BUFFER;
 	sc->currentLen += sprintf(sc->output + sc->currentLen, "%s", in);
 	return VKFFT_SUCCESS;
-};
+}
 static inline VkFFTResult appendLicense(VkFFTSpecializationConstantsLayout* sc) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -726,7 +1080,7 @@ static inline VkFFTResult VkMovComplex(VkFFTSpecializationConstantsLayout* sc, c
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkMovReal(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -734,7 +1088,7 @@ static inline VkFFTResult VkMovReal(VkFFTSpecializationConstantsLayout* sc, cons
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkSharedStore(VkFFTSpecializationConstantsLayout* sc, const char* id, const char* in) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -742,7 +1096,7 @@ static inline VkFFTResult VkSharedStore(VkFFTSpecializationConstantsLayout* sc,
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkSharedLoad(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* id) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -750,7 +1104,7 @@ static inline VkFFTResult VkSharedLoad(VkFFTSpecializationConstantsLayout* sc, c
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkAddReal(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -758,7 +1112,7 @@ static inline VkFFTResult VkAddReal(VkFFTSpecializationConstantsLayout* sc, cons
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkAddComplex(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -767,7 +1121,7 @@ static inline VkFFTResult VkAddComplex(VkFFTSpecializationConstantsLayout* sc, c
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkAddComplexInv(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -776,7 +1130,23 @@ static inline VkFFTResult VkAddComplexInv(VkFFTSpecializationConstantsLayout* sc
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
+static inline VkFFTResult VkAddComplex_x(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	%s.x = %s.x + %s.x;\n", out, in_1, in_2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	return res;
+}
+static inline VkFFTResult VkAddComplex_y(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	%s.y = %s.y + %s.y;\n", out, in_1, in_2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	return res;
+}
 static inline VkFFTResult VkSubComplex(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -785,7 +1155,23 @@ static inline VkFFTResult VkSubComplex(VkFFTSpecializationConstantsLayout* sc, c
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
+static inline VkFFTResult VkSubComplex_x(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	%s.x = %s.x - %s.x;\n", out, in_1, in_2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	return res;
+}
+static inline VkFFTResult VkSubComplex_y(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	%s.y = %s.y - %s.y;\n", out, in_1, in_2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	return res;
+}
 static inline VkFFTResult VkSubReal(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -793,24 +1179,75 @@ static inline VkFFTResult VkSubReal(VkFFTSpecializationConstantsLayout* sc, cons
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
-static inline VkFFTResult VkFMAComplex(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num, const char* in_2) {
+}
+static inline VkFFTResult VkFMA3Complex(VkFFTSpecializationConstantsLayout* sc, const char* out_1, const char* out_2, const char* in_1, const char* in_num, const char* in_conj) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f %%f %%f \\n \", %s, %s.x, %s.y, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, in_1, in_1, in_conj, in_conj);
+	//res = VkAppendLine(sc);
+	//if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	%s.x = fma(%s.x, %s.x, %s.x);\n\
+	%s.y = fma(%s.y, %s.x, %s.y);\n", out_1, in_1, in_num, out_1, out_1, in_conj, in_num, out_1);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	%s.x = fma(%s.y, %s.y, %s.x);\n\
+	%s.y = fma(%s.x, %s.y, %s.y);\n", out_2, in_1, in_num, out_2, out_2, in_conj, in_num, out_2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	/*sc->tempLen = sprintf(sc->tempStr, "\
+	temp2.x = fma(%s.x, %s.x, %s.x);\n\
+	%s.x = temp2.x;\n\
+	temp2.y = fma(%s.y, %s.x, %s.y);\n\
+	%s.y = temp2.y;\n", in_1, in_num, out_1, out_1, in_conj, in_num, out_1, out_1);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	temp2.x = fma(%s.y, %s.y, %s.x);\n\
+	%s.x = temp2.x;\n\
+	temp2.y = fma(%s.x, %s.y, %s.y);\n\
+	%s.y = temp2.y;\n", in_1, in_num, out_2, out_2, in_conj, in_num, out_2, out_2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;*/
+	//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f %%f %%f \\n \", %s, %s.x, %s.y, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, out_1, out_1, out_2, out_2);
+	//res = VkAppendLine(sc);
+	//if (res != VKFFT_SUCCESS) return res;
+	return res;
+}
+static inline VkFFTResult VkFMA3Complex_const_w(VkFFTSpecializationConstantsLayout* sc, const char* out_1, const char* out_2, const char* in_1, const char* in_num_x, const char* in_num_y, const char* in_conj) {
 	VkFFTResult res = VKFFT_SUCCESS;
+	//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f %%f %%f \\n \", %s, %s.x, %s.y, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, in_1, in_1, in_conj, in_conj);
+	//res = VkAppendLine(sc);
+	//if (res != VKFFT_SUCCESS) return res;
 	sc->tempLen = sprintf(sc->tempStr, "\
 	%s.x = fma(%s.x, %s, %s.x);\n\
-	%s.y = fma(%s.y, %s, %s.y);\n", out, in_1, in_num, in_2, out, in_1, in_num, in_2);
+	%s.y = fma(%s.y, %s, %s.y);\n", out_1, in_1, in_num_x, out_1, out_1, in_conj, in_num_x, out_1);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "\
+	%s.x = fma(%s.y, %s, %s.x);\n\
+	%s.y = fma(%s.x, %s, %s.y);\n", out_2, in_1, in_num_y, out_2, out_2, in_conj, in_num_y, out_2);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
+static inline VkFFTResult VkFMAComplex(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num, const char* in_2) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	sc->tempLen = sprintf(sc->tempStr, "\
+    %s.x = fma(%s.x, %s, %s.x);\n\
+    %s.y = fma(%s.y, %s, %s.y);\n", out, in_1, in_num, in_2, out, in_1, in_num, in_2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	return res;
+}
 static inline VkFFTResult VkFMAReal(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num, const char* in_2) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
-	%s = fma(%s, %s, %s);\n", out, in_1, in_num, in_2);
+    %s = fma(%s, %s, %s);\n", out, in_1, in_num, in_2);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkMulComplex(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2, const char* temp) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	if (strcmp(out, in_1) && strcmp(out, in_2)) {
@@ -831,7 +1268,7 @@ static inline VkFFTResult VkMulComplex(VkFFTSpecializationConstantsLayout* sc, c
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkMulComplexConj(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2, const char* temp) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	if (strcmp(out, in_1) && strcmp(out, in_2)) {
@@ -852,7 +1289,7 @@ static inline VkFFTResult VkMulComplexConj(VkFFTSpecializationConstantsLayout* s
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkMulComplexNumber(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -861,7 +1298,7 @@ static inline VkFFTResult VkMulComplexNumber(VkFFTSpecializationConstantsLayout*
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkMulComplexNumberImag(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num, const char* temp) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	if (strcmp(out, in_1)) {
@@ -882,7 +1319,7 @@ static inline VkFFTResult VkMulComplexNumberImag(VkFFTSpecializationConstantsLay
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkDivComplexNumber(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -891,7 +1328,7 @@ static inline VkFFTResult VkDivComplexNumber(VkFFTSpecializationConstantsLayout*
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 
 static inline VkFFTResult VkMulReal(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2) {
 	VkFFTResult res = VKFFT_SUCCESS;
@@ -900,7 +1337,7 @@ static inline VkFFTResult VkMulReal(VkFFTSpecializationConstantsLayout* sc, cons
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 
 static inline VkFFTResult VkShuffleComplex(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2, const char* temp) {
 	VkFFTResult res = VKFFT_SUCCESS;
@@ -922,7 +1359,7 @@ static inline VkFFTResult VkShuffleComplex(VkFFTSpecializationConstantsLayout* s
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkShuffleComplexInv(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_2, const char* temp) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	if (strcmp(out, in_2)) {
@@ -943,7 +1380,7 @@ static inline VkFFTResult VkShuffleComplexInv(VkFFTSpecializationConstantsLayout
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkModReal(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -951,7 +1388,7 @@ static inline VkFFTResult VkModReal(VkFFTSpecializationConstantsLayout* sc, cons
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkDivReal(VkFFTSpecializationConstantsLayout* sc, const char* out, const char* in_1, const char* in_num) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	sc->tempLen = sprintf(sc->tempStr, "\
@@ -959,7 +1396,7 @@ static inline VkFFTResult VkDivReal(VkFFTSpecializationConstantsLayout* sc, cons
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
-};
+}
 static inline VkFFTResult VkPermute(VkFFTSpecializationConstantsLayout* sc, const uint64_t* permute, const uint64_t num_elem, const uint64_t type, char** regIDs, const char* temp) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char temp_ID[33][20];
@@ -1045,7 +1482,39 @@ static inline VkFFTResult VkPermute(VkFFTSpecializationConstantsLayout* sc, cons
 		}*/
 	}
 	return res;
-};
+}
+static inline VkFFTResult VkSubgroupAdd(VkFFTSpecializationConstantsLayout* sc, const char* in, const char* out, const uint64_t subWarpSplit) {
+	VkFFTResult res = VKFFT_SUCCESS;
+
+#if (VKFFT_BACKEND==0)
+	sc->tempLen = sprintf(sc->tempStr, "	%s.x = subgroupAdd(%s.x);\n", out, in);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "	%s.y = subgroupAdd(%s.y);\n", out, in);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+#elif (VKFFT_BACKEND==1)
+	//v1
+	/*for (int i = 1; i < sc->warpSize / subWarpSplit; i *= 2) {
+		sc->tempLen = sprintf(sc->tempStr, "	%s.x += __shfl_xor_sync(0xffffffff, %s.x, %d);\n", out, in, i);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sc->tempLen = sprintf(sc->tempStr, "	%s.y += __shfl_xor_sync(0xffffffff, %s.y, %d);\n", out, in, i);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+	}
+	//v2
+	for (int i = (int)sc->warpSize / 2 / subWarpSplit; i > 0; i /= 2) {
+		sc->tempLen = sprintf(sc->tempStr, "	%s.x += __shfl_down_sync(0xffffffff, %s.x, %d);\n", out, in, i);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sc->tempLen = sprintf(sc->tempStr, "	%s.y += __shfl_down_sync(0xffffffff, %s.y, %d);\n", out, in, i);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+	}*/
+#endif
+	return res;
+}
 
 static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfiguration inputLaunchConfiguration);
 static inline VkFFTResult VkFFTAppend(VkFFTApplication* app, int inverse, VkFFTLaunchParams* launchParams);
@@ -1093,6 +1562,12 @@ static inline VkFFTResult appendExtensions(VkFFTSpecializationConstantsLayout* s
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 	}
+#elif(VKFFT_BACKEND==5)
+	sc->tempLen = sprintf(sc->tempStr, "\
+#include <metal_math>\n\
+using namespace metal;\n");
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
 #endif
 	return res;
 }
@@ -1114,6 +1589,10 @@ static inline VkFFTResult appendConstant(VkFFTSpecializationConstantsLayout* sc,
 	sc->tempLen = sprintf(sc->tempStr, "__constant %s %s = %s%s;\n", type, name, defaultVal, LFending);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==5)
+	sc->tempLen = sprintf(sc->tempStr, "constant %s %s = %s%s;\n", type, name, defaultVal, LFending);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
 #else
 	sc->tempLen = sprintf(sc->tempStr, "const %s %s = %s%s;\n", type, name, defaultVal, LFending);
 	res = VkAppendLine(sc);
@@ -1149,6 +1628,10 @@ static inline VkFFTResult appendBarrierVkFFT(VkFFTSpecializationConstantsLayout*
 	sc->tempLen = sprintf(sc->tempStr, "%sbarrier(CLK_LOCAL_MEM_FENCE);\n\n", tabs);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==5)
+	sc->tempLen = sprintf(sc->tempStr, "%sthreadgroup_barrier(mem_flags::mem_none);\n\n", tabs);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
 #endif
 	return res;
 }
@@ -1172,6 +1655,10 @@ static inline VkFFTResult appendPushConstantsVkFFT(VkFFTSpecializationConstantsL
 	sc->tempLen = sprintf(sc->tempStr, "	typedef struct {\n");
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==5)
+	sc->tempLen = sprintf(sc->tempStr, "    typedef struct {\n");
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
 #endif
 	if (sc->performWorkGroupShift[0]) {
 		res = appendPushConstant(sc, uintType, "workGroupShiftX");
@@ -1219,26 +1706,213 @@ static inline VkFFTResult appendPushConstantsVkFFT(VkFFTSpecializationConstantsL
 	sc->tempLen = sprintf(sc->tempStr, "	}PushConsts;\n");
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==5)
+	sc->tempLen = sprintf(sc->tempStr, "    }PushConsts;\n");
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
 #endif
 	return res;
 }
 static inline VkFFTResult appendConstantsVkFFT(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char LFending[4] = "";
+	char uintType_32[30];
 	if (!strcmp(floatType, "float")) sprintf(LFending, "f");
 #if(VKFFT_BACKEND==0)
 	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+	sprintf(uintType_32, "uint");
 #elif(VKFFT_BACKEND==1)
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+	sprintf(uintType_32, "unsigned int");
 #elif(VKFFT_BACKEND==2)
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+	sprintf(uintType_32, "unsigned int");
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+	sprintf(uintType_32, "unsigned int");
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+	sprintf(uintType_32, "uint");
 #endif
-	res = appendConstant(sc, floatType, "loc_PI", "3.1415926535897932384626433832795", LFending);
-	if (res != VKFFT_SUCCESS) return res;
-	res = appendConstant(sc, floatType, "loc_SQRT1_2", "0.70710678118654752440084436210485", LFending);
-	if (res != VKFFT_SUCCESS) return res;
+
+	//res = appendConstant(sc, floatType, "loc_PI", "3.1415926535897932384626433832795", LFending);
+	//if (res != VKFFT_SUCCESS) return res;
+	//res = appendConstant(sc, floatType, "loc_SQRT1_2", "0.70710678118654752440084436210485", LFending);
+	//if (res != VKFFT_SUCCESS) return res;
+	if (sc->useRader) {
+		for (uint64_t i = 0; i < sc->numRaderPrimes; i++) {
+			if (sc->raderContainer[i].prime > 0) {
+				if (sc->inline_rader_g_pow == 1) {
+					uint64_t g_pow = 1;
+#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+					sc->tempLen = sprintf(sc->tempStr, "__constant %s g_pow_%" PRIu64 "[%" PRIu64 "]= {1", uintType_32, sc->raderContainer[i].prime, sc->raderContainer[i].prime);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==5)
+					sc->tempLen = sprintf(sc->tempStr, "constant %s g_pow_%" PRIu64 "[%" PRIu64 "]= {1", uintType_32, sc->raderContainer[i].prime, sc->raderContainer[i].prime);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#else
+					sc->tempLen = sprintf(sc->tempStr, "const %s g_pow_%" PRIu64 "[%" PRIu64 "]= {1", uintType_32, sc->raderContainer[i].prime, sc->raderContainer[i].prime);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#endif
+					for (uint64_t t = 0; t < sc->raderContainer[i].prime - 1; t++) {
+						g_pow = (g_pow * sc->raderContainer[i].generator) % sc->raderContainer[i].prime;
+						sc->tempLen = sprintf(sc->tempStr, ", %" PRIu64 "", g_pow);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					sc->tempLen = sprintf(sc->tempStr, "};\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (sc->inline_rader_kernel) {
+#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+					sc->tempLen = sprintf(sc->tempStr, "__constant %s r_rader_kernel_%" PRIu64 "[%" PRIu64 "]= {", floatType, sc->raderContainer[i].prime, sc->raderContainer[i].prime - 1);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==5)
+					sc->tempLen = sprintf(sc->tempStr, "constant %s r_rader_kernel_%" PRIu64 "[%" PRIu64 "]= {", floatType, sc->raderContainer[i].prime, sc->raderContainer[i].prime - 1);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#else
+					sc->tempLen = sprintf(sc->tempStr, "const %s r_rader_kernel_%" PRIu64 "[%" PRIu64 "]= {", floatType, sc->raderContainer[i].prime, sc->raderContainer[i].prime - 1);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#endif
+					if (sc->raderContainer[i].type == 0) {
+						for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
+							if (!strcmp(floatType, "double")) {
+								double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", raderFFTKernel[2 * j] / (sc->raderContainer[i].prime - 1), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "float")) {
+								float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", raderFFTKernel[2 * j] / (sc->raderContainer[i].prime - 1), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (j < (sc->raderContainer[i].prime - 2)) {
+								sc->tempLen = sprintf(sc->tempStr, ", ");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								sc->tempLen = sprintf(sc->tempStr, "};\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+					else {
+						long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+						for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
+							uint64_t g_pow = 1;
+							for (uint64_t t = 0; t < sc->raderContainer[i].prime - 1 - j; t++) {
+								g_pow = (g_pow * sc->raderContainer[i].generator) % sc->raderContainer[i].prime;
+							}
+							if (!strcmp(floatType, "double")) {
+								double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", (double)cos(2.0 * g_pow * double_PI / sc->raderContainer[i].prime), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "float")) {
+								float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", (float)cos(2.0 * g_pow * double_PI / sc->raderContainer[i].prime), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (j < (sc->raderContainer[i].prime - 2)) {
+								sc->tempLen = sprintf(sc->tempStr, ", ");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								sc->tempLen = sprintf(sc->tempStr, "};\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+					sc->tempLen = sprintf(sc->tempStr, "__constant %s i_rader_kernel_%" PRIu64 "[%" PRIu64 "]= {", floatType, sc->raderContainer[i].prime, sc->raderContainer[i].prime - 1);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==5)
+					sc->tempLen = sprintf(sc->tempStr, "constant %s i_rader_kernel_%" PRIu64 "[%" PRIu64 "]= {", floatType, sc->raderContainer[i].prime, sc->raderContainer[i].prime - 1);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#else
+					sc->tempLen = sprintf(sc->tempStr, "const %s i_rader_kernel_%" PRIu64 "[%" PRIu64 "]= {", floatType, sc->raderContainer[i].prime, sc->raderContainer[i].prime - 1);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#endif
+					if (sc->raderContainer[i].type == 0) {
+						for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
+							if (!strcmp(floatType, "double")) {
+								double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", raderFFTKernel[2 * j + 1] / (sc->raderContainer[i].prime - 1), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "float")) {
+								float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", raderFFTKernel[2 * j + 1] / (sc->raderContainer[i].prime - 1), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+
+							if (j < (sc->raderContainer[i].prime - 2)) {
+								sc->tempLen = sprintf(sc->tempStr, ", ");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								sc->tempLen = sprintf(sc->tempStr, "};\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+					else {
+						long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+						for (uint64_t j = 0; j < (sc->raderContainer[i].prime - 1); j++) {//fix later
+							uint64_t g_pow = 1;
+							for (uint64_t t = 0; t < sc->raderContainer[i].prime - 1 - j; t++) {
+								g_pow = (g_pow * sc->raderContainer[i].generator) % sc->raderContainer[i].prime;
+							}
+							if (!strcmp(floatType, "double")) {
+								double* raderFFTKernel = (double*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.17e%s ", (double)(-sin(2.0 * g_pow * double_PI / sc->raderContainer[i].prime)), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "float")) {
+								float* raderFFTKernel = (float*)sc->raderContainer[i].raderFFTkernel;
+								sc->tempLen = sprintf(sc->tempStr, "%.8e%s ", (float)(-sin(2.0 * g_pow * double_PI / sc->raderContainer[i].prime)), LFending);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (j < (sc->raderContainer[i].prime - 2)) {
+								sc->tempLen = sprintf(sc->tempStr, ", ");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								sc->tempLen = sprintf(sc->tempStr, "};\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
 	return res;
 }
 static inline VkFFTResult appendSinCos20(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType) {
@@ -1270,7 +1944,13 @@ static inline VkFFTResult appendSinCos20(VkFFTSpecializationConstantsLayout* sc,
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
 	sprintf(functionDefinitions, "static __inline__ ");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "half")) sprintf(vecType, "half2");
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
+#if(VKFFT_BACKEND==0)
 	res = appendConstant(sc, floatType, "loc_2_PI", "0.63661977236758134307553505349006", LFending);
 	if (res != VKFFT_SUCCESS) return res;
 	res = appendConstant(sc, floatType, "loc_PI_2", "1.5707963267948966192313216916398", LFending);
@@ -1314,6 +1994,39 @@ static inline VkFFTResult appendSinCos20(VkFFTSpecializationConstantsLayout* sc,
 }\n\n", functionDefinitions, vecType, vecType);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
+#elif ((VKFFT_BACKEND == 1) || (VKFFT_BACKEND == 2))
+	sc->tempLen = sprintf(sc->tempStr, "\
+%s%s sincos_20(%s x)\n\
+{\n\
+	%s cos_sin;\n\
+	sincos(x, &cos_sin.y, &cos_sin.x);\n\
+	return cos_sin;\n\
+}\n\n", functionDefinitions, vecType, floatType, vecType);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	/*sc->tempLen = sprintf(sc->tempStr, "\
+%s%s normalize(%s v)\n\
+{\n\
+	%s inv_norm = rsqrt(v.x*v.x + v.y*v.y);\n\
+	v.x = v.x * inv_norm;\n\
+	v.y = v.y * inv_norm;\n\
+	return v;\n\
+}\n", functionDefinitions, vecType, vecType, floatType);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;*/
+#elif((VKFFT_BACKEND == 3) || (VKFFT_BACKEND == 4))
+	sc->tempLen = sprintf(sc->tempStr, "\
+%s%s sincos_20(%s x)\n\
+{\n\
+	%s cos_sin;\n\
+	%s cos_val;\n\
+	cos_sin.y = sincos(x, &cos_val);\n\
+	cos_sin.x = cos_val;\n\
+	return cos_sin;\n\
+}\n\n", functionDefinitions, vecType, floatType, vecType, floatType);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+#endif
 	return res;
 }
 static inline VkFFTResult appendConversion(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* floatTypeDifferent) {
@@ -1434,6 +2147,19 @@ layout(std430, binding = %" PRIu64 ") buffer DataIn{\n\
 			sc->inputNumberByteSize = 2 * sizeof(double);
 			sprintf(vecType, "double2");
 		}
+#elif(VKFFT_BACKEND==5)
+		if (!strcmp(floatTypeMemory, "half")) {
+			sc->inputNumberByteSize = 2 * 2;
+			sprintf(vecType, "half2");
+		}
+		if (!strcmp(floatTypeMemory, "float")) {
+			sc->inputNumberByteSize = 2 * sizeof(float);
+			sprintf(vecType, "float2");
+		}
+		if (!strcmp(floatTypeMemory, "double")) {
+			sc->inputNumberByteSize = 2 * sizeof(double);
+			sprintf(vecType, "double2");
+		}
 #endif
 		break;
 	}
@@ -1547,6 +2273,19 @@ layout(std430, binding = %" PRIu64 ") buffer DataOut{\n\
 			sc->outputNumberByteSize = 2 * sizeof(double);
 			sprintf(vecType, "double2");
 		}
+#elif(VKFFT_BACKEND==5)
+		if (!strcmp(floatTypeMemory, "half")) {
+			sc->outputNumberByteSize = 2 * 2;
+			sprintf(vecType, "half2");
+		}
+		if (!strcmp(floatTypeMemory, "float")) {
+			sc->outputNumberByteSize = 2 * sizeof(float);
+			sprintf(vecType, "float2");
+		}
+		if (!strcmp(floatTypeMemory, "double")) {
+			sc->outputNumberByteSize = 2 * sizeof(double);
+			sprintf(vecType, "double2");
+		}
 #endif
 		break;
 	}
@@ -1658,6 +2397,19 @@ layout(std430, binding = %" PRIu64 ") buffer Kernel_FFT{\n\
 		sc->kernelNumberByteSize = 2 * sizeof(double);
 		sprintf(vecType, "double2");
 	}
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatTypeMemory, "half")) {
+		sc->kernelNumberByteSize = 2 * 2;
+		sprintf(vecType, "half2");
+	}
+	if (!strcmp(floatTypeMemory, "float")) {
+		sc->kernelNumberByteSize = 2 * sizeof(float);
+		sprintf(vecType, "float2");
+	}
+	if (!strcmp(floatTypeMemory, "double")) {
+		sc->kernelNumberByteSize = 2 * sizeof(double);
+		sprintf(vecType, "double2");
+	}
 #endif
 	return res;
 }
@@ -1682,14 +2434,39 @@ layout(std430, binding = %" PRIu64 ") readonly buffer DataLUT {\n\
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+#endif
+	return res;
+}
+static inline VkFFTResult appendRaderUintLUTLayoutVkFFT(VkFFTSpecializationConstantsLayout* sc, uint64_t id) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	char uintType_32[30];
+#if(VKFFT_BACKEND==0)
+	sprintf(uintType_32, "uint");
+	sc->tempLen = sprintf(sc->tempStr, "\
+layout(std430, binding = %" PRIu64 ") readonly buffer DataRaderUintLUT {\n\
+%s g_pow[];\n\
+};\n", id, uintType_32);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+#elif(VKFFT_BACKEND==1)
+	sprintf(uintType_32, "unsigned int");
+#elif(VKFFT_BACKEND==2)
+	sprintf(uintType_32, "unsigned int");
+#elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+	sprintf(uintType_32, "unsigned int");
+#elif(VKFFT_BACKEND==5)
+	sprintf(uintType_32, "uint");
 #endif
 	return res;
 }
 static inline VkFFTResult appendBluesteinLayoutVkFFT(VkFFTSpecializationConstantsLayout* sc, uint64_t id, const char* floatType) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char vecType[30];
-	uint64_t loc_id = id;
 #if(VKFFT_BACKEND==0)
+	uint64_t loc_id = id;
 	if (!strcmp(floatType, "float")) sprintf(vecType, "vec2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "dvec2");
 	if (sc->BluesteinConvolutionStep) {
@@ -1719,6 +2496,9 @@ layout(std430, binding = %" PRIu64 ") readonly buffer DataBluesteinMultiplicatio
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 #endif
 	return res;
 }
@@ -2012,9 +2792,9 @@ static inline VkFFTResult indexOutputVkFFT(VkFFTSpecializationConstantsLayout* s
 	return res;
 }
 
-static inline VkFFTResult inlineRadixKernelVkFFT(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t radix, uint64_t stageSize, uint64_t stageSizeSum, double stageAngle, char** regID) {
+static inline VkFFTResult inlineRadixKernelVkFFT(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t radix, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, char** regID) {
 	VkFFTResult res = VKFFT_SUCCESS;
-	double double_PI = 3.1415926535897932384626433832795;
+	long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
 	char vecType[30];
 	char LFending[4] = "";
 	if (!strcmp(floatType, "float")) sprintf(LFending, "f");
@@ -2042,6 +2822,12 @@ static inline VkFFTResult inlineRadixKernelVkFFT(VkFFTSpecializationConstantsLay
 	char cosDef[20] = "native_cos";
 	char sinDef[20] = "native_sin";
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
 	char* temp = sc->temp;
 	//sprintf(temp, "loc_0");
@@ -2074,39 +2860,39 @@ if (res != VKFFT_SUCCESS) return res;
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle);\n", w, cosDef);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle);\n", w, sinDef);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
 			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle);\n", w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle);\n", w, cosDef);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle);\n", w, sinDef);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle);\n", w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 			}
 		}
-		}
 		res = VkMulComplex(sc, temp, regID[1], w, 0);
 		if (res != VKFFT_SUCCESS) return res;
 		res = VkSubComplex(sc, regID[1], regID[0], temp);
@@ -2161,40 +2947,40 @@ if (res != VKFFT_SUCCESS) return res;
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 4.0 / 3.0, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 4.0 / 3.0, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 4.0 / 3.0, 4.0 / 3.0);
 			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 4.0 / 3.0, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 4.0 / 3.0, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 4.0 / 3.0, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 4.0 / 3.0, 4.0 / 3.0);
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 4.0 / 3.0, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 			}
 		}
-		}
 		res = VkMulComplex(sc, sc->locID[2], regID[2], w, 0);
 		/*sc->tempLen = sprintf(sc->tempStr, "\
 loc_2.x = temp%s.x * w.x - temp%s.y * w.y;\n\
@@ -2208,45 +2994,42 @@ loc_2.y = temp%s.y * w.x + temp%s.x * w.y;\n", regID[2], regID[2], regID[2], reg
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n", w, stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n", w, stageSize);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n", w, stageSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 / 3.0, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 / 3.0, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 / 3.0, 2.0 / 3.0);
 			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s=sincos_20(angle*%.17e%s);\n", w, 2.0 / 3.0, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 / 3.0, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 / 3.0, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 / 3.0, 2.0 / 3.0);
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s=sincos_20(angle*%.17e%s);\n", w, 2.0 / 3.0, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
 			}
 		}
 		res = VkMulComplex(sc, sc->locID[1], regID[1], w, 0);
 		if (res != VKFFT_SUCCESS) return res;
-		/*sc->tempLen = sprintf(sc->tempStr, "\
-loc_1.x = temp%s.x * w.x - temp%s.y * w.y;\n\
-loc_1.y = temp%s.y * w.x + temp%s.x * w.y;\n", regID[1], regID[1], regID[1], regID[1]);*/
 		res = VkAddComplex(sc, regID[1], sc->locID[1], sc->locID[2]);
 		if (res != VKFFT_SUCCESS) return res;
 		res = VkSubComplex(sc, regID[2], sc->locID[1], sc->locID[2]);
@@ -2320,36 +3103,36 @@ temp%s.y = loc_1.y + loc_2.x; \n", regID[1], regID[1], regID[2], regID[2]);*/
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle);\n", w, cosDef);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle);\n", w, sinDef);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
 			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle);\n", w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle);\n", w, cosDef);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle);\n", w, sinDef);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle);\n", w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
 			}
 		}
@@ -2384,36 +3167,42 @@ temp%s = temp%s + temp;\n\n\
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n", w, stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s=twiddleLUT[LUTId+%" PRIu64 "];\n", w, stageSize);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=twiddleLUT[LUTId+%" PRIu64 "];\n", w, stageSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(0.5%s*angle);\n", w, cosDef, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(0.5%s*angle);\n", w, sinDef, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
 			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(0.5%s*angle);\n", w, cosDef, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(0.5%s*angle);\n", w, sinDef, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.5%s*angle);\n", w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -2458,8 +3247,8 @@ temp%s = temp%s + temp;\n\n", regID[1], regID[1], regID[1], regID[1], regID[1],
 		if (res != VKFFT_SUCCESS) return res;
 		res = VkAddComplex(sc, regID[2], regID[2], temp);
 		if (res != VKFFT_SUCCESS) return res;
-		res = VkMovComplex(sc, temp, regID[1]);
-		if (res != VKFFT_SUCCESS) return res;
+		//res = VkMovComplex(sc, temp, regID[1]);
+		//if (res != VKFFT_SUCCESS) return res;
 
 		uint64_t permute2[4] = { 0,2,1,3 };
 		res = VkPermute(sc, permute2, 4, 1, regID, temp);
@@ -2526,73 +3315,73 @@ if (res != VKFFT_SUCCESS) return res;
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			else {
-			if (i == radix - 1) {
-				if (sc->LUT) {
+				if (i == radix - 1) {
+					if (sc->LUT) {
 						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 				}
-			}
-			else {
-				if (sc->LUT) {
+				else {
+					if (sc->LUT) {
 						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 				}
@@ -2749,7 +3538,7 @@ temp%s.y = loc_1.y + loc_4.x; \n", regID[1], regID[1], regID[2], regID[2], regID
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -2784,7 +3573,7 @@ temp%s.y = loc_1.y + loc_4.x; \n", regID[1], regID[1], regID[2], regID[2], regID
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -2815,7 +3604,7 @@ temp%s.y = loc_1.y + loc_4.x; \n", regID[1], regID[1], regID[2], regID[2], regID
 		//res = VkMovComplex(sc, regID[1], sc->locID[1]);
 		//if (res != VKFFT_SUCCESS) return res;
 
-		uint64_t P = 3;
+		//uint64_t P = 3;
 		uint64_t Q = 2;
 		for (uint64_t i = 0; i < Q; i++) {
 			res = VkMovComplex(sc, sc->locID[0], regID[i]);
@@ -2971,73 +3760,73 @@ if (res != VKFFT_SUCCESS) return res;
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			else {
-			if (i == radix - 1) {
-				if (sc->LUT) {
+				if (i == radix - 1) {
+					if (sc->LUT) {
 						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 				}
-			}
-			else {
-				if (sc->LUT) {
+				else {
+					if (sc->LUT) {
 						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 				}
@@ -3234,41 +4023,41 @@ if (res != VKFFT_SUCCESS) return res;*/
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
 			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle);\n", w, cosDef);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle);\n", w, sinDef);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle);\n", w, cosDef);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle);\n", w, sinDef);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 					if (!strcmp(floatType, "double")) {
 						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle);\n", w);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
-			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle);\n", w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle);\n", w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
 			}
 		}
@@ -3294,36 +4083,42 @@ temp%s = temp%s + temp;\n\n", regID[i + 4], regID[i + 4], regID[i + 4], regID[i
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, stageSize);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
 					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(0.5%s*angle);\n", w, cosDef, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(0.5%s*angle);\n", w, sinDef, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(0.5%s*angle);\n", w, cosDef, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(0.5%s*angle);\n", w, sinDef, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.5%s*angle);\n", w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -3381,37 +4176,43 @@ temp%s = temp%s + temp;\n\n", regID[i + 2], regID[i + 2], regID[i + 2], regID[i
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-		if (sc->LUT) {
+			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 2*stageSize);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 2 * stageSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 2*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 2 * stageSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (stageAngle < 0) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-			if (!sc->inverse) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-			}
-		}
-		else {
-			if (!strcmp(floatType, "float")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(0.25%s*angle);\n", w, cosDef, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(0.25%s*angle);\n", w, sinDef, LFending);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
-				//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(0.25*angle), sin(0.25*angle));\n\n", vecType);
 			}
-			if (!strcmp(floatType, "double")) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(0.25%s*angle);\n", w, cosDef, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(0.25%s*angle);\n", w, sinDef, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(0.25*angle), sin(0.25*angle));\n\n", vecType);
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.25%s*angle);\n", w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -3456,18 +4257,18 @@ temp.y = temp%s.y * iw.x + temp%s.x * iw.y;\n\
 temp%s = temp%s - temp;\n\
 temp%s = temp%s + temp;\n\n", regID[3], regID[3], regID[3], regID[3], regID[3], regID[2], regID[2], regID[2]);*/
 		if (stageAngle < 0) {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 + %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 - %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 - %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 + %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
@@ -3578,7 +4379,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -3613,7 +4414,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -3686,36 +4487,36 @@ temp%s = temp;\n\
 		for (uint64_t i = 0; i < P; i++) {
 			if (i > 0) {
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, -sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)-sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				res = VkMulComplex(sc, sc->locID[1], regID[Q * i + 1], w, temp);
 				if (res != VKFFT_SUCCESS) return res;
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, -sin(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)-sin(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, sin(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)sin(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -3821,7 +4622,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -3856,7 +4657,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -3970,18 +4771,18 @@ temp%s = temp;\n\
 		for (uint64_t i = 0; i < P; i++) {
 			if (i > 0) {
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, -sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)-sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -4008,11 +4809,213 @@ temp%s = temp;\n\
 		break;
 	}
 	case 11: {
+		char* tf_x[20];
+		char* tf_y[20];
+		for (uint64_t i = 0; i < 10; i++) {
+			tf_x[i] = (char*)malloc(sizeof(char) * 50);
+			if (!tf_x[i]) {
+				for (uint64_t j = 0; j < i; j++) {
+					free(tf_x[j]);
+					tf_x[j] = 0;
+				}
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			tf_y[i] = (char*)malloc(sizeof(char) * 50);
+			if (!tf_y[i]) {
+				for (uint64_t j = 0; j < 10; j++) {
+					free(tf_x[j]);
+					tf_x[j] = 0;
+				}
+				for (uint64_t j = 0; j < i; j++) {
+					free(tf_y[j]);
+					tf_y[j] = 0;
+				}
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+		}
+		sprintf(tf_x[0], "8.4125353283118116886306336876800e-01%s", LFending);
+		sprintf(tf_x[1], "-9.5949297361449738990105129410324e-01%s", LFending);
+		sprintf(tf_x[2], "-1.4231483827328514046015907335008e-01%s", LFending);
+		sprintf(tf_x[3], "-6.5486073394528506407246543075118e-01%s", LFending);
+		sprintf(tf_x[4], "4.1541501300188642567903264668505e-01%s", LFending);
+		sprintf(tf_x[5], "8.4125353283118116886306336876800e-01%s", LFending);
+		sprintf(tf_x[6], "-9.5949297361449738990105129410324e-01%s", LFending);
+		sprintf(tf_x[7], "-1.4231483827328514046015907335008e-01%s", LFending);
+		sprintf(tf_x[8], "-6.5486073394528506407246543075118e-01%s", LFending);
+		sprintf(tf_x[9], "4.1541501300188642567903264668505e-01%s", LFending);
+		if (stageAngle < 0) {
+			sprintf(tf_y[0], "-5.4064081745559758210122047739077e-01%s", LFending);
+			sprintf(tf_y[1], "2.8173255684142969773359373164556e-01%s", LFending);
+			sprintf(tf_y[2], "-9.8982144188093273235937163967435e-01%s", LFending);
+			sprintf(tf_y[3], "7.5574957435425828375808593451168e-01%s", LFending);
+			sprintf(tf_y[4], "9.0963199535451837136413102968824e-01%s", LFending);
+			sprintf(tf_y[5], "5.4064081745559758210122047739077e-01%s", LFending);
+			sprintf(tf_y[6], "-2.8173255684142969773359373164556e-01%s", LFending);
+			sprintf(tf_y[7], "9.8982144188093273235937163967435e-01%s", LFending);
+			sprintf(tf_y[8], "-7.5574957435425828375808593451168e-01%s", LFending);
+			sprintf(tf_y[9], "-9.0963199535451837136413102968824e-01%s", LFending);
+		}
+		else {
+			sprintf(tf_y[0], "5.4064081745559758210122047739077e-01%s", LFending);
+			sprintf(tf_y[1], "-2.8173255684142969773359373164556e-01%s", LFending);
+			sprintf(tf_y[2], "9.8982144188093273235937163967435e-01%s", LFending);
+			sprintf(tf_y[3], "-7.5574957435425828375808593451168e-01%s", LFending);
+			sprintf(tf_y[4], "-9.0963199535451837136413102968824e-01%s", LFending);
+			sprintf(tf_y[5], "-5.4064081745559758210122047739077e-01%s", LFending);
+			sprintf(tf_y[6], "2.8173255684142969773359373164556e-01%s", LFending);
+			sprintf(tf_y[7], "-9.8982144188093273235937163967435e-01%s", LFending);
+			sprintf(tf_y[8], "7.5574957435425828375808593451168e-01%s", LFending);
+			sprintf(tf_y[9], "9.0963199535451837136413102968824e-01%s", LFending);
+		}
+		for (uint64_t i = radix - 1; i > 0; i--) {
+			if (stageSize == 1) {
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = 1;\n", w);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = 0;\n", w);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				if (i == radix - 1) {
+					if (sc->LUT) {
+						if (sc->useCoalescedLUTUploadToSM) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+				else {
+					if (sc->LUT) {
+						if (sc->useCoalescedLUTUploadToSM) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+			}
+			res = VkMulComplex(sc, sc->locID[i], regID[i], w, 0);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		res = VkMovComplex(sc, sc->locID[0], regID[0]);
+		if (res != VKFFT_SUCCESS) return res;
+		uint64_t permute[11] = { 0,1,2,4,8,5,10,9,7,3,6 };
+		res = VkPermute(sc, permute, 11, 0, 0, w);
+		if (res != VKFFT_SUCCESS) return res;
+		for (uint64_t i = 0; i < 5; i++) {
+			res = VkSubComplex_x(sc, regID[i + 6], sc->locID[i + 1], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_x(sc, regID[i + 1], sc->locID[i + 1], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_y(sc, regID[i + 6], sc->locID[i + 1], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkSubComplex_y(sc, regID[i + 1], sc->locID[i + 1], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 0; i < 5; i++) {
+			res = VkAddComplex_x(sc, regID[0], regID[0], regID[i + 1]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_y(sc, regID[0], regID[0], regID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 1; i < 6; i++) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+	%s=%s;\n", sc->locID[i], sc->locID[0]);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 6; i < 11; i++) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+	%s.x=0;\n\
+	%s.y=0;\n", sc->locID[i], sc->locID[i]);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 0; i < 5; i++) {
+			for (uint64_t j = 0; j < 5; j++) {
+				uint64_t id = ((10 - i) + j) % 10;
+				res = VkFMA3Complex_const_w(sc, sc->locID[j + 1], sc->locID[j + 6], regID[i + 1], tf_x[id], tf_y[id], regID[i + 6]);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+		}
+		for (uint64_t i = 1; i < 6; i++) {
+			res = VkSubComplex_x(sc, regID[i], sc->locID[i], sc->locID[i + 5]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_y(sc, regID[i], sc->locID[i], sc->locID[i + 5]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 1; i < 6; i++) {
+			res = VkAddComplex_x(sc, regID[i + 5], sc->locID[i], sc->locID[i + 5]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkSubComplex_y(sc, regID[i + 5], sc->locID[i], sc->locID[i + 5]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
 
-		char* tf[20];
-		//char* tf2[4];
-		//char* tf2inv[4];
-		//VkAppendLine(sc, "	{\n");
+		uint64_t permute2[11] = { 0,1,10,3,9,7,2,4,8,5,6 };
+		res = VkPermute(sc, permute2, 11, 1, regID, w);
+		if (res != VKFFT_SUCCESS) return res;
+		for (uint64_t i = 0; i < 10; i++) {
+			free(tf_x[i]);
+			tf_x[i] = 0;
+			free(tf_y[i]);
+			tf_y[i] = 0;
+		}
+		//old version
+		/*char* tf[50];
 		for (uint64_t i = 0; i < 20; i++) {
 			tf[i] = (char*)malloc(sizeof(char) * 50);
 			if (!tf[i]) {
@@ -4022,44 +5025,42 @@ temp%s = temp;\n\
 				}
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			//tf2[i] = (char*)malloc(sizeof(char) * 50);
-			//tf2inv[i] = (char*)malloc(sizeof(char) * 50);
 		}
-		sprintf(tf[0], "-1.10000000000000000e+00%s", LFending);
+		sprintf(tf[0], "-1.1000000000000000000000000000000e+00%s", LFending);
 
-		sprintf(tf[2], "2.53097611605958783e-01%s", LFending);
-		sprintf(tf[3], "-1.28820061077367898e+00%s", LFending);
-		sprintf(tf[4], "3.04632239669212490e-01%s", LFending);
-		sprintf(tf[5], "-3.91339615511917427e-01%s", LFending);
-		sprintf(tf[6], "-2.87102225339285022e+00%s", LFending);
-		sprintf(tf[7], "1.37490798661638380e+00%s", LFending);
-		sprintf(tf[8], "8.17178135341212419e-01%s", LFending);
-		sprintf(tf[9], "1.80074650644567891e+00%s", LFending);
-		sprintf(tf[10], "-8.59492973614497502e-01%s", LFending);
+		sprintf(tf[2], "2.5309761160595911633208743296564e-01%s", LFending);
+		sprintf(tf[3], "-1.2882006107736785338602203410119e+00%s", LFending);
+		sprintf(tf[4], "3.0463223966921237906291253239033e-01%s", LFending);
+		sprintf(tf[5], "-3.9133961551191742689326247273129e-01%s", LFending);
+		sprintf(tf[6], "-2.8710222533928502208766531111905e+00%s", LFending);
+		sprintf(tf[7], "1.3749079866163838037351752063842e+00%s", LFending);
+		sprintf(tf[8], "8.1717813534121219731787277851254e-01%s", LFending);
+		sprintf(tf[9], "1.8007465064456784631374830496497e+00%s", LFending);
+		sprintf(tf[10], "-8.5949297361449739085514920589048e-01%s", LFending);
 
 		if (stageAngle < 0) {
-			sprintf(tf[1], "3.31662479035539914e-01%s", LFending);
-			sprintf(tf[11], "-2.37347045474827967e+00%s", LFending);
-			sprintf(tf[12], "-2.48363930874935801e-02%s", LFending);
-			sprintf(tf[13], "4.74017017512828764e-01%s", LFending);
-			sprintf(tf[14], "7.42183927770612595e-01%s", LFending);
-			sprintf(tf[15], "1.40647330909460866e+00%s", LFending);
-			sprintf(tf[16], "-1.19136455219594772e+00%s", LFending);
-			sprintf(tf[17], "7.08088885039503180e-01%s", LFending);
-			sprintf(tf[18], "2.58908260614167995e-01%s", LFending);
-			sprintf(tf[19], "-4.99299221941104307e-02%s", LFending);
+			sprintf(tf[1], "3.3166247903553996989600705092016e-01%s", LFending);
+			sprintf(tf[11], "-2.3734704547482796677115857164608e+00%s", LFending);
+			sprintf(tf[12], "-2.4836393087493469078452790199663e-02%s", LFending);
+			sprintf(tf[13], "4.7401701751282859786940093727026e-01%s", LFending);
+			sprintf(tf[14], "7.4218392777061303888785914750770e-01%s", LFending);
+			sprintf(tf[15], "1.4064733090946088811534764317912e+00%s", LFending);
+			sprintf(tf[16], "-1.1913645521959481676788072945783e+00%s", LFending);
+			sprintf(tf[17], "7.0808888503950306869683117838576e-01%s", LFending);
+			sprintf(tf[18], "2.5890826061416793990588303131517e-01%s", LFending);
+			sprintf(tf[19], "-4.9929922194110284983104008915689e-02%s", LFending);
 		}
 		else {
-			sprintf(tf[1], "-3.31662479035539914e-01%s", LFending);
-			sprintf(tf[11], "2.37347045474827967e+00%s", LFending);
-			sprintf(tf[12], "2.48363930874935801e-02%s", LFending);
-			sprintf(tf[13], "-4.74017017512828764e-01%s", LFending);
-			sprintf(tf[14], "-7.42183927770612595e-01%s", LFending);
-			sprintf(tf[15], "-1.40647330909460866e+00%s", LFending);
-			sprintf(tf[16], "1.19136455219594772e+00%s", LFending);
-			sprintf(tf[17], "-7.08088885039503180e-01%s", LFending);
-			sprintf(tf[18], "-2.58908260614167995e-01%s", LFending);
-			sprintf(tf[19], "4.99299221941104307e-02%s", LFending);
+			sprintf(tf[1], "-3.3166247903553996989600705092016e-01%s", LFending);
+			sprintf(tf[11], "2.3734704547482796677115857164608e+00%s", LFending);
+			sprintf(tf[12], "2.4836393087493469078452790199663e-02%s", LFending);
+			sprintf(tf[13], "-4.7401701751282859786940093727026e-01%s", LFending);
+			sprintf(tf[14], "-7.4218392777061303888785914750770e-01%s", LFending);
+			sprintf(tf[15], "-1.4064733090946088811534764317912e+00%s", LFending);
+			sprintf(tf[16], "1.1913645521959481676788072945783e+00%s", LFending);
+			sprintf(tf[17], "-7.0808888503950306869683117838576e-01%s", LFending);
+			sprintf(tf[18], "-2.5890826061416793990588303131517e-01%s", LFending);
+			sprintf(tf[19], "4.9929922194110284983104008915689e-02%s", LFending);
 		}
 		for (uint64_t i = radix - 1; i > 0; i--) {
 			if (stageSize == 1) {
@@ -4071,73 +5072,73 @@ temp%s = temp;\n\
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			else {
-			if (i == radix - 1) {
-				if (sc->LUT) {
+				if (i == radix - 1) {
+					if (sc->LUT) {
 						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 				}
-			}
-			else {
-				if (sc->LUT) {
-						if  (sc->useCoalescedLUTUploadToSM) {
+				else {
+					if (sc->LUT) {
+						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 				}
@@ -4291,6 +5292,7 @@ temp%s = temp;\n\
 			free(tf[i]);
 			tf[i] = 0;
 		}
+		*/
 		break;
 	}
 	case 12: {
@@ -4330,7 +5332,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -4355,7 +5357,7 @@ temp%s = temp;\n\
 				}
 				else {
 					if (sc->LUT) {
-						if  (sc->useCoalescedLUTUploadToSM) {
+						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -4365,7 +5367,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -4439,18 +5441,18 @@ temp%s = temp;\n\
 			for (uint64_t j = 0; j < Q; j++) {
 				if (i > 0) {
 					if (stageAngle < 0) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * j * double_PI / radix), LFending);
+						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * j * double_PI / radix), LFending);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, -sin(2 * i * j * double_PI / radix), LFending);
+						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)-sin(2 * i * j * double_PI / radix), LFending);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * j * double_PI / radix), LFending);
+						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * j * double_PI / radix), LFending);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, sin(2 * i * j * double_PI / radix), LFending);
+						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)sin(2 * i * j * double_PI / radix), LFending);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
@@ -4512,8 +5514,225 @@ temp%s = temp;\n\
 		break;
 	}
 	case 13: {
+		char* tf_x[20];
+		char* tf_y[20];
+		for (uint64_t i = 0; i < 12; i++) {
+			tf_x[i] = (char*)malloc(sizeof(char) * 50);
+			if (!tf_x[i]) {
+				for (uint64_t j = 0; j < i; j++) {
+					free(tf_x[j]);
+					tf_x[j] = 0;
+				}
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			tf_y[i] = (char*)malloc(sizeof(char) * 50);
+			if (!tf_y[i]) {
+				for (uint64_t j = 0; j < 12; j++) {
+					free(tf_x[j]);
+					tf_x[j] = 0;
+				}
+				for (uint64_t j = 0; j < i; j++) {
+					free(tf_y[j]);
+					tf_y[j] = 0;
+				}
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+		}
+		sprintf(tf_x[0], "8.8545602565320989587194927539215e-01%s", LFending);
+		sprintf(tf_x[1], "-9.7094181742605202719252621701429e-01%s", LFending);
+		sprintf(tf_x[2], "1.2053668025532305345994812592614e-01%s", LFending);
+		sprintf(tf_x[3], "-7.4851074817110109868448578063216e-01%s", LFending);
+		sprintf(tf_x[4], "-3.5460488704253562600274447824678e-01%s", LFending);
+		sprintf(tf_x[5], "5.6806474673115580237845248512407e-01%s", LFending);
+		sprintf(tf_x[6], "8.8545602565320989608878970988926e-01%s", LFending);
+		sprintf(tf_x[7], "-9.7094181742605202719252621701429e-01%s", LFending);
+		sprintf(tf_x[8], "1.2053668025532305324988395500707e-01%s", LFending);
+		sprintf(tf_x[9], "-7.4851074817110109863027567200788e-01%s", LFending);
+		sprintf(tf_x[10], "-3.5460488704253562600274447824678e-01%s", LFending);
+		sprintf(tf_x[11], "5.6806474673115580248687270237262e-01%s", LFending);
+		if (stageAngle < 0) {
+			sprintf(tf_y[0], "-4.6472317204376854566250792943904e-01%s", LFending);
+			sprintf(tf_y[1], "2.3931566428755776706062234626682e-01%s", LFending);
+			sprintf(tf_y[2], "9.9270887409805399278096144088934e-01%s", LFending);
+			sprintf(tf_y[3], "-6.6312265824079520232193704631918e-01%s", LFending);
+			sprintf(tf_y[4], "9.3501624268541482344965776185575e-01%s", LFending);
+			sprintf(tf_y[5], "8.2298386589365639468820687318917e-01%s", LFending);
+			sprintf(tf_y[6], "4.6472317204376854531014222338126e-01%s", LFending);
+			sprintf(tf_y[7], "-2.3931566428755776695220212901827e-01%s", LFending);
+			sprintf(tf_y[8], "-9.9270887409805399283517154951362e-01%s", LFending);
+			sprintf(tf_y[9], "6.6312265824079520243035726356773e-01%s", LFending);
+			sprintf(tf_y[10], "-9.3501624268541482344965776185575e-01%s", LFending);
+			sprintf(tf_y[11], "-8.2298386589365639457978665594062e-01%s", LFending);
+		}
+		else {
+			sprintf(tf_y[0], "4.6472317204376854566250792943904e-01%s", LFending);
+			sprintf(tf_y[1], "-2.3931566428755776706062234626682e-01%s", LFending);
+			sprintf(tf_y[2], "-9.9270887409805399278096144088934e-01%s", LFending);
+			sprintf(tf_y[3], "6.6312265824079520232193704631918e-01%s", LFending);
+			sprintf(tf_y[4], "-9.3501624268541482344965776185575e-01%s", LFending);
+			sprintf(tf_y[5], "-8.2298386589365639468820687318917e-01%s", LFending);
+			sprintf(tf_y[6], "-4.6472317204376854531014222338126e-01%s", LFending);
+			sprintf(tf_y[7], "2.3931566428755776695220212901827e-01%s", LFending);
+			sprintf(tf_y[8], "9.9270887409805399283517154951362e-01%s", LFending);
+			sprintf(tf_y[9], "-6.6312265824079520243035726356773e-01%s", LFending);
+			sprintf(tf_y[10], "9.3501624268541482344965776185575e-01%s", LFending);
+			sprintf(tf_y[11], "8.2298386589365639457978665594062e-01%s", LFending);
+		}
+		for (uint64_t i = radix - 1; i > 0; i--) {
+			if (stageSize == 1) {
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = 1;\n", w);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = 0;\n", w);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				if (i == radix - 1) {
+					if (sc->LUT) {
+						if (sc->useCoalescedLUTUploadToSM) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+				else {
+					if (sc->LUT) {
+						if (sc->useCoalescedLUTUploadToSM) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+			}
+			res = VkMulComplex(sc, sc->locID[i], regID[i], w, 0);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		res = VkMovComplex(sc, sc->locID[0], regID[0]);
+		if (res != VKFFT_SUCCESS) return res;
+		uint64_t permute[13] = { 0, 1, 2, 4, 8, 3, 6, 12, 11, 9, 5, 10, 7 };
+		res = VkPermute(sc, permute, 13, 0, 0, w);
+		if (res != VKFFT_SUCCESS) return res;
+		for (uint64_t i = 0; i < 6; i++) {
+			res = VkSubComplex_x(sc, regID[i + 7], sc->locID[i + 1], sc->locID[i + 7]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_x(sc, regID[i + 1], sc->locID[i + 1], sc->locID[i + 7]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_y(sc, regID[i + 7], sc->locID[i + 1], sc->locID[i + 7]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkSubComplex_y(sc, regID[i + 1], sc->locID[i + 1], sc->locID[i + 7]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 0; i < 6; i++) {
+			res = VkAddComplex_x(sc, regID[0], regID[0], regID[i + 1]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_y(sc, regID[0], regID[0], regID[i + 7]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 1; i < 7; i++) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+	%s=%s;\n", sc->locID[i], sc->locID[0]);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 7; i < 13; i++) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+	%s.x=0;\n\
+	%s.y=0;\n", sc->locID[i], sc->locID[i]);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 0; i < 6; i++) {
+			for (uint64_t j = 0; j < 6; j++) {
+				uint64_t id = ((12 - i) + j) % 12;
+				res = VkFMA3Complex_const_w(sc, sc->locID[j + 1], sc->locID[j + 7], regID[i + 1], tf_x[id], tf_y[id], regID[i + 7]);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+		}
+		for (uint64_t i = 1; i < 7; i++) {
+			res = VkSubComplex_x(sc, regID[i], sc->locID[i], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAddComplex_y(sc, regID[i], sc->locID[i], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t i = 1; i < 7; i++) {
+			res = VkAddComplex_x(sc, regID[i + 6], sc->locID[i], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkSubComplex_y(sc, regID[i + 6], sc->locID[i], sc->locID[i + 6]);
+			if (res != VKFFT_SUCCESS) return res;
+		}
 
-		char* tf[20];
+		uint64_t permute2[13] = { 0,1,12,9,11,4,8,2,10,5,3,6,7 };
+		res = VkPermute(sc, permute2, 13, 1, regID, w);
+		//if (res != VKFFT_SUCCESS) return res;
+		for (uint64_t i = 0; i < 12; i++) {
+			free(tf_x[i]);
+			tf_x[i] = 0;
+			free(tf_y[i]);
+			tf_y[i] = 0;
+		}
+		for (uint64_t i = 0; i < 12; i++) {
+			free(tf_x[i]);
+			tf_x[i] = 0;
+			free(tf_y[i]);
+			tf_y[i] = 0;
+		}
+		//old version
+		/*char* tf[20];
 		//char* tf2[4];
 		//char* tf2inv[4];
 		//VkAppendLine(sc, "	{\n");
@@ -4576,77 +5795,77 @@ temp%s = temp;\n\
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			else {
-			if (i == radix - 1) {
-				if (sc->LUT) {
+				if (i == radix - 1) {
+					if (sc->LUT) {
 						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID];\n", w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId];\n", w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 				}
-			}
-			else {
-				if (sc->LUT) {
+				else {
+					if (sc->LUT) {
 						if (sc->useCoalescedLUTUploadToSM) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, (radix - 1 - i) * stageSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (stageAngle < 0) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
 						}
-					if (!sc->inverse) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-					}
-				}
-				else {
-					if (!strcmp(floatType, "float")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
 					}
-					if (!strcmp(floatType, "double")) {
-						sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s(angle*%.17e%s);\n", w, cosDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s(angle*%.17e%s);\n", w, sinDef, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(angle*%.17e%s);\n", w, 2.0 * i / radix, LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 				}
 			}
-			}
 			res = VkMulComplex(sc, sc->locID[i], regID[i], w, 0);
 			if (res != VKFFT_SUCCESS) return res;
 
@@ -4797,7 +6016,7 @@ temp%s = temp;\n\
 		for (uint64_t i = 0; i < 20; i++) {
 			free(tf[i]);
 			tf[i] = 0;
-		}
+		}*/
 		break;
 	}
 	case 14: {
@@ -4852,7 +6071,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -4887,7 +6106,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -5038,18 +6257,18 @@ temp%s = temp;\n\
 		for (uint64_t i = 0; i < P; i++) {
 			if (i > 0) {
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, -sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)-sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -5133,7 +6352,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -5168,7 +6387,7 @@ temp%s = temp;\n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if (!sc->inverse) {
+						if (stageAngle < 0) {
 							sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -5282,36 +6501,36 @@ temp%s = temp;\n\
 		for (uint64_t i = 0; i < P; i++) {
 			if (i > 0) {
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, -sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)-sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, sin(2 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)sin(2 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				res = VkMulComplex(sc, sc->locID[1], regID[Q * i + 1], w, temp);
 				if (res != VKFFT_SUCCESS) return res;
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, -sin(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)-sin(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, cos(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %.17e%s;\n", w, (double)cos(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, sin(4 * i * double_PI / radix), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %.17e%s;\n\n", w, (double)sin(4 * i * double_PI / radix), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -5389,7 +6608,7 @@ temp%s = temp;\n\
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -5444,7 +6663,7 @@ temp%s = temp;\n\
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -5460,9 +6679,15 @@ temp%s = temp;\n\
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				if (!strcmp(floatType, "double")) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.5%s*angle);\n", w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -5512,16 +6737,16 @@ temp%s = temp;\n\
 		else {
 			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 2*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 2 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 2*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 2 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -5538,9 +6763,15 @@ temp%s = temp;\n\
 					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(0.25*angle), sin(0.25*angle));\n\n", vecType);
 				}
 				if (!strcmp(floatType, "double")) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.25%s*angle);\n", w, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -5579,18 +6810,18 @@ temp%s = temp;\n\
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		if (stageAngle < 0) {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 + %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 - %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 - %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 + %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
@@ -5640,16 +6871,16 @@ temp%s = temp;\n\
 		else {
 			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 3*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 3 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 3*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 3 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -5666,9 +6897,15 @@ temp%s = temp;\n\
 					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(0.25*angle), sin(0.25*angle));\n\n", vecType);
 				}
 				if (!strcmp(floatType, "double")) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.125%s*angle);\n", w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -5710,18 +6947,18 @@ temp%s = temp;\n\
 
 
 		if (stageAngle < 0) {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 + %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 - %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 - %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 + %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
@@ -5765,18 +7002,18 @@ temp%s = temp;\n\
 
 		for (uint64_t j = 0; j < 2; j++) {
 			if (stageAngle < 0) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			else {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
@@ -5887,7 +7124,7 @@ temp%s = temp;\n\
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -5942,7 +7179,7 @@ temp%s = temp;\n\
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -5958,9 +7195,15 @@ temp%s = temp;\n\
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				if (!strcmp(floatType, "double")) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.5%s*angle);\n", w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -6010,16 +7253,16 @@ temp%s = temp;\n\
 		else {
 			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 2*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 2 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 2*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 2 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -6036,9 +7279,15 @@ temp%s = temp;\n\
 					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(0.25*angle), sin(0.25*angle));\n\n", vecType);
 				}
 				if (!strcmp(floatType, "double")) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.25%s*angle);\n", w, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -6077,18 +7326,18 @@ temp%s = temp;\n\
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		if (stageAngle < 0) {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 + %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 - %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 - %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 + %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
@@ -6138,16 +7387,16 @@ temp%s = temp;\n\
 		else {
 			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 3*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 3 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 3*stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 3 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -6164,9 +7413,15 @@ temp%s = temp;\n\
 					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(0.25*angle), sin(0.25*angle));\n\n", vecType);
 				}
 				if (!strcmp(floatType, "double")) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.125%s*angle);\n", w, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -6208,18 +7463,18 @@ temp%s = temp;\n\
 
 
 		if (stageAngle < 0) {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 + %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 - %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 - %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 + %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
@@ -6263,18 +7518,18 @@ temp%s = temp;\n\
 
 		for (uint64_t j = 0; j < 2; j++) {
 			if (stageAngle < 0) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			else {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
@@ -6327,16 +7582,16 @@ temp%s = temp;\n\
 		else {
 			if (sc->LUT) {
 				if (sc->useCoalescedLUTUploadToSM) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 4* stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sdata[stageInvocationID+%" PRIu64 "];\n\n", w, 4 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 4* stageSize);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = twiddleLUT[LUTId+%" PRIu64 "];\n\n", w, 4 * stageSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
-				if (!sc->inverse) {
+				if (stageAngle < 0) {
 					sc->tempLen = sprintf(sc->tempStr, "	%s.y = -%s.y;\n", w, w);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -6353,9 +7608,15 @@ temp%s = temp;\n\
 					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(0.25*angle), sin(0.25*angle));\n\n", vecType);
 				}
 				if (!strcmp(floatType, "double")) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s + %s(1.0, 0.0));\n", w, w, vecType);
+					sc->tempLen = sprintf(sc->tempStr, "	%s = sincos_20(0.0625%s*angle);\n", w, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					/*sc->tempLen = sprintf(sc->tempStr, "	%s.x=%s.x+1.0%s;\n", w, w, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s=normalize(%s);\n", w, w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;*/
 				}
 			}
 		}
@@ -6397,18 +7658,18 @@ temp%s = temp;\n\
 
 
 		if (stageAngle < 0) {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 + %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 - %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		else {
-			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * loc_SQRT1_2 - %s.y * loc_SQRT1_2;\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * loc_SQRT1_2 + %s.x * loc_SQRT1_2;\n\n", iw, w, w);
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, 0.70710678118654752440084436210485, LFending, w, 0.70710678118654752440084436210485, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
@@ -6452,18 +7713,18 @@ temp%s = temp;\n\
 
 		for (uint64_t j = 0; j < 2; j++) {
 			if (stageAngle < 0) {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			else {
-				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 8), LFending, w, sin((2 * j + 1) * double_PI / 8), LFending);
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 8), LFending, w, (double)sin((2 * j + 1) * double_PI / 8), LFending);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
@@ -6508,36 +7769,36 @@ temp%s = temp;\n\
 		for (uint64_t j = 0; j < 4; j++) {
 			if ((j == 1) || (j == 2)) {
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, cos((7 - 2 * j) * double_PI / 16), LFending, w, sin((7 - 2 * j) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, (double)cos((7 - 2 * j) * double_PI / 16), LFending, w, (double)sin((7 - 2 * j) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, cos((7 - 2 * j) * double_PI / 16), LFending, w, sin((7 - 2 * j) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, (double)cos((7 - 2 * j) * double_PI / 16), LFending, w, (double)sin((7 - 2 * j) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, cos((7 - 2 * j) * double_PI / 16), LFending, w, sin((7 - 2 * j) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, (double)cos((7 - 2 * j) * double_PI / 16), LFending, w, (double)sin((7 - 2 * j) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, cos((7 - 2 * j) * double_PI / 16), LFending, w, sin((7 - 2 * j) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, (double)cos((7 - 2 * j) * double_PI / 16), LFending, w, (double)sin((7 - 2 * j) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 			}
 			else {
 				if (stageAngle < 0) {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 16), LFending, w, sin((2 * j + 1) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s + %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 16), LFending, w, (double)sin((2 * j + 1) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 16), LFending, w, sin((2 * j + 1) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s - %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 16), LFending, w, (double)sin((2 * j + 1) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, cos((2 * j + 1) * double_PI / 16), LFending, w, sin((2 * j + 1) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.x = %s.x * %.17e%s - %s.y * %.17e%s;\n", iw, w, (double)cos((2 * j + 1) * double_PI / 16), LFending, w, (double)sin((2 * j + 1) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, cos((2 * j + 1) * double_PI / 16), LFending, w, sin((2 * j + 1) * double_PI / 16), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "	%s.y = %s.y * %.17e%s + %s.x * %.17e%s;\n\n", iw, w, (double)cos((2 * j + 1) * double_PI / 16), LFending, w, (double)sin((2 * j + 1) * double_PI / 16), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -6694,6 +7955,9 @@ static inline VkFFTResult appendSharedMemoryVkFFT(VkFFTSpecializationConstantsLa
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(vecType, "float2");
 		sprintf(sharedDefinitions, "__local");
+#elif(VKFFT_BACKEND==5)
+		sprintf(vecType, "float2");
+		sprintf(sharedDefinitions, "shared");
 #endif
 		vecSize = 8;
 	}
@@ -6710,9 +7974,16 @@ static inline VkFFTResult appendSharedMemoryVkFFT(VkFFTSpecializationConstantsLa
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(vecType, "double2");
 		sprintf(sharedDefinitions, "__local");
+#elif(VKFFT_BACKEND==5)
+		sprintf(vecType, "double2");
+		sprintf(sharedDefinitions, "shared");
 #endif
 		vecSize = 16;
 	}
+	if (sc->useRaderMult) {
+		sc->sharedMemSize -= sc->additionalRaderSharedSize * vecSize;
+		sc->sharedMemSizePow2 -= sc->additionalRaderSharedSize * vecSize;
+	}
 	maxSequenceSharedMemory = sc->sharedMemSize / vecSize;
 	//maxSequenceSharedMemoryPow2 = sc->sharedMemSizePow2 / vecSize;
 	uint64_t mergeR2C = (sc->mergeSequencesR2C && (sc->axis_id == 0)) ? 2 : 0;
@@ -6723,19 +7994,56 @@ static inline VkFFTResult appendSharedMemoryVkFFT(VkFFTSpecializationConstantsLa
 		sc->sharedStrideBankConflictFirstStages = ((sc->fftDim > sc->numSharedBanks / 2) && ((sc->fftDim & (sc->fftDim - 1)) == 0)) ? sc->fftDim / sc->registerBoost * (sc->numSharedBanks / 2 + 1) / (sc->numSharedBanks / 2) : sc->fftDim / sc->registerBoost;
 		sc->sharedStrideReadWriteConflict = ((sc->numSharedBanks / 2 <= sc->localSize[1])) ? sc->fftDim / sc->registerBoost + 1 : sc->fftDim / sc->registerBoost + (sc->numSharedBanks / 2) / sc->localSize[1];
 		if (sc->sharedStrideReadWriteConflict < sc->fftDim / sc->registerBoost + mergeR2C) sc->sharedStrideReadWriteConflict = sc->fftDim / sc->registerBoost + mergeR2C;
+		if (sc->useRaderFFT) {
+			uint64_t max_stride = sc->fftDim;
+			uint64_t max_shift = 0;
+			for (uint64_t i = 0; i < sc->numRaderPrimes; i++) {
+
+				for (uint64_t j = 0; j < sc->raderContainer[i].numStages; j++) {
+					if (sc->raderContainer[i].containerFFTNum < 8) {
+						uint64_t subLogicalGroupSize = (uint64_t)ceil(sc->raderContainer[i].containerFFTDim / (double)sc->raderContainer[i].registers_per_thread_per_radix[sc->raderContainer[i].stageRadix[j]]); // hopefully it is not <1, will fix 
+						uint64_t shift = (subLogicalGroupSize > (sc->raderContainer[i].containerFFTDim % (sc->numSharedBanks / 2))) ? subLogicalGroupSize - sc->raderContainer[i].containerFFTDim % (sc->numSharedBanks / 2) : 0;
+						if (j == 0) shift = (sc->raderContainer[i].containerFFTDim % (sc->numSharedBanks / 2)) ? 0 : 1;
+						uint64_t loc_stride = sc->raderContainer[i].containerFFTDim + shift;
+						if (sc->raderContainer[i].containerFFTNum * (loc_stride + 1) > max_stride) {
+							max_stride = sc->raderContainer[i].containerFFTNum * (loc_stride + 1);
+							if (shift > max_shift) max_shift = shift;
+						}
+					}
+				}
+			}
+			sc->sharedShiftRaderFFT = max_shift;
+			sc->sharedStrideRaderFFT = max_stride;
+		}
+
 		sc->maxSharedStride = (sc->sharedStrideBankConflictFirstStages < sc->sharedStrideReadWriteConflict) ? sc->sharedStrideReadWriteConflict : sc->sharedStrideBankConflictFirstStages;
+
+		if (sc->useRaderFFT)
+			sc->maxSharedStride = (sc->maxSharedStride < sc->sharedStrideRaderFFT) ? sc->sharedStrideRaderFFT : sc->maxSharedStride;
+
 		sc->usedSharedMemory = vecSize * sc->localSize[1] * sc->maxSharedStride;
 		sc->maxSharedStride = ((sc->sharedMemSize < sc->usedSharedMemory)) ? sc->fftDim / sc->registerBoost : sc->maxSharedStride;
 
 		sc->sharedStrideBankConflictFirstStages = (sc->maxSharedStride == sc->fftDim / sc->registerBoost) ? sc->fftDim / sc->registerBoost : sc->sharedStrideBankConflictFirstStages;
 		sc->sharedStrideReadWriteConflict = (sc->maxSharedStride == sc->fftDim / sc->registerBoost) ? sc->fftDim / sc->registerBoost : sc->sharedStrideReadWriteConflict;
+		if (sc->useRaderFFT) {
+			sc->sharedStrideRaderFFT = (sc->maxSharedStride == sc->fftDim / sc->registerBoost) ? sc->fftDim / sc->registerBoost : sc->sharedStrideRaderFFT;
+			sc->sharedShiftRaderFFT = (sc->maxSharedStride == sc->fftDim / sc->registerBoost) ? 0 : sc->sharedShiftRaderFFT;
+		}
 		//sc->maxSharedStride += mergeR2C;
 		//printf("%" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", sc->maxSharedStride, sc->sharedStrideBankConflictFirstStages, sc->sharedStrideReadWriteConflict, sc->localSize[1], sc->fftDim);
 		sc->tempLen = sprintf(sc->tempStr, "%s sharedStride = %" PRIu64 ";\n", uintType, sc->sharedStrideReadWriteConflict);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
+		sc->usedSharedMemory = vecSize * sc->localSize[1] * sc->maxSharedStride;
+		if (sc->useRaderMult) {
+			for (uint64_t i = 0; i < 20; i++) {
+				sc->RaderKernelOffsetShared[i] += sc->usedSharedMemory / vecSize;
+			}
+			sc->usedSharedMemory += sc->additionalRaderSharedSize * vecSize;
+		}
 #if(VKFFT_BACKEND==0)
-		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];// sharedStride - fft size,  gl_WorkGroupSize.y - grouped consecutive ffts\n\n", sharedDefinitions, vecType, sc->localSize[1] * sc->maxSharedStride);
+		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];// sharedStride - fft size,  gl_WorkGroupSize.y - grouped consecutive ffts\n\n", sharedDefinitions, vecType, sc->usedSharedMemory / vecSize);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 #elif(VKFFT_BACKEND==1)
@@ -6751,11 +8059,10 @@ static inline VkFFTResult appendSharedMemoryVkFFT(VkFFTSpecializationConstantsLa
 		if (res != VKFFT_SUCCESS) return res;
 		//sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[];// sharedStride - fft size,  gl_WorkGroupSize.y - grouped consecutive ffts\n\n", sharedDefinitions, vecType);
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
-		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];// sharedStride - fft size,  gl_WorkGroupSize.y - grouped consecutive ffts\n\n", sharedDefinitions, vecType, sc->localSize[1] * sc->maxSharedStride);
+		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];// sharedStride - fft size,  gl_WorkGroupSize.y - grouped consecutive ffts\n\n", sharedDefinitions, vecType, sc->usedSharedMemory / vecSize);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 #endif
-		sc->usedSharedMemory = vecSize * sc->localSize[1] * sc->maxSharedStride;
 		break;
 	}
 	case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145://grouped_c2c + single_c2c_strided
@@ -6767,8 +8074,15 @@ static inline VkFFTResult appendSharedMemoryVkFFT(VkFFTSpecializationConstantsLa
 		sc->tempLen = sprintf(sc->tempStr, "%s sharedStride = %" PRIu64 ";\n", uintType, sc->maxSharedStride);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
+		sc->usedSharedMemory = vecSize * sc->maxSharedStride * (sc->fftDim + mergeR2C) / sc->registerBoost;
+		if (sc->useRaderMult) {
+			for (uint64_t i = 0; i < 20; i++) {
+				sc->RaderKernelOffsetShared[i] += sc->usedSharedMemory / vecSize;
+			}
+			sc->usedSharedMemory += sc->additionalRaderSharedSize * vecSize;
+		}
 #if(VKFFT_BACKEND==0)
-		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];\n\n", sharedDefinitions, vecType, sc->maxSharedStride * (sc->fftDim + mergeR2C) / sc->registerBoost);
+		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];\n\n", sharedDefinitions, vecType, sc->usedSharedMemory / vecSize);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 #elif(VKFFT_BACKEND==1)
@@ -6784,31 +8098,43 @@ static inline VkFFTResult appendSharedMemoryVkFFT(VkFFTSpecializationConstantsLa
 		if (res != VKFFT_SUCCESS) return res;
 		//sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[];\n\n", sharedDefinitions, vecType);
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
-		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];\n\n", sharedDefinitions, vecType, sc->maxSharedStride * (sc->fftDim + mergeR2C) / sc->registerBoost);
+		sc->tempLen = sprintf(sc->tempStr, "%s %s sdata[%" PRIu64 "];\n\n", sharedDefinitions, vecType, sc->usedSharedMemory / vecSize);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 #endif
-		sc->usedSharedMemory = vecSize * sc->maxSharedStride * (sc->fftDim + mergeR2C) / sc->registerBoost;
 		break;
 	}
 	}
+	if (sc->useRaderMult) {
+		sc->sharedMemSize += sc->additionalRaderSharedSize * vecSize;
+		sc->sharedMemSizePow2 += sc->additionalRaderSharedSize * vecSize;
+	}
 	return res;
 }
 static inline VkFFTResult appendInitialization(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t initType) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char vecType[30];
+	char uintType_32[30];
 #if(VKFFT_BACKEND==0)
 	if (!strcmp(floatType, "float")) sprintf(vecType, "vec2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "dvec2");
+	sprintf(uintType_32, "uint");
 #elif(VKFFT_BACKEND==1)
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	sprintf(uintType_32, "unsigned int");
 #elif(VKFFT_BACKEND==2)
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	sprintf(uintType_32, "unsigned int");
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	sprintf(uintType_32, "unsigned int");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	sprintf(uintType_32, "uint");
 #endif
 	//sc->tempLen = sprintf(sc->tempStr, "	uint dum=gl_LocalInvocationID.x;\n");
 	uint64_t logicalStoragePerThread = sc->registers_per_thread * sc->registerBoost;
@@ -6907,6 +8233,7 @@ temp%" PRIu64 "[i]=%s(dum, dum);\n", logicalRegistersPerThread, i, vecType);*/
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	sprintf(sc->w, "w");
+
 	uint64_t maxNonPow2Radix = sc->maxNonPow2Radix;
 	for (uint64_t i = 0; i < sc->usedLocRegs; i++) {
 		sprintf(sc->locID[i], "loc_%" PRIu64 "", i);
@@ -6921,9 +8248,52 @@ temp%" PRIu64 "[i]=%s(dum, dum);\n", logicalRegistersPerThread, i, vecType);*/
 		if (res != VKFFT_SUCCESS) return res;
 	}
 	sprintf(sc->temp, "%s", sc->locID[0]);
+	if (sc->useRaderFFT) {
+		for (uint64_t i = 0; i < 2; i++) {
+			sprintf(sc->x0[i], "x0_%" PRIu64 "", i);
+			sc->tempLen = sprintf(sc->tempStr, "	%s %s;\n", vecType, sc->x0[i]);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "	%s.x=0;\n", sc->x0[i]);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "	%s.y=0;\n", sc->x0[i]);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+	}
+	if (sc->useRaderMult) {
+		int64_t rader_fft_regs = (sc->useRaderFFT) ? 2 : 0;
+		int64_t rader_mult_regs = sc->raderRegisters / 2 - rader_fft_regs;
+		if (rader_mult_regs <= (int64_t)sc->usedLocRegs - 1) {
+			for (int64_t i = 0; i < rader_mult_regs; i++) {
+				sprintf(sc->x0[i + rader_fft_regs], "%s", sc->locID[i + 1]);
+			}
+		}
+		else {
+			for (int64_t i = 0; i < (int64_t)sc->usedLocRegs - 1; i++) {
+				sprintf(sc->x0[i + rader_fft_regs], "%s", sc->locID[i + 1]);
+			}
+			for (int64_t i = sc->usedLocRegs - 1; i < rader_mult_regs; i++) {
+				sprintf(sc->x0[i + rader_fft_regs], "x0_%" PRIu64 "", i + rader_fft_regs);
+				sc->tempLen = sprintf(sc->tempStr, "	%s %s;\n", vecType, sc->x0[i + rader_fft_regs]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "	%s.x=0;\n", sc->x0[i + rader_fft_regs]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "	%s.y=0;\n", sc->x0[i + rader_fft_regs]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+		}
+	}
+	//sc->tempLen = sprintf(sc->tempStr, "	%s temp2;\n", vecType);
+	//res = VkAppendLine(sc);
+	//if (res != VKFFT_SUCCESS) return res;
 	uint64_t useRadix8plus = 0;
 	for (uint64_t i = 0; i < sc->numStages; i++)
-		if ((sc->stageRadix[i] == 8) || (sc->stageRadix[i] == 16) || (sc->stageRadix[i] == 32)) useRadix8plus = 1;
+		if ((sc->stageRadix[i] == 8) || (sc->stageRadix[i] == 16) || (sc->stageRadix[i] == 32) || (sc->useRaderFFT)) useRadix8plus = 1;
 	if (useRadix8plus == 1) {
 		if (maxNonPow2Radix > 1) sprintf(sc->iw, "%s", sc->locID[1]);
 		else {
@@ -6955,10 +8325,49 @@ temp%" PRIu64 "[i]=%s(dum, dum);\n", logicalRegistersPerThread, i, vecType);*/
 	sc->tempLen = sprintf(sc->tempStr, "	%s %s=0;\n", uintType, sc->inoutID);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
+	if ((sc->fftDim < sc->fft_dim_full) || (initType == 1) || (initType == 2)) {
+		sc->tempLen = sprintf(sc->tempStr, "	%s disableThreads=1;\n", uintType_32);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+	}
+	//initialize subgroups ids
+	if (sc->useRader) {
+		sc->tempLen = sprintf(sc->tempStr, "	%s %s = 0;\n", uintType, sc->raderIDx);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sc->tempLen = sprintf(sc->tempStr, "	%s %s = 0;\n", uintType, sc->raderIDx2);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		/*#if((VKFFT_BACKEND==1)||(VKFFT_BACKEND==2))
+				sprintf(sc->gl_SubgroupInvocationID, "gl_SubgroupInvocationID");
+				sprintf(sc->gl_SubgroupID, "gl_SubgroupID");
+				if (sc->localSize[1] == 1) {
+					sc->tempLen = sprintf(sc->tempStr, "	%s %s=(threadIdx.x %% %" PRIu64 ");\n", uintType, sc->gl_SubgroupInvocationID, sc->warpSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s %s=(threadIdx.x / %" PRIu64 ");\n", uintType, sc->gl_SubgroupID, sc->warpSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "	%s %s=((threadIdx.x+threadIdx.y*blockDim.x) %% %" PRIu64 ");\n", uintType, sc->gl_SubgroupInvocationID, sc->warpSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "	%s %s=((threadIdx.x+threadIdx.y*blockDim.x) / %" PRIu64 ");\n", uintType, sc->gl_SubgroupID, sc->warpSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+		#endif*/
+	}
 	if (sc->LUT) {
 		sc->tempLen = sprintf(sc->tempStr, "	%s LUTId=0;\n", uintType);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
+		if (!sc->LUT_4step) {
+			sc->tempLen = sprintf(sc->tempStr, "	%s angle=0;\n", floatType);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
 	}
 	else {
 		sc->tempLen = sprintf(sc->tempStr, "	%s angle=0;\n", floatType);
@@ -7239,7 +8648,7 @@ static inline VkFFTResult appendZeropadEnd(VkFFTSpecializationConstantsLayout* s
 	else {
 		switch (sc->axis_id) {
 		case 0: {
-			char idY[500] = "";
+			//char idY[500] = "";
 			if (sc->performZeropaddingFull[1]) {
 				if (sc->fft_zeropad_left_full[1] < sc->fft_zeropad_right_full[1]) {
 					sc->tempLen = sprintf(sc->tempStr, "		}\n");
@@ -7361,7 +8770,7 @@ static inline VkFFTResult appendZeropadStartReadWriteStage(VkFFTSpecializationCo
 		case 0: {
 			char idY[500] = "";
 			char idZ[500] = "";
-			uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+			//uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
 			if (readStage) {
 				sprintf(idY, "(%s/%" PRIu64 ") %% %" PRIu64 "", sc->inoutID, sc->inputStride[1], sc->inputStride[2] / sc->inputStride[1]);
 				sprintf(idZ, "(%s/%" PRIu64 ") %% %" PRIu64 "", sc->inoutID, sc->inputStride[2], sc->inputStride[3] / sc->inputStride[2]);
@@ -7494,46 +8903,87 @@ static inline VkFFTResult appendZeropadEndReadWriteStage(VkFFTSpecializationCons
 }
 static inline VkFFTResult appendSetSMToZero(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* floatTypeMemory, const char* uintType, uint64_t readType) {
 	VkFFTResult res = VKFFT_SUCCESS;
+	uint64_t used_registers_read = 1;
+	switch (readType) {
+	case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		break;
+	case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		break;
+	}
+	if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 	//appendZeropadStart(sc);
 	for (uint64_t k = 0; k < sc->registerBoost; k++) {
-		for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+		for (uint64_t i = 0; i < used_registers_read; i++) {
 			switch (readType) {
 			case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
 			{
 				if (sc->localSize[1] == 1)
-					sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+					sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 				else
-					sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+					sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 
 				if (sc->axisSwapped) {
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "		sdata[(combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ")].x = 0;\n", sc->fftDim, sc->fftDim);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					sc->tempLen = sprintf(sc->tempStr, "		sdata[(combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ")].y = 0;\n", sc->fftDim, sc->fftDim);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				else {
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "		sdata[(combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride].x = 0;\n", sc->fftDim, sc->fftDim);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					sc->tempLen = sprintf(sc->tempStr, "		sdata[(combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride].y = 0;\n", sc->fftDim, sc->fftDim);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				break;
 			}
 			case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145://single_c2c
 			{
-				sc->tempLen = sprintf(sc->tempStr, "		sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+				if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * i);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sc->tempLen = sprintf(sc->tempStr, "		sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sc->tempLen = sprintf(sc->tempStr, "		sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+				sc->tempLen = sprintf(sc->tempStr, "		sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-
+				if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				break;
 			}
 			}
@@ -7550,7 +9000,7 @@ static inline VkFFTResult setReadToRegisters(VkFFTSpecializationConstantsLayout*
 	switch (readType) {
 	case 0: //single_c2c
 	{
-		if ((sc->localSize[1] > 1) || ((sc->performR2C) && (sc->actualInverse)) || (sc->localSize[0] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim))
+		if ((sc->localSize[1] > 1) || ((sc->performR2C) && (sc->actualInverse)) || (sc->localSize[0] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim) || (sc->rader_generator[0] > 0))
 			sc->readToRegisters = 0;
 		else
 			sc->readToRegisters = 1;
@@ -7558,7 +9008,7 @@ static inline VkFFTResult setReadToRegisters(VkFFTSpecializationConstantsLayout*
 	}
 	case 1: //grouped_c2c
 	{
-		if (sc->localSize[1] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim)
+		if ((sc->localSize[1] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim) || (sc->rader_generator[0] > 0))
 			sc->readToRegisters = 0;
 		else
 			sc->readToRegisters = 1;
@@ -7566,7 +9016,7 @@ static inline VkFFTResult setReadToRegisters(VkFFTSpecializationConstantsLayout*
 	}
 	case 2: //single_c2c_strided
 	{
-		if (sc->localSize[1] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim)
+		if ((sc->localSize[1] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim) || (sc->rader_generator[0] > 0))
 			sc->readToRegisters = 0;
 		else
 			sc->readToRegisters = 1;
@@ -7574,7 +9024,7 @@ static inline VkFFTResult setReadToRegisters(VkFFTSpecializationConstantsLayout*
 	}
 	case 5://single_r2c
 	{
-		if ((sc->axisSwapped) || (sc->localSize[1] > 1) || (sc->localSize[0] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim))
+		if ((sc->axisSwapped) || (sc->localSize[1] > 1) || (sc->localSize[0] * sc->stageRadix[0] * (sc->registers_per_thread_per_radix[sc->stageRadix[0]] / sc->stageRadix[0]) > sc->fftDim) || (sc->rader_generator[0] > 0))
 			sc->readToRegisters = 0;
 		else
 			sc->readToRegisters = 1;
@@ -7582,7 +9032,10 @@ static inline VkFFTResult setReadToRegisters(VkFFTSpecializationConstantsLayout*
 	}
 	case 6: //single_c2r
 	{
-		sc->readToRegisters = 1;
+		if ((sc->rader_generator[0] > 0) || ((sc->fftDim % sc->localSize[0]) && (!sc->axisSwapped)) || ((sc->fftDim % sc->localSize[1]) && (sc->axisSwapped)))
+			sc->readToRegisters = 0;
+		else
+			sc->readToRegisters = 1;
 		break;
 	}
 	case 110: case 111: case 120: case 121: case 130: case 131: case 140: case 141: case 142: case 143:
@@ -7590,9 +9043,22 @@ static inline VkFFTResult setReadToRegisters(VkFFTSpecializationConstantsLayout*
 		sc->readToRegisters = 0;
 		break;
 	}
-	case 144: case 145:
+	case 144:
 	{
-		sc->readToRegisters = 1;
+		uint64_t registers_first_stage = (sc->stageRadix[0] < sc->fixMinRaderPrimeMult) ? sc->registers_per_thread_per_radix[sc->stageRadix[0]] : 1;
+		if ((sc->rader_generator[0] > 0) || (sc->fftDim % registers_first_stage))
+			sc->readToRegisters = 0;
+		else
+			sc->readToRegisters = 1;
+		break;
+	}
+	case 145:
+	{
+		uint64_t registers_first_stage = (sc->stageRadix[0] < sc->fixMinRaderPrimeMult) ? sc->registers_per_thread_per_radix[sc->stageRadix[0]] : 1;
+		if ((sc->rader_generator[0] > 0) || (sc->fftDim % registers_first_stage))
+			sc->readToRegisters = 0;
+		else
+			sc->readToRegisters = 1;
 		break;
 	}
 	}
@@ -7600,10 +9066,11 @@ static inline VkFFTResult setReadToRegisters(VkFFTSpecializationConstantsLayout*
 }
 static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* floatTypeMemory, const char* uintType, uint64_t readType) {
 	VkFFTResult res = VKFFT_SUCCESS;
-	double double_PI = 3.1415926535897932384626433832795;
+	long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
 	char vecType[30];
 	char inputsStruct[20] = "";
 	char LFending[4] = "";
+	char uintType_32[30];
 	if (!strcmp(floatType, "float")) sprintf(LFending, "f");
 #if(VKFFT_BACKEND==0)
 	if (sc->inputBufferBlockNum == 1)
@@ -7613,12 +9080,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 	if (!strcmp(floatType, "float")) sprintf(vecType, "vec2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "dvec2");
 	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+	sprintf(uintType_32, "uint");
 	char cosDef[20] = "cos";
 	char sinDef[20] = "sin";
 #elif(VKFFT_BACKEND==1)
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+	sprintf(uintType_32, "unsigned int");
 	sprintf(inputsStruct, "inputs");
 	char cosDef[20] = "__cosf";
 	char sinDef[20] = "__sinf";
@@ -7626,6 +9095,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+	sprintf(uintType_32, "unsigned int");
 	sprintf(inputsStruct, "inputs");
 	char cosDef[20] = "__cosf";
 	char sinDef[20] = "__sinf";
@@ -7633,8 +9103,17 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 	sprintf(inputsStruct, "inputs");
+	sprintf(uintType_32, "unsigned int");
 	char cosDef[20] = "native_cos";
 	char sinDef[20] = "native_sin";
+#elif(VKFFT_BACKEND==5)
+	sprintf(inputsStruct, "inputs");
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+	sprintf(uintType_32, "uint");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
 #endif
 	char convTypeLeft[20] = "";
 	char convTypeRight[20] = "";
@@ -7652,6 +9131,9 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "(float)");
 			//sprintf(convTypeRight, "");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "float(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 		else {
@@ -7667,6 +9149,9 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "conv_float2(");
 			sprintf(convTypeRight, ")");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "conv_float2(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 	}
@@ -7684,6 +9169,9 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "(double)");
 			//sprintf(convTypeRight, "");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "double(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 		else {
@@ -7699,6 +9187,9 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "conv_double2(");
 			sprintf(convTypeRight, ")");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "conv_double2(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 	}
@@ -7736,21 +9227,29 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		char shiftY2[100] = "";
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
+		uint64_t used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		if (sc->fftDim < sc->fft_dim_full) {
 			if (sc->axisSwapped) {
-				sc->tempLen = sprintf(sc->tempStr, "		%s numActiveThreads = ((%s/%" PRIu64 ")==%" PRIu64 ") ? %" PRIu64 " : %" PRIu64 ";\n", uintType, sc->gl_WorkGroupID_x, sc->firstStageStartSize / sc->fftDim, ((uint64_t)floor(sc->fft_dim_full / ((double)sc->localSize[0] * sc->fftDim))) / (sc->firstStageStartSize / sc->fftDim), (sc->fft_dim_full - (sc->firstStageStartSize / sc->fftDim) * ((((uint64_t)floor(sc->fft_dim_full / ((double)sc->localSize[0] * sc->fftDim))) / (sc->firstStageStartSize / sc->fftDim)) * sc->localSize[0] * sc->fftDim)) / sc->min_registers_per_thread / (sc->firstStageStartSize / sc->fftDim), sc->localSize[0] * sc->localSize[1]);// sc->fft_dim_full, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0] * sc->firstStageStartSize, sc->fft_dim_full / (sc->localSize[0] * sc->fftDim));
+				sc->tempLen = sprintf(sc->tempStr, "		%s numActiveThreads = ((%s/%" PRIu64 ")==%" PRIu64 ") ? %" PRIu64 " : %" PRIu64 ";\n", uintType, sc->gl_WorkGroupID_x, sc->firstStageStartSize / sc->fftDim, ((uint64_t)floor(sc->fft_dim_full / ((double)sc->localSize[0] * sc->fftDim))) / (sc->firstStageStartSize / sc->fftDim), (uint64_t)ceil(((sc->fft_dim_full - (sc->firstStageStartSize / sc->fftDim) * ((((uint64_t)floor(sc->fft_dim_full / ((double)sc->localSize[0] * sc->fftDim))) / (sc->firstStageStartSize / sc->fftDim)) * sc->localSize[0] * sc->fftDim)) / (sc->firstStageStartSize / sc->fftDim)) / (double)used_registers_read), sc->localSize[0] * sc->localSize[1]);// sc->fft_dim_full, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0] * sc->firstStageStartSize, sc->fft_dim_full / (sc->localSize[0] * sc->fftDim));
 				//sc->tempLen = sprintf(sc->tempStr, "		if (numActiveThreads>%" PRIu64 ") numActiveThreads = %" PRIu64 ";\n", sc->localSize[0]* sc->localSize[1], sc->localSize[0]* sc->localSize[1]);
 				//sprintf(sc->disableThreadsStart, "		if((%s+%" PRIu64 "*%s)< numActiveThreads) {\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				sprintf(sc->disableThreadsStart, "		if(%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0] * sc->firstStageStartSize, sc->fft_dim_full);
+				sc->tempLen = sprintf(sc->tempStr, "		disableThreads = (%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") ? 1 : 0;\n", sc->gl_LocalInvocationID_x, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0] * sc->firstStageStartSize, sc->fft_dim_full);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sprintf(sc->disableThreadsStart, "		if(disableThreads>0) {\n");
 				sc->tempLen = sprintf(sc->tempStr, "		if((%s+%" PRIu64 "*%s)< numActiveThreads) {\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 				sprintf(sc->disableThreadsEnd, "}");
 			}
 			else {
-				sprintf(sc->disableThreadsStart, "		if(%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize, sc->fft_dim_full);
+				sc->tempLen = sprintf(sc->tempStr, "		disableThreads = (%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") ? 1 : 0;\n", sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize, sc->fft_dim_full);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sprintf(sc->disableThreadsStart, "		if(disableThreads>0) {\n");
 				res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 				if (res != VKFFT_SUCCESS) return res;
 				sprintf(sc->disableThreadsEnd, "}");
@@ -7761,15 +9260,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
-
 		if (sc->fftDim == sc->fft_dim_full) {
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->inputStride[0] > 1)
@@ -7780,14 +9278,37 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->axisSwapped) {
 						if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
-							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[0], sc->size[sc->axis_id + 1]);
+#if (VKFFT_BACKEND!=2) //AMD compiler fix
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[0], sc->size[sc->axis_id + 1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+#else
+							sc->tempLen = sprintf(sc->tempStr, "		if(!(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 ")) %s = 0; {\n", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[0], sc->size[sc->axis_id + 1], sc->inoutID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+#endif
+						}
+
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 					else {
 						if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
-							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[1], sc->size[sc->axis_id + 1]);
+#if (VKFFT_BACKEND!=2) //AMD compiler fix
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[1], sc->size[sc->axis_id + 1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+#else
+							sc->tempLen = sprintf(sc->tempStr, "		if(!(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 ")) %s = 0; {\n", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[1], sc->size[sc->axis_id + 1], sc->inoutID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+#endif
+						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
@@ -7904,6 +9425,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					else {
 						if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
@@ -7911,6 +9437,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 
 				}
@@ -7918,27 +9449,27 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		}
 		else {
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 					/*
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 
 					sc->tempLen = sprintf(sc->tempStr, "		inoutID = (combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ");\n", sc->fftDim, sc->fftDim, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize);
 					*/
 					if (sc->axisSwapped) {
-						if ((sc->fft_dim_full - (sc->firstStageStartSize / sc->fftDim) * ((((uint64_t)floor(sc->fft_dim_full / ((double)sc->localSize[0] * sc->fftDim))) / (sc->firstStageStartSize / sc->fftDim)) * sc->localSize[0] * sc->fftDim)) / sc->min_registers_per_thread / (sc->firstStageStartSize / sc->fftDim) > sc->localSize[0]) {
+						if ((sc->fft_dim_full - (sc->firstStageStartSize / sc->fftDim) * ((((uint64_t)floor(sc->fft_dim_full / ((double)sc->localSize[0] * sc->fftDim))) / (sc->firstStageStartSize / sc->fftDim)) * sc->localSize[0] * sc->fftDim)) / used_registers_read / (sc->firstStageStartSize / sc->fftDim) > sc->localSize[0]) {
 							if (sc->localSize[1] == 1)
-								sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+								sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 							else
-								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 "*numActiveThreads;\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread));
+								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 "*numActiveThreads;\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read));
 						}
 						else {
 							if (sc->localSize[1] == 1)
-								sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 "*numActiveThreads;\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread));
+								sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 "*numActiveThreads;\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read));
 							else
-								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 "*numActiveThreads;\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread));
+								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 "*numActiveThreads;\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read));
 						}
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -7947,7 +9478,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "		inoutID = %s+%" PRIu64 "+%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ");\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0], sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize);
+						sc->tempLen = sprintf(sc->tempStr, "		inoutID = %s+%" PRIu64 "+%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ");\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0], sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
@@ -7967,6 +9498,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = appendZeropadStartReadWriteStage(sc, 1);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->readToRegisters) {
+						//not used
 						if (sc->inputBufferBlockNum == 1)
 							sc->tempLen = sprintf(sc->tempStr, "			%s = %s%s[%s]%s;\n", sc->regIDs[i + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 						else
@@ -7976,21 +9508,40 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 					else {
 						if (sc->axisSwapped) {
-
+							if (sc->fftDim % sc->localSize[1]) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							if (sc->inputBufferBlockNum == 1)
 								sc->tempLen = sprintf(sc->tempStr, "		sdata[(combinedID / %" PRIu64 ") + sharedStride*(combinedID %% %" PRIu64 ")] = %s%s[inoutID]%s;\n", sc->fftDim, sc->fftDim, convTypeLeft, inputsStruct, convTypeRight);
 							else
 								sc->tempLen = sprintf(sc->tempStr, "		sdata[(combinedID / %" PRIu64 ") + sharedStride*(combinedID %% %" PRIu64 ")] = %sinputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "]%s;\n", sc->fftDim, sc->fftDim, convTypeLeft, sc->inputBufferBlockSize, inputsStruct, sc->inputBufferBlockSize, convTypeRight);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
+							if (sc->fftDim % sc->localSize[1]) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						else {
+							if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->fftDim - (i + k * used_registers_read) * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							if (sc->inputBufferBlockNum == 1)
-								sc->tempLen = sprintf(sc->tempStr, "		sdata[sharedStride*%s + (%s + %" PRIu64 ")] = %s%s[inoutID]%s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0], convTypeLeft, inputsStruct, convTypeRight);
+								sc->tempLen = sprintf(sc->tempStr, "		sdata[sharedStride*%s + (%s + %" PRIu64 ")] = %s%s[inoutID]%s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0], convTypeLeft, inputsStruct, convTypeRight);
 							else
-								sc->tempLen = sprintf(sc->tempStr, "		sdata[sharedStride*%s + (%s + %" PRIu64 ")] = %sinputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "]%s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0], convTypeLeft, sc->inputBufferBlockSize, inputsStruct, sc->inputBufferBlockSize, convTypeRight);
+								sc->tempLen = sprintf(sc->tempStr, "		sdata[sharedStride*%s + (%s + %" PRIu64 ")] = %sinputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "]%s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0], convTypeLeft, sc->inputBufferBlockSize, inputsStruct, sc->inputBufferBlockSize, convTypeRight);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
+							if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 					}
 					res = appendZeropadEndReadWriteStage(sc);
@@ -8014,10 +9565,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 								if (res != VKFFT_SUCCESS) return res;
 							}
 							else {
-								sc->tempLen = sprintf(sc->tempStr, "			sdata[sharedStride*%s + (%s + %" PRIu64 ")].x = 0;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+								sc->tempLen = sprintf(sc->tempStr, "			sdata[sharedStride*%s + (%s + %" PRIu64 ")].x = 0;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
-								sc->tempLen = sprintf(sc->tempStr, "			sdata[sharedStride*%s + (%s + %" PRIu64 ")].y = 0;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+								sc->tempLen = sprintf(sc->tempStr, "			sdata[sharedStride*%s + (%s + %" PRIu64 ")].y = 0;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
@@ -8039,14 +9590,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		char shiftX[500] = "";
 		if (sc->performWorkGroupShift[0])
 			sprintf(shiftX, " + consts.workGroupShiftX * %s ", sc->gl_WorkGroupSize_x);
+		sc->tempLen = sprintf(sc->tempStr, "		disableThreads = (((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ") < %" PRIu64 ") ? 1 : 0;\n", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->fftDim * sc->stageStartSize, sc->size[sc->axis_id]);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sprintf(sc->disableThreadsStart, "		if(disableThreads>0) {\n");
 
-		sprintf(sc->disableThreadsStart, "		if (((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->fftDim * sc->stageStartSize, sc->size[sc->axis_id]);
 		res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 		if (res != VKFFT_SUCCESS) return res;
 		sprintf(sc->disableThreadsEnd, "}");
+		uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		for (uint64_t k = 0; k < sc->registerBoost; k++) {
-			for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-				sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 "));\n", sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->fftDim * sc->stageStartSize);
+			for (uint64_t i = 0; i < used_registers_read; i++) {
+				sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 "));\n", sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->fftDim * sc->stageStartSize);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 				if (sc->zeropadBluestein[0]) {
@@ -8070,6 +9626,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				if (res != VKFFT_SUCCESS) return res;
 				res = appendZeropadStartReadWriteStage(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
+				if ((1 + i + k * used_registers_read) * sc->localSize[1] >= (sc->fftDim)) {
+					sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_read) * sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				if (sc->readToRegisters) {
 					if (sc->inputBufferBlockNum == 1)
 						sc->tempLen = sprintf(sc->tempStr, "			%s=%s%s[%s]%s;\n", sc->regIDs[i + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
@@ -8080,9 +9641,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				}
 				else {
 					if (sc->inputBufferBlockNum == 1)
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%s%s[%s]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%s%s[%s]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if ((1 + i + k * used_registers_read) * sc->localSize[1] >= (sc->fftDim)) {
+					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -8098,10 +9664,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
@@ -8119,10 +9685,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
@@ -8144,13 +9710,18 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			sprintf(shiftX, " + consts.workGroupShiftX * %s ", sc->gl_WorkGroupSize_x);
 
 		//sc->tempLen = sprintf(sc->tempStr, "		if(gl_GlobalInvolcationID.x%s >= %" PRIu64 ") return; \n", shiftX, sc->size[0] / axis->specializationConstants.fftDim);
-		sprintf(sc->disableThreadsStart, "		if (((%s%s) / %" PRIu64 ") * (%" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize * sc->fftDim, sc->fft_dim_full);
+		sc->tempLen = sprintf(sc->tempStr, "		disableThreads = (((%s%s) / %" PRIu64 ") * (%" PRIu64 ") < %" PRIu64 ") ? 1 : 0;\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize * sc->fftDim, sc->fft_dim_full);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sprintf(sc->disableThreadsStart, "		if(disableThreads>0) {\n");
 		res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 		if (res != VKFFT_SUCCESS) return res;
 		sprintf(sc->disableThreadsEnd, "}");
+		uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		for (uint64_t k = 0; k < sc->registerBoost; k++) {
-			for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-				sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s%s) %% (%" PRIu64 ") + %" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize * sc->fftDim);
+			for (uint64_t i = 0; i < used_registers_read; i++) {
+				sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s%s) %% (%" PRIu64 ") + %" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize * sc->fftDim);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 				if (sc->zeropadBluestein[0]) {
@@ -8173,6 +9744,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				if (res != VKFFT_SUCCESS) return res;
 				res = appendZeropadStartReadWriteStage(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
+				if ((1 + i + k * used_registers_read) * sc->localSize[1] >= (sc->fftDim)) {
+					sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_read) * sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				if (sc->readToRegisters) {
 					if (sc->inputBufferBlockNum == 1)
 						sc->tempLen = sprintf(sc->tempStr, "			%s=%s%s[%s]%s;\n", sc->regIDs[i + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
@@ -8183,9 +9759,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				}
 				else {
 					if (sc->inputBufferBlockNum == 1)
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%s%s[%s]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%s%s[%s]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s]=%sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if ((1 + i + k * used_registers_read) * sc->localSize[1] >= (sc->fftDim)) {
+					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -8201,10 +9782,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
@@ -8222,10 +9803,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].x=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x);
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[%s*(%s+%" PRIu64 ")+%s].y=0;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
@@ -8250,13 +9831,15 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			sprintf(shiftY, " + consts.workGroupShiftY ");
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
 		if (sc->fftDim == sc->fft_dim_full) {
+			uint64_t used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+			if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 
@@ -8272,6 +9855,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					else {
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
@@ -8279,6 +9867,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		if((combinedID %% %" PRIu64 ") < %" PRIu64 "){\n", sc->fft_dim_full, sc->fft_zeropad_Bluestein_left_read[sc->axis_id]);
@@ -8452,6 +10045,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[0] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -8459,6 +10057,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						}
 					}
 					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -8485,11 +10088,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
 		if (sc->fftDim < sc->fft_dim_full) {
+			//not implemented
 			if (sc->axisSwapped)
-				sprintf(sc->disableThreadsStart, "		if(%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0] * sc->firstStageStartSize, sc->fft_dim_full);
+				sc->tempLen = sprintf(sc->tempStr, "		disableThreads = (%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") ? 1 : 0;\n", sc->gl_LocalInvocationID_x, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0] * sc->firstStageStartSize, sc->fft_dim_full);
 			else
-				sprintf(sc->disableThreadsStart, "		if(%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize, sc->fft_dim_full);
-
+				sc->tempLen = sprintf(sc->tempStr, "		disableThreads = (%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ") < %" PRIu64 ") ? 1 : 0;\n", sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize, sc->fft_dim_full);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sprintf(sc->disableThreadsStart, "		if(disableThreads>0) {\n");
 			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 			if (res != VKFFT_SUCCESS) return res;
 			sprintf(sc->disableThreadsEnd, "}");
@@ -8499,10 +10105,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
-
+		uint64_t used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
 		if (sc->fftDim == sc->fft_dim_full) {
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
 				uint64_t num_in = (sc->axisSwapped) ? (uint64_t)ceil(mult * (sc->fftDim / 2 + 1) / (double)sc->localSize[1]) : (uint64_t)ceil(mult * (sc->fftDim / 2 + 1) / (double)sc->localSize[0]);
 				//num_in =(uint64_t)ceil(num_in / (double)sc->min_registers_per_thread);
@@ -8522,11 +10132,17 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->axisSwapped) {
 						if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
+#if (VKFFT_BACKEND!=2) //AMD compiler fix
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim / 2 + 1, sc->gl_WorkGroupID_y, shiftY2, mult * sc->localSize[0], sc->size[sc->axis_id + 1]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
+#else
+							sc->tempLen = sprintf(sc->tempStr, "		if(!(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 ")) %s = 0; {\n", sc->fftDim / 2 + 1, sc->gl_WorkGroupID_y, shiftY2, mult * sc->localSize[0], sc->size[sc->axis_id + 1], sc->inoutID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+#endif
 						}
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", mult * (sc->fftDim / 2 + 1) * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -8534,11 +10150,17 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 					else {
 						if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
+#if (VKFFT_BACKEND!=2) //AMD compiler fix
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim / 2 + 1, sc->gl_WorkGroupID_y, shiftY2, mult * sc->localSize[1], sc->size[sc->axis_id + 1]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
+#else
+							sc->tempLen = sprintf(sc->tempStr, "		if(!(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 ")) %s = 0; {\n", sc->fftDim / 2 + 1, sc->gl_WorkGroupID_y, shiftY2, mult * sc->localSize[1], sc->size[sc->axis_id + 1], sc->inoutID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+#endif
 						}
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[1]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", mult * (sc->fftDim / 2 + 1) * sc->localSize[1]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -8620,14 +10242,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->axisSwapped) {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 					else {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[1]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -8651,7 +10273,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				}
 				res = appendBarrierVkFFT(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 					if (sc->mergeSequencesR2C) {
 						if (sc->axisSwapped) {
 							if (i < ((sc->fftDim / 2 + 1) / sc->localSize[1])) {
@@ -8664,6 +10286,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							}
 							else {
 								if (i >= (uint64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[1])) {
+									if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_read) * sc->localSize[1]);
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 									if ((((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[1] - ((sc->fftDim / 2) % sc->localSize[1] + 1))) > (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[1]))) * sc->localSize[1]) && ((uint64_t)ceil(sc->fftDim / 2.0) - 1 > (sc->localSize[1] - ((sc->fftDim / 2) % sc->localSize[1] + 1)))) {
 										if (sc->zeropadBluestein[0]) {
 											sc->tempLen = sprintf(sc->tempStr, "		if(%" PRIu64 " > %s){\n", ((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[1] - ((sc->fftDim / 2) % sc->localSize[1] + 1))) - (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[1]))) * sc->localSize[1], sc->gl_LocalInvocationID_y);
@@ -8699,6 +10326,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 										res = VkAppendLine(sc);
 										if (res != VKFFT_SUCCESS) return res;
 									}
+									if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		}\n");
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 								}
 								else {
 									if (sc->localSize[1] > sc->fftDim) {
@@ -8755,6 +10387,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							}
 							else {
 								if (i >= (uint64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[0])) {
+									if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->fftDim - (i + k * used_registers_read) * sc->localSize[0]);
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 									if ((((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[0] - ((sc->fftDim / 2) % sc->localSize[0] + 1))) > (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[0]))) * sc->localSize[0]) && ((uint64_t)ceil(sc->fftDim / 2.0) - 1 > (sc->localSize[0] - ((sc->fftDim / 2) % sc->localSize[0] + 1)))) {
 										if (sc->zeropadBluestein[0]) {
 											sc->tempLen = sprintf(sc->tempStr, "		if(%" PRIu64 " > %s){\n", ((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[0] - ((sc->fftDim / 2) % sc->localSize[0] + 1))) - (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[0]))) * sc->localSize[0], sc->gl_LocalInvocationID_x);
@@ -8790,6 +10427,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 										res = VkAppendLine(sc);
 										if (res != VKFFT_SUCCESS) return res;
 									}
+									if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		}\n");
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 								}
 								else {
 									if (sc->localSize[0] > sc->fftDim) {
@@ -8848,6 +10490,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							}
 							else {
 								if (i >= (uint64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[1])) {
+									if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_read) * sc->localSize[1]);
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 									if ((((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[1] - ((sc->fftDim / 2) % sc->localSize[1] + 1))) > (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[1]))) * sc->localSize[1]) && ((uint64_t)ceil(sc->fftDim / 2.0) - 1 > (sc->localSize[1] - ((sc->fftDim / 2) % sc->localSize[1] + 1)))) {
 										if (sc->zeropadBluestein[0]) {
 											sc->tempLen = sprintf(sc->tempStr, "		if(%" PRIu64 " > %s){\n", ((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[1] - ((sc->fftDim / 2) % sc->localSize[1] + 1))) - (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[1]))) * sc->localSize[1], sc->gl_LocalInvocationID_y);
@@ -8883,6 +10530,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 										res = VkAppendLine(sc);
 										if (res != VKFFT_SUCCESS) return res;
 									}
+									if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		}\n");
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 								}
 								else {
 									if (sc->localSize[1] > sc->fftDim) {
@@ -8939,6 +10591,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							}
 							else {
 								if (i >= (uint64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[0])) {
+									if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->fftDim - (i + k * used_registers_read) * sc->localSize[0]);
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 									if ((((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[0] - ((sc->fftDim / 2) % sc->localSize[0] + 1))) > (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[0]))) * sc->localSize[0]) && ((uint64_t)ceil(sc->fftDim / 2.0) - 1 > (sc->localSize[0] - ((sc->fftDim / 2) % sc->localSize[0] + 1)))) {
 										if (sc->zeropadBluestein[0]) {
 											sc->tempLen = sprintf(sc->tempStr, "		if(%" PRIu64 " > %s){\n", ((uint64_t)ceil(sc->fftDim / 2.0) - 1 - (sc->localSize[0] - ((sc->fftDim / 2) % sc->localSize[0] + 1))) - (i - ((int64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[0]))) * sc->localSize[0], sc->gl_LocalInvocationID_x);
@@ -8974,6 +10631,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 										res = VkAppendLine(sc);
 										if (res != VKFFT_SUCCESS) return res;
 									}
+									if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+										sc->tempLen = sprintf(sc->tempStr, "		}\n");
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
 								}
 								else {
 									if (sc->localSize[0] > sc->fftDim) {
@@ -9024,7 +10686,54 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				}
 			}
 			//sc->readToRegisters = 1;
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_dim_full;
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_dim_full;
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+			}
+			if (!sc->readToRegisters) {
+				res = appendBarrierVkFFT(sc, 1);
+				if (res != VKFFT_SUCCESS) return res;
+				for (uint64_t k = 0; k < sc->registerBoost; k++) {
+					for (uint64_t i = 0; i < used_registers_read; i++) {
+						if (sc->axisSwapped) {
+							if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_read) * sc->localSize[1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s+%" PRIu64 ") * sharedStride + %s].x = %s.x;\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s+%" PRIu64 ") * sharedStride + %s].y = %s.y;\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						else {
+							if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->fftDim - (i + k * used_registers_read) * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s) * sharedStride + (%s+%" PRIu64 ")].x = %s.x;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0], sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s) * sharedStride + (%s+%" PRIu64 ")].y = %s.y;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0], sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+				}
+			}
 		}
 		else {
 
@@ -9506,6 +11215,8 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		if (sc->fftDim == sc->fft_dim_full) {
 			if (sc->zeropadBluestein[0]) {
 				res = appendSetSMToZero(sc, floatType, floatTypeMemory, uintType, readType);
@@ -9513,14 +11224,15 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				res = appendBarrierVkFFT(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
 				sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
 			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->inputStride[0] > 1)
@@ -9530,6 +11242,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[0] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, shiftY, sc->localSize[0], (uint64_t)ceil(sc->size[1] / (double)mult));
 							res = VkAppendLine(sc);
@@ -9542,6 +11259,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						}
 					}
 					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, shiftY, sc->localSize[1], (uint64_t)ceil(sc->size[1] / (double)mult));
 							res = VkAppendLine(sc);
@@ -9670,6 +11392,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					else {
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
@@ -9677,10 +11404,18 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 				}
 			}
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_dim_full;
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_dim_full;
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+			}
 		}
 		else {
 			//Not implemented
@@ -9699,6 +11434,8 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		if (sc->fftDim == sc->fft_dim_full) {
 			if (sc->zeropadBluestein[0]) {
 				res = appendSetSMToZero(sc, floatType, floatTypeMemory, uintType, readType);
@@ -9706,11 +11443,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				res = appendBarrierVkFFT(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
 				sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+				used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
 			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < mult * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < mult * used_registers_read; i++) {
 
-					//sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * mult * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+					//sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * mult * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					//res = VkAppendLine(sc);
 					//if (res != VKFFT_SUCCESS) return res;
 
@@ -9720,18 +11458,23 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->mergeSequencesR2C)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ") / %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], mult);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ") / %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], mult);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + mult * k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (mult * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		if((combinedID %% %" PRIu64 ") < %" PRIu64 "){\n", sc->fft_dim_full, sc->fft_zeropad_Bluestein_left_read[sc->axis_id]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->mergeSequencesR2C)
-						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (%s + ((%s + %" PRIu64 ") %% %" PRIu64 ") * %" PRIu64 ") / %" PRIu64 ";\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->gl_LocalInvocationID_x, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], mult, sc->localSize[0], mult);
+						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (%s + ((%s + %" PRIu64 ") %% %" PRIu64 ") * %" PRIu64 ") / %" PRIu64 ";\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->gl_LocalInvocationID_x, sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], mult, sc->localSize[0], mult);
 					else
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + %s;\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->gl_LocalInvocationID_x);
 
@@ -9743,11 +11486,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					if (sc->mergeSequencesR2C) {
 						sprintf(index_x, "(%s + %" PRIu64 " * ((%s %% %" PRIu64 ") + (%s%s) * %" PRIu64 ")) %% (%" PRIu64 ")", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, mult, sc->gl_WorkGroupID_x, shiftX, mult, sc->fft_dim_x);
 
-						sprintf(index_y, "(%s/%" PRIu64 " + %" PRIu64 ")", sc->gl_LocalInvocationID_y, mult, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+						sprintf(index_y, "(%s/%" PRIu64 " + %" PRIu64 ")", sc->gl_LocalInvocationID_y, mult, (i + k * used_registers_read) * sc->localSize[1]);
 					}
 					else {
 						sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX2, sc->fft_dim_x);
-						sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+						sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1]);
 					}
 					res = indexInputVkFFT(sc, uintType, readType, index_x, index_y, requestCoordinate, requestBatch);
 					if (res != VKFFT_SUCCESS) return res;
@@ -9834,6 +11577,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + mult * k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (mult * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if ((uint64_t)ceil(sc->size[0] / (double)mult) % sc->localSize[0] != 0) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -9841,7 +11589,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 				}
 			}
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_dim_full;
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_dim_full;
+				used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+			}
 		}
 		else {
 			//Not implemented
@@ -9890,7 +11641,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -9902,7 +11653,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[1]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[1]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -9924,12 +11675,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", cosDef, double_PI / 2 / sc->fftDim, LFending, sc->fftDim / 2 + 1);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", sinDef, double_PI / 2 / sc->fftDim, LFending, sc->fftDim / 2 + 1);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", cosDef, (double)(double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", sinDef, (double)(double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (combinedID %% %" PRIu64 ") );\n", (double)(double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					if (sc->zeropad[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		if((inoutID %% %" PRIu64 " < %" PRIu64 ")||(inoutID %% %" PRIu64 " >= %" PRIu64 ")){\n", sc->inputStride[1], sc->fft_zeropad_left_read[sc->axis_id], sc->inputStride[1], sc->fft_zeropad_right_read[sc->axis_id]);
@@ -10085,14 +11843,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					if (res != VKFFT_SUCCESS) return res;
 
 					if (sc->axisSwapped) {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 					else {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[1])
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[1])
 						{
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
@@ -10227,12 +11985,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID) );\n", cosDef, double_PI / 2 / sc->fftDim, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID) );\n", sinDef, double_PI / 2 / sc->fftDim, LFending);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID) );\n", cosDef, (double)(double_PI / 2 / sc->fftDim), LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID) );\n", sinDef, (double)(double_PI / 2 / sc->fftDim), LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (combinedID) );\n", (double)(double_PI / 2 / sc->fftDim), LFending);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					//sc->tempLen = sprintf(sc->tempStr, "		printf(\" %%f - %%f \\n\", mult.x, mult.y);\n");
 					//res = VkAppendLine(sc);
@@ -10802,6 +12567,8 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		char shiftY[500] = "";
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
+		uint64_t used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		if (sc->fftDim == sc->fft_dim_full) {
 			if (sc->zeropadBluestein[0]) {
 				res = appendSetSMToZero(sc, floatType, floatTypeMemory, uintType, readType);
@@ -10809,6 +12576,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				res = appendBarrierVkFFT(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
 				sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
 			}
 			uint64_t maxBluesteinCutOff = 1;
 			if (sc->zeropadBluestein[0]) {
@@ -10818,12 +12586,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					maxBluesteinCutOff = 2 * sc->fftDim * sc->localSize[1];
 			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < 2 * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < 2 * used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 
@@ -10834,6 +12602,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->axisSwapped) {
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[0] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", 2 * sc->fftDim, sc->gl_WorkGroupID_y, shiftY, sc->localSize[0], (uint64_t)ceil(sc->size[1]));
 							res = VkAppendLine(sc);
@@ -10841,6 +12614,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						}
 					}
 					else {
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[1] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", 2 * sc->fftDim, sc->gl_WorkGroupID_y, shiftY, sc->localSize[1], (uint64_t)ceil(sc->size[1]));
 							res = VkAppendLine(sc);
@@ -10867,7 +12645,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					if (res != VKFFT_SUCCESS) return res;
 					res = appendZeropadStartReadWriteStage(sc, 1);
 					if (res != VKFFT_SUCCESS) return res;
-#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)))//OpenCL and Level Zero are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5)))//OpenCL, Level Zero and Metal are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
 					if (sc->inputBufferBlockNum == 1)
 						sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s%s[%s]%s;\n", sc->regIDs[0], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 					else
@@ -10875,7 +12653,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 #else
-					if (i < sc->min_registers_per_thread) {
+					if (i < used_registers_read) {
 						if (sc->inputBufferBlockNum == 1)
 							sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s%s[%s]%s;\n", sc->regIDs[i + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 						else
@@ -10885,14 +12663,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 					else {
 						if (sc->inputBufferBlockNum == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s%s[%s]%s;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s%s[%s]%s;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread], convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread], convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
 #endif
-#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)))//OpenCL and Level Zero are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5)))//OpenCL, Level Zero and Metal are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
 					if (sc->axisSwapped) {
 						//sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = ((combinedID %% %" PRIu64 ")/2) * sharedStride + (combinedID / %" PRIu64 ");\n", 2 * sc->fftDim, 2 * sc->fftDim);
@@ -10973,6 +12751,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					else {
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[1] != 0) {
@@ -10980,17 +12763,22 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 				}
 			}
-#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))//OpenCL is not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5))//OpenCL is not handling barrier with thread-conditional writes to local memory - so this is a work-around
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < 2 * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < 2 * used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 
@@ -11006,6 +12794,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					else {
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[1] != 0) {
@@ -11013,6 +12806,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", maxBluesteinCutOff);
@@ -11044,7 +12842,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					if (i < sc->min_registers_per_thread) {
+					if (i < used_registers_read) {
 						sc->tempLen = sprintf(sc->tempStr, "		if (((combinedID %% %" PRIu64 ")%%2) == 0) {\n", 2 * sc->fftDim);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -11059,7 +12857,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						sc->tempLen = sprintf(sc->tempStr, "		if (((combinedID %% %" PRIu64 ")%%2) == 0) {\n", 2 * sc->fftDim);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].x = %s.y;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread]);
+						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].x = %s.y;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
@@ -11111,6 +12909,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->axisSwapped) {
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[0] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -11118,6 +12921,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						}
 					}
 					else {
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[1] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -11129,12 +12937,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < 2 * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < 2 * used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 
@@ -11150,6 +12958,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					else {
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[1] != 0) {
@@ -11157,6 +12970,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", maxBluesteinCutOff);
@@ -11188,7 +13006,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					if (i < sc->min_registers_per_thread) {
+					if (i < used_registers_read) {
 						sc->tempLen = sprintf(sc->tempStr, "		if (((combinedID %% %" PRIu64 ")%%2) == 1) {\n", 2 * sc->fftDim);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -11203,7 +13021,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						sc->tempLen = sprintf(sc->tempStr, "		if (((combinedID %% %" PRIu64 ")%%2) == 1) {\n", 2 * sc->fftDim);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].y = %s.y;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread]);
+						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].y = %s.y;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
@@ -11255,6 +13073,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->axisSwapped) {
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[0] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -11262,6 +13085,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						}
 					}
 					else {
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1]) % sc->localSize[1] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -11283,12 +13111,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					maxBluesteinCutOff = sc->fftDim * sc->localSize[1];
 			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -11297,9 +13125,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim);
 					}
 					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID %% %" PRIu64 ")  + (combinedID / %" PRIu64 ") * sharedStride;\n", sc->fftDim, sc->fftDim);
 					}
 					res = VkAppendLine(sc);
@@ -11344,6 +13182,20 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -11362,12 +13214,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = appendZeropadStart(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -11376,9 +13228,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim);
 					}
 					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride;\n", sc->fftDim, sc->fftDim);
 					}
 					res = VkAppendLine(sc);
@@ -11389,7 +13251,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].x = %s.x;\n", sc->regIDs[i + k * sc->registers_per_thread]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)))//OpenCL and Level Zero are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5)))//OpenCL, Level Zero and Metal are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
 					if (sc->axisSwapped) {
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (%" PRIu64 " - combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim, sc->fftDim);
 					}
@@ -11411,6 +13273,20 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -11425,16 +13301,16 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
-#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5))
 			res = appendZeropadStart(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -11446,9 +13322,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (%" PRIu64 " - combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim, sc->fftDim);
 					}
 					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (%" PRIu64 " - combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride;\n", sc->fftDim, sc->fftDim, sc->fftDim);
 					}
 					res = VkAppendLine(sc);
@@ -11459,6 +13345,20 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -11488,14 +13388,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->axisSwapped) {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 					else {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[1]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[1]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -11507,12 +13407,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", cosDef, double_PI / 2 / sc->fftDim, LFending, sc->fftDim / 2 + 1);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", sinDef, double_PI / 2 / sc->fftDim, LFending, sc->fftDim / 2 + 1);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", cosDef, (double)(double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", sinDef, (double)(double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (combinedID %% %" PRIu64 ") );\n", (double)(double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 
 					if (sc->axisSwapped) {
@@ -11578,14 +13485,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->axisSwapped) {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
 					else {
-						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[1]) {
+						if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -11595,7 +13502,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			}
 			res = appendZeropadEnd(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_dim_full;
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_dim_full;
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+			}
 		}
 		else {
 			//Not implemented
@@ -11613,6 +13523,8 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		char shiftY[500] = "";
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
+		uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		if (sc->fftDim == sc->fft_dim_full) {
 			if (sc->zeropadBluestein[0]) {
 				res = appendSetSMToZero(sc, floatType, floatTypeMemory, uintType, readType);
@@ -11620,14 +13532,15 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				res = appendBarrierVkFFT(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
 				sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+				used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
 			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < 2 * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < 2 * used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -11635,6 +13548,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if ((uint64_t)ceil(sc->size[0]) % sc->localSize[0] != 0) {
 						sc->tempLen = sprintf(sc->tempStr, "		if ((%s%s) < %" PRIu64 ") {\n", sc->gl_GlobalInvocationID_x, shiftX2, (uint64_t)ceil(sc->size[0]));
 						res = VkAppendLine(sc);
@@ -11644,7 +13562,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX2, sc->fft_dim_x);
-					sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[1]);
+					sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[1]);
 					res = indexInputVkFFT(sc, uintType, readType, index_x, index_y, requestCoordinate, requestBatch);
 					if (res != VKFFT_SUCCESS) return res;
 					sc->tempLen = sprintf(sc->tempStr, ";\n");
@@ -11658,7 +13576,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 
 					res = appendZeropadStartReadWriteStage(sc, 1);
 					if (res != VKFFT_SUCCESS) return res;
-#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)))//OpenCL and Level Zero are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5)))//OpenCL, Level Zero and Metal are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
 					if (sc->inputBufferBlockNum == 1)
 						sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s%s[%s]%s;\n", sc->regIDs[0], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 					else
@@ -11666,7 +13584,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 #else
-					if (i < sc->min_registers_per_thread) {
+					if (i < used_registers_read) {
 						if (sc->inputBufferBlockNum == 1)
 							sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s%s[%s]%s;\n", sc->regIDs[i + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 						else
@@ -11676,14 +13594,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 					else {
 						if (sc->inputBufferBlockNum == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s%s[%s]%s;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s%s[%s]%s;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread], convTypeLeft, inputsStruct, sc->inoutID, convTypeRight);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread], convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %sinputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "]%s;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread], convTypeLeft, sc->inoutID, sc->inputBufferBlockSize, inputsStruct, sc->inoutID, sc->inputBufferBlockSize, convTypeRight);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
 #endif
-#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)))//OpenCL and Level Zero are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5)))//OpenCL, Level Zero and Metal are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
 					sc->tempLen = sprintf(sc->tempStr, "		sdataID = ((combinedID / %" PRIu64 ")/2) * sharedStride + (combinedID %% %" PRIu64 ");\n", sc->localSize[0], sc->localSize[0]);
 
 					res = VkAppendLine(sc);
@@ -11738,6 +13656,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -11745,14 +13668,14 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					}
 				}
 			}
-#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))//OpenCL is not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5))//OpenCL is not handling barrier with thread-conditional writes to local memory - so this is a work-around
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < 2 * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < 2 * used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -11760,6 +13683,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if ((uint64_t)ceil(sc->size[0]) % sc->localSize[0] != 0) {
 						sc->tempLen = sprintf(sc->tempStr, "		if ((%s%s) < %" PRIu64 ") {\n", sc->gl_GlobalInvocationID_x, shiftX2, (uint64_t)ceil(sc->size[0]));
 						res = VkAppendLine(sc);
@@ -11769,7 +13697,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX2, sc->fft_dim_x);
-					sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[1]);
+					sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[1]);
 					res = indexInputVkFFT(sc, uintType, readType, index_x, index_y, requestCoordinate, requestBatch);
 					if (res != VKFFT_SUCCESS) return res;
 					sc->tempLen = sprintf(sc->tempStr, ";\n");
@@ -11787,7 +13715,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					if (i < sc->min_registers_per_thread) {
+					if (i < used_registers_read) {
 						sc->tempLen = sprintf(sc->tempStr, "		if ((combinedID / %" PRIu64 ")%%2 == 0) {\n", sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -11802,7 +13730,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						sc->tempLen = sprintf(sc->tempStr, "		if ((combinedID / %" PRIu64 ")%%2 == 0) {\n", sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].x = %s.y;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread]);
+						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].x = %s.y;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
@@ -11843,6 +13771,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -11853,12 +13786,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < 2 * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < 2 * used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * 2 * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -11866,6 +13799,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", 2 * sc->fftDim * sc->localSize[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if ((uint64_t)ceil(sc->size[0]) % sc->localSize[0] != 0) {
 						sc->tempLen = sprintf(sc->tempStr, "		if ((%s%s) < %" PRIu64 ") {\n", sc->gl_GlobalInvocationID_x, shiftX2, (uint64_t)ceil(sc->size[0]));
 						res = VkAppendLine(sc);
@@ -11875,7 +13813,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX2, sc->fft_dim_x);
-					sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[1]);
+					sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_read) * sc->localSize[1]);
 					res = indexInputVkFFT(sc, uintType, readType, index_x, index_y, requestCoordinate, requestBatch);
 					if (res != VKFFT_SUCCESS) return res;
 					sc->tempLen = sprintf(sc->tempStr, ";\n");
@@ -11893,7 +13831,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					if (i < sc->min_registers_per_thread) {
+					if (i < used_registers_read) {
 						sc->tempLen = sprintf(sc->tempStr, "		if ((combinedID / %" PRIu64 ")%%2 == 1) {\n", sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -11908,7 +13846,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						sc->tempLen = sprintf(sc->tempStr, "		if ((combinedID / %" PRIu64 ")%%2 == 1) {\n", sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].y = %s.y;\n", sc->regIDs[i - sc->min_registers_per_thread + k * sc->registers_per_thread]);
+						sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].y = %s.y;\n", sc->regIDs[i - used_registers_read + k * sc->registers_per_thread]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
@@ -11949,6 +13887,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (2 * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -11962,12 +13905,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = appendZeropadStart(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -11975,6 +13918,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID / %" PRIu64 ") * sharedStride + (combinedID %% %" PRIu64 ");\n", sc->localSize[0], sc->localSize[0]);
 
 					res = VkAppendLine(sc);
@@ -12011,6 +13959,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -12028,12 +13981,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = appendZeropadStart(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -12041,6 +13994,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID / %" PRIu64 ") * sharedStride + (combinedID %% %" PRIu64 ");\n", sc->localSize[0], sc->localSize[0]);
 
 					res = VkAppendLine(sc);
@@ -12051,7 +14009,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		sdata[sdataID].x = %s.x;\n", sc->regIDs[i + k * sc->registers_per_thread]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)))//OpenCL and Level Zero are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
+#if(!((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5)))//OpenCL, Level Zero and Metal are  not handling barrier with thread-conditional writes to local memory - so this is a work-around
 					sc->tempLen = sprintf(sc->tempStr, "		sdataID = (%" PRIu64 " - combinedID / %" PRIu64 ") * sharedStride + (combinedID %% %" PRIu64 ");\n", sc->fftDim, sc->localSize[0], sc->localSize[0]);
 
 					res = VkAppendLine(sc);
@@ -12069,6 +14027,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -12080,16 +14043,16 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
-#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+#if((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)||(VKFFT_BACKEND==5))
 			res = appendZeropadStart(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -12097,6 +14060,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "		if((combinedID / %" PRIu64 ")>0){\n", sc->localSize[0]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -12110,6 +14078,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -12136,7 +14109,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 
-					if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+					if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -12147,12 +14120,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID / %" PRIu64 ") );\n", cosDef, double_PI / 2 / sc->fftDim, LFending, sc->localSize[0]);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID / %" PRIu64 ") );\n", sinDef, double_PI / 2 / sc->fftDim, LFending, sc->localSize[0]);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (combinedID / %" PRIu64 ") );\n", cosDef, (double)(double_PI / 2 / sc->fftDim), LFending, sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (combinedID / %" PRIu64 ") );\n", sinDef, (double)(double_PI / 2 / sc->fftDim), LFending, sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (combinedID / %" PRIu64 ") );\n", (double)(double_PI / 2 / sc->fftDim), LFending, sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 
 					sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID / %" PRIu64 ") * sharedStride + (combinedID %% %" PRIu64 ");\n", sc->localSize[0], sc->localSize[0]);
@@ -12202,7 +14182,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					sc->tempLen = sprintf(sc->tempStr, "			}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+					if ((1 + i + k * num_in) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -12211,7 +14191,10 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			}
 			res = appendZeropadEnd(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_dim_full;
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_dim_full;
+				used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+			}
 		}
 		else {
 			//Not implemented
@@ -12227,6 +14210,8 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		if (sc->fftDim == sc->fft_dim_full) {
 			if (sc->zeropadBluestein[0]) {
 				res = appendSetSMToZero(sc, floatType, floatTypeMemory, uintType, readType);
@@ -12234,14 +14219,15 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				res = appendBarrierVkFFT(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
 				sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
 			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 
 					if (sc->localSize[1] == 1)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->inputStride[0] > 1)
@@ -12256,6 +14242,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if (sc->zeropadBluestein[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
 							res = VkAppendLine(sc);
@@ -12268,6 +14259,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * 2 * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if (sc->zeropadBluestein[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
 							res = VkAppendLine(sc);
@@ -12386,6 +14382,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->axisSwapped) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[0] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -12393,6 +14394,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						}
 					}
 					else {
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
 							sc->tempLen = sprintf(sc->tempStr, "		}");
 							res = VkAppendLine(sc);
@@ -12406,9 +14412,9 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = appendZeropadStart(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_read; i++) {
 					if (!sc->axisSwapped) {
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 
@@ -12417,7 +14423,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		inoutID = %" PRIu64 " + 4 * (combinedID %% %" PRIu64 ");\n", sc->fftDim / 2, sc->fftDim);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -12453,6 +14463,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			%s.y = -%s.y;}\n", 3 * sc->fftDim, 2 * sc->fftDim, sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if (sc->zeropadBluestein[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
@@ -12460,7 +14475,7 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						}
 					}
 					else {
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->zeropadBluestein[0]) {
@@ -12468,6 +14483,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		inoutID = %" PRIu64 " + 4 * combinedID;\n", sc->fftDim / 2);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -12503,6 +14523,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			%s.y = -%s.y;}\n", 3 * sc->fftDim, 2 * sc->fftDim, sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if (sc->zeropadBluestein[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
@@ -12513,7 +14538,58 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			}
 			res = appendZeropadEnd(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_dim_full;
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_dim_full;
+				used_registers_read = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+			}
+			if (!sc->readToRegisters) {
+				res = appendBarrierVkFFT(sc, 1);
+				if (res != VKFFT_SUCCESS) return res;
+				res = appendZeropadStart(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				for (uint64_t k = 0; k < sc->registerBoost; k++) {
+					for (uint64_t i = 0; i < used_registers_read; i++) {
+						if (sc->axisSwapped) {
+							if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_read) * sc->localSize[1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s+%" PRIu64 ") * sharedStride + %s].x = %s.x;\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s+%" PRIu64 ") * sharedStride + %s].y = %s.y;\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						else {
+							if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->fftDim - (i + k * used_registers_read) * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s) * sharedStride + (%s+%" PRIu64 ")].x = %s.x;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0], sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s) * sharedStride + (%s+%" PRIu64 ")].y = %s.y;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_read) * sc->localSize[0], sc->regIDs[i + k * sc->registers_per_thread]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							if ((1 + i + k * used_registers_read) * sc->localSize[0] > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+				}
+				res = appendZeropadEnd(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
 		}
 		else {
 			//Not implemented
@@ -12532,6 +14608,8 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY, " + consts.workGroupShiftY ");
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_read /= sc->registerBoost;
 		if (sc->fftDim == sc->fft_dim_full) {
 			if (sc->zeropadBluestein[0]) {
 				res = appendSetSMToZero(sc, floatType, floatTypeMemory, uintType, readType);
@@ -12539,11 +14617,12 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 				res = appendBarrierVkFFT(sc, 1);
 				if (res != VKFFT_SUCCESS) return res;
 				sc->fftDim = sc->fft_zeropad_Bluestein_left_read[sc->axis_id];
+				used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
 			}
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < mult * sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < mult * used_registers_read; i++) {
 
-					//sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * mult * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+					//sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * mult * used_registers_read) * sc->localSize[0] * sc->localSize[1]);
 					//res = VkAppendLine(sc);
 					//if (res != VKFFT_SUCCESS) return res;
 
@@ -12553,9 +14632,9 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					if (sc->mergeSequencesR2C)
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ") / %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], mult);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ") / %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], mult);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropadBluestein[0]) {
@@ -12563,7 +14642,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
-
+					if ((1 + i + k * mult * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (mult * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "		sdataID = (combinedID %% %" PRIu64 ") * sharedStride + %s;\n", sc->fftDim, sc->gl_LocalInvocationID_x);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -12573,11 +14656,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 					if (sc->mergeSequencesR2C) {
 						sprintf(index_x, "(%s + %" PRIu64 " * ((%s %% %" PRIu64 ") + (%s%s) * %" PRIu64 ")) %% (%" PRIu64 ")", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, mult, sc->gl_WorkGroupID_x, shiftX, mult, sc->fft_dim_x);
 
-						sprintf(index_y, "(%s/%" PRIu64 " + %" PRIu64 ")", sc->gl_LocalInvocationID_y, mult, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+						sprintf(index_y, "(%s/%" PRIu64 " + %" PRIu64 ")", sc->gl_LocalInvocationID_y, mult, (i + k * used_registers_read) * sc->localSize[1]);
 					}
 					else {
 						sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX2, sc->fft_dim_x);
-						sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+						sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1]);
 					}
 					res = indexInputVkFFT(sc, uintType, readType, index_x, index_y, requestCoordinate, requestBatch);
 					if (res != VKFFT_SUCCESS) return res;
@@ -12618,6 +14701,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
+					if ((1 + i + k * mult * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (mult * sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -12635,23 +14723,19 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			res = appendZeropadStart(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-					sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1]);
+				for (uint64_t i = 0; i < used_registers_read; i++) {
+					sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					if (sc->axisSwapped) {
-						if (sc->zeropadBluestein[0]) {
-							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
+					if (sc->zeropadBluestein[0]) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
 					}
-					else {
-						if (sc->zeropadBluestein[0]) {
-							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
 					}
 					sc->tempLen = sprintf(sc->tempStr, "		inoutID = %" PRIu64 " + 4 * combinedID;\n", sc->fftDim / 2);
 					res = VkAppendLine(sc);
@@ -12688,6 +14772,11 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			%s.y = -%s.y;}\n", 3 * sc->fftDim, 2 * sc->fftDim, sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + k * used_registers_read) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->zeropadBluestein[0]) {
 						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
@@ -12697,7 +14786,38 @@ static inline VkFFTResult appendReadDataVkFFT(VkFFTSpecializationConstantsLayout
 			}
 			res = appendZeropadEnd(sc);
 			if (res != VKFFT_SUCCESS) return res;
-			if (sc->zeropadBluestein[0]) sc->fftDim = sc->fft_dim_full;
+			if (sc->zeropadBluestein[0]) {
+				sc->fftDim = sc->fft_dim_full;
+				used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+			}
+			if (!sc->readToRegisters) {
+				res = appendBarrierVkFFT(sc, 1);
+				if (res != VKFFT_SUCCESS) return res;
+				res = appendZeropadStart(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				for (uint64_t k = 0; k < sc->registerBoost; k++) {
+					for (uint64_t i = 0; i < used_registers_read; i++) {
+						if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_read) * sc->localSize[1]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s+%" PRIu64 ") * sharedStride + %s].x = %s.x;\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, sc->regIDs[i + k * sc->registers_per_thread]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						sc->tempLen = sprintf(sc->tempStr, "			sdata[(%s+%" PRIu64 ") * sharedStride + %s].y = %s.y;\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_read) * sc->localSize[1], sc->gl_LocalInvocationID_x, sc->regIDs[i + k * sc->registers_per_thread]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if ((1 + i + k * used_registers_read) * sc->localSize[1] > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+				res = appendZeropadEnd(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
 		}
 		else {
 			//Not implemented
@@ -12737,9 +14857,15 @@ static inline VkFFTResult appendReorder4StepRead(VkFFTSpecializationConstantsLay
 	char cosDef[20] = "native_cos";
 	char sinDef[20] = "native_sin";
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
 
-	uint64_t logicalRegistersPerThread = sc->registers_per_thread_per_radix[sc->stageRadix[0]];// (sc->registers_per_thread % sc->stageRadix[sc->numStages - 1] == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
+	uint64_t logicalRegistersPerThread = (sc->rader_generator[0] > 0) ? sc->min_registers_per_thread : sc->registers_per_thread_per_radix[sc->stageRadix[0]];// (sc->registers_per_thread % sc->stageRadix[sc->numStages - 1] == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
 	switch (reorderType) {
 	case 1: {//grouped_c2c
 		char shiftX[500] = "";
@@ -12761,9 +14887,14 @@ static inline VkFFTResult appendReorder4StepRead(VkFFTSpecializationConstantsLay
 			if (res != VKFFT_SUCCESS) return res;
 			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 			if (res != VKFFT_SUCCESS) return res;
-			for (uint64_t i = 0; i < sc->fftDim / sc->localSize[1]; i++) {
+			for (uint64_t i = 0; i < (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]); i++) {
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim % sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				uint64_t id = (i / logicalRegistersPerThread) * sc->registers_per_thread + i % logicalRegistersPerThread;
-				if (sc->LUT) {
+				if ((sc->LUT) && (sc->LUT_4step)) {
 					sc->tempLen = sprintf(sc->tempStr, "		mult = twiddleLUT[%" PRIu64 "+(((%s%s)/%" PRIu64 ") %% (%" PRIu64 "))+%" PRIu64 "*(%s+%" PRIu64 ")];\n", sc->maxStageSumLUT, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -12774,7 +14905,7 @@ static inline VkFFTResult appendReorder4StepRead(VkFFTSpecializationConstantsLay
 					}
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * loc_PI * ((((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %f%s;\n", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * %.17e%s * ((((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %.17e%s;\n", 3.1415926535897932384626433832795, LFending, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (!strcmp(floatType, "float")) {
@@ -12826,6 +14957,11 @@ static inline VkFFTResult appendReorder4StepRead(VkFFTSpecializationConstantsLay
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 			}
 			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 			if (res != VKFFT_SUCCESS) return res;
@@ -12854,9 +14990,14 @@ static inline VkFFTResult appendReorder4StepRead(VkFFTSpecializationConstantsLay
 			if (res != VKFFT_SUCCESS) return res;
 			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 			if (res != VKFFT_SUCCESS) return res;
-			for (uint64_t i = 0; i < sc->fftDim / sc->localSize[1]; i++) {
+			for (uint64_t i = 0; i < (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]); i++) {
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim % sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				uint64_t id = (i / logicalRegistersPerThread) * sc->registers_per_thread + i % logicalRegistersPerThread;
-				if (sc->LUT) {
+				if ((sc->LUT) && (sc->LUT_4step)) {
 					sc->tempLen = sprintf(sc->tempStr, "		mult = twiddleLUT[%" PRIu64 " + ((%s%s) %% (%" PRIu64 ")) + (%s + %" PRIu64 ") * %" PRIu64 "];\n", sc->maxStageSumLUT, sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], sc->stageStartSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -12867,7 +15008,7 @@ static inline VkFFTResult appendReorder4StepRead(VkFFTSpecializationConstantsLay
 					}
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * loc_PI * ((((%s%s) %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %f%s);\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * %.17e%s * ((((%s%s) %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %.17e%s);\n", 3.1415926535897932384626433832795, LFending, sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 
@@ -12920,6 +15061,11 @@ static inline VkFFTResult appendReorder4StepRead(VkFFTSpecializationConstantsLay
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 			}
 			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 			if (res != VKFFT_SUCCESS) return res;
@@ -12961,9 +15107,16 @@ static inline VkFFTResult appendReorder4StepWrite(VkFFTSpecializationConstantsLa
 	char cosDef[20] = "native_cos";
 	char sinDef[20] = "native_sin";
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+
 #endif
 
-	uint64_t logicalRegistersPerThread = sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]];// (sc->registers_per_thread % sc->stageRadix[sc->numStages - 1] == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
+	uint64_t logicalRegistersPerThread = (sc->rader_generator[sc->numStages - 1] > 0) ? sc->min_registers_per_thread : sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]];// (sc->registers_per_thread % sc->stageRadix[sc->numStages - 1] == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
 	switch (reorderType) {
 	case 1: {//grouped_c2c
 		char shiftX[500] = "";
@@ -12985,9 +15138,14 @@ static inline VkFFTResult appendReorder4StepWrite(VkFFTSpecializationConstantsLa
 			if (res != VKFFT_SUCCESS) return res;
 			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 			if (res != VKFFT_SUCCESS) return res;
-			for (uint64_t i = 0; i < sc->fftDim / sc->localSize[1]; i++) {
+			for (uint64_t i = 0; i < (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]); i++) {
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim % sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				uint64_t id = (i / logicalRegistersPerThread) * sc->registers_per_thread + i % logicalRegistersPerThread;
-				if (sc->LUT) {
+				if ((sc->LUT) && (sc->LUT_4step)) {
 					sc->tempLen = sprintf(sc->tempStr, "		mult = twiddleLUT[%" PRIu64 "+(((%s%s)/%" PRIu64 ") %% (%" PRIu64 "))+%" PRIu64 "*(%s+%" PRIu64 ")];\n", sc->maxStageSumLUT, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -12998,7 +15156,7 @@ static inline VkFFTResult appendReorder4StepWrite(VkFFTSpecializationConstantsLa
 					}
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * loc_PI * ((((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %f%s;\n", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * %.17e%s * ((((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %.17e%s;\n", 3.1415926535897932384626433832795, LFending, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->inverse) {
@@ -13068,6 +15226,11 @@ static inline VkFFTResult appendReorder4StepWrite(VkFFTSpecializationConstantsLa
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 			}
 			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 			if (res != VKFFT_SUCCESS) return res;
@@ -13096,9 +15259,14 @@ static inline VkFFTResult appendReorder4StepWrite(VkFFTSpecializationConstantsLa
 			if (res != VKFFT_SUCCESS) return res;
 			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 			if (res != VKFFT_SUCCESS) return res;
-			for (uint64_t i = 0; i < sc->fftDim / sc->localSize[1]; i++) {
+			for (uint64_t i = 0; i < (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]); i++) {
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim % sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				uint64_t id = (i / logicalRegistersPerThread) * sc->registers_per_thread + i % logicalRegistersPerThread;
-				if (sc->LUT) {
+				if ((sc->LUT) && (sc->LUT_4step)) {
 					sc->tempLen = sprintf(sc->tempStr, "		mult = twiddleLUT[%" PRIu64 " + ((%s%s) %% (%" PRIu64 ")) + (%s + %" PRIu64 ") * %" PRIu64 "];\n", sc->maxStageSumLUT, sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], sc->stageStartSize);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -13109,7 +15277,7 @@ static inline VkFFTResult appendReorder4StepWrite(VkFFTSpecializationConstantsLa
 					}
 				}
 				else {
-					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * loc_PI * ((((%s%s) %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %f%s);\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
+					sc->tempLen = sprintf(sc->tempStr, "		angle = 2 * %.17e%s * ((((%s%s) %% (%" PRIu64 ")) * (%s + %" PRIu64 ")) / %.17e%s);\n", 3.1415926535897932384626433832795, LFending, sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->gl_LocalInvocationID_y, i * sc->localSize[1], (double)(sc->stageStartSize * sc->fftDim), LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->inverse) {
@@ -13179,6 +15347,11 @@ static inline VkFFTResult appendReorder4StepWrite(VkFFTSpecializationConstantsLa
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
+				if (((sc->fftDim % sc->localSize[1]) != 0) && (i == ((uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) - 1))) {
+					sc->tempLen = sprintf(sc->tempStr, "	}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 			}
 			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 			if (res != VKFFT_SUCCESS) return res;
@@ -13200,37 +15373,41 @@ static inline VkFFTResult appendBluesteinMultiplication(VkFFTSpecializationConst
 #if(VKFFT_BACKEND==0)
 	if (!strcmp(floatType, "float")) sprintf(vecType, "vec2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "dvec2");
-	char cosDef[20] = "cos";
-	char sinDef[20] = "sin";
+	//char cosDef[20] = "cos";
+	//char sinDef[20] = "sin";
 	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #elif(VKFFT_BACKEND==1)
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
-	char cosDef[20] = "__cosf";
-	char sinDef[20] = "__sinf";
+	//char cosDef[20] = "__cosf";
+	//char sinDef[20] = "__sinf";
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
 #elif(VKFFT_BACKEND==2)
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
-	char cosDef[20] = "__cosf";
-	char sinDef[20] = "__sinf";
+	//char cosDef[20] = "__cosf";
+	//char sinDef[20] = "__sinf";
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
-	char cosDef[20] = "native_cos";
-	char sinDef[20] = "native_sin";
+	//char cosDef[20] = "native_cos";
+	//char sinDef[20] = "native_sin";
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	//char cosDef[20] = "cos";
+	//char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
 	char shiftX[500] = "";
 	if (sc->performWorkGroupShift[0])
 		sprintf(shiftX, " + consts.workGroupShiftX * %s ", sc->gl_WorkGroupSize_x);
-	char requestCoordinate[100] = "";
-
 	char index_x[2000] = "";
-	char index_y[2000] = "";
-	char requestBatch[100] = "";
-	char separateRegisterStore[100] = "";
+	//char index_y[2000] = "";
+	//char requestBatch[100] = "";
+	//char separateRegisterStore[100] = "";
 	char kernelName[100] = "";
 	sprintf(kernelName, "BluesteinMultiplication");
 	if (!((sc->readToRegisters && (pre_or_post_multiplication == 0)) || (sc->writeFromRegisters && (pre_or_post_multiplication == 1)))) {
@@ -13239,7 +15416,41 @@ static inline VkFFTResult appendBluesteinMultiplication(VkFFTSpecializationConst
 	}
 	res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 	if (res != VKFFT_SUCCESS) return res;
-	for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+
+	uint64_t used_registers = 1;
+	switch (strideType) {
+	case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		used_registers = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		break;
+	case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		used_registers = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		break;
+	}
+	for (uint64_t i = 0; i < used_registers; i++) {
+		switch (strideType) {
+		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		{
+			if (sc->localSize[0] * ((1 + i)) > sc->fftDim) {
+				uint64_t current_group_cut = sc->fftDim - i * sc->localSize[0];
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, current_group_cut);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		{
+			if (sc->localSize[1] * ((1 + i)) > sc->fftDim) {
+				uint64_t current_group_cut = sc->fftDim - i * sc->localSize[1];
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, current_group_cut);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		}
 		switch (strideType) {
 		case 0: case 2: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
 		{
@@ -13286,7 +15497,7 @@ static inline VkFFTResult appendBluesteinMultiplication(VkFFTSpecializationConst
 		sc->tempLen = sprintf(sc->tempStr, "		w = %s[%s];\n", kernelName, sc->inoutID);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
-		uint64_t k = 0;
+		//uint64_t k = 0;
 		if (!((sc->readToRegisters && (pre_or_post_multiplication == 0)) || (sc->writeFromRegisters && (pre_or_post_multiplication == 1)))) {
 			if ((strideType == 0) || (strideType == 5) || (strideType == 6) || (strideType == 110) || (strideType == 120) || (strideType == 130) || (strideType == 140) || (strideType == 142) || (strideType == 144)) {
 				sc->tempLen = sprintf(sc->tempStr, "\
@@ -13332,13 +15543,2875 @@ static inline VkFFTResult appendBluesteinMultiplication(VkFFTSpecializationConst
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
+		switch (strideType) {
+		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		{
+			if (sc->localSize[0] * ((1 + i)) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		{
+			if (sc->localSize[1] * ((1 + i)) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		}
+	}
+	res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+	if (res != VKFFT_SUCCESS) return res;
+	return res;
+}
+
+static inline VkFFTResult appendFFTRaderStage(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix, uint64_t stageID, uint64_t strided) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+	char vecType[30];
+	char LFending[4] = "";
+	char tempNum[100] = "";
+	if (!strcmp(floatType, "float")) sprintf(LFending, "f");
+#if(VKFFT_BACKEND==0)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "vec2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "dvec2");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+#elif(VKFFT_BACKEND==1)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "__cosf";
+	char sinDef[20] = "__sinf";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==2)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "__cosf";
+	char sinDef[20] = "__sinf";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "native_cos";
+	char sinDef[20] = "native_sin";
+	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+#endif
+	char stageNormalization[50] = "";
+	uint64_t normalizationValue = 1;
+	if ((((sc->actualInverse) && (sc->normalize)) || (sc->convolutionStep && (stageAngle > 0))) && (stageSize == 1) && (sc->axis_upload_id == 0) && (!(sc->useBluesteinFFT && (stageAngle < 0)))) {
+		if ((sc->performDCT) && (sc->actualInverse)) {
+			if (sc->performDCT == 1)
+				normalizationValue = (sc->sourceFFTSize - 1) * 2;
+			else
+				normalizationValue = sc->sourceFFTSize * 2;
+		}
+		else
+			normalizationValue = sc->sourceFFTSize;
+	}
+	if (sc->useBluesteinFFT && (stageAngle > 0) && (stageSize == 1) && (sc->axis_upload_id == 0)) {
+		normalizationValue *= sc->fft_dim_full;
+	}
+	if (normalizationValue != 1) {
+		sprintf(stageNormalization, "%.17e%s", 1.0 / (double)(normalizationValue), LFending);
+	}
+	char convolutionInverse[10] = "";
+	sc->useCoalescedLUTUploadToSM = 0;
+	if (sc->convolutionStep) {
+		if (stageAngle < 0)
+			sprintf(convolutionInverse, ", 0");
+		else
+			sprintf(convolutionInverse, ", 1");
+	}
+	res = appendBarrierVkFFT(sc, 1);
+	if (res != VKFFT_SUCCESS) return res;
+
+
+	res = appendZeropadStart(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+	if (res != VKFFT_SUCCESS) return res;
+	//rotate the stage
+	char* gl_LocalInvocationID = (strided) ? sc->gl_LocalInvocationID_y : sc->gl_LocalInvocationID_x;
+	if (stageSize > 1) {
+		uint64_t num_logical_subgroups = (strided) ? sc->localSize[1] : sc->localSize[0];
+		uint64_t num_logical_groups = (uint64_t)ceil((sc->fftDim) / (double)(num_logical_subgroups));
+		for (uint64_t t = 0; t < num_logical_groups; t++) {
+			if (((1 + t) * num_logical_subgroups) > sc->fftDim) {
+				uint64_t current_group_cut = sc->fftDim - t * num_logical_subgroups;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", gl_LocalInvocationID, current_group_cut);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s+%" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, gl_LocalInvocationID, t * num_logical_subgroups, stageSize);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+
+			if (sc->LUT)
+				sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID*%" PRIu64 " + %" PRIu64 ";\n", stageRadix, stageSizeSum);
+			else
+				sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)stageAngle, LFending);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			if (sc->LUT) {
+				sc->tempLen = sprintf(sc->tempStr, "		%s = twiddleLUT[LUTId+(%s+ %" PRIu64 ") / %" PRIu64 "];\n\n", sc->w, gl_LocalInvocationID, t * num_logical_subgroups, sc->fftDim / stageRadix);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (!sc->inverse) {
+					sc->tempLen = sprintf(sc->tempStr, "		%s.y = -%s.y;\n", sc->w, sc->w);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			else {
+				if (!strcmp(floatType, "float")) {
+					sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s(angle*%.17e%s*((%s+ %" PRIu64 ") / %" PRIu64 "));\n", sc->w, cosDef, 2.0 / stageRadix, LFending, gl_LocalInvocationID, t * num_logical_subgroups, sc->fftDim / stageRadix);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s(angle*%.17e%s*((%s+ %" PRIu64 ") / %" PRIu64 "));\n", sc->w, sinDef, 2.0 / stageRadix, LFending, gl_LocalInvocationID, t * num_logical_subgroups, sc->fftDim / stageRadix);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+				}
+				if (!strcmp(floatType, "double")) {
+					sc->tempLen = sprintf(sc->tempStr, "		%s = sincos_20(angle*%.17e%s*((%s+ %" PRIu64 ") / %" PRIu64 "));\n", sc->w, 2.0 / stageRadix, LFending, gl_LocalInvocationID, t * num_logical_subgroups, sc->fftDim / stageRadix);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f \\n \", %s, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, sc->w, sc->w);
+			//res = VkAppendLine(sc);
+			//if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s+ %" PRIu64 ");\n", sc->sdataID, gl_LocalInvocationID, t * num_logical_subgroups);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+
+			if (sc->resolveBankConflictFirstStages == 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+	%s = (%s / %" PRIu64 ") * %" PRIu64 " + %s %% %" PRIu64 ";", sc->sdataID, sc->sdataID, sc->numSharedBanks / 2, sc->numSharedBanks / 2 + 1, sc->sdataID, sc->numSharedBanks / 2);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			if (strided) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				if (sc->localSize[1] > 1) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->regIDs[0], sc->sdataID);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+
+			res = VkMulComplex(sc, sc->temp, sc->regIDs[0], sc->w, 0);
+			if (res != VKFFT_SUCCESS) return res;
+
+			sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->sdataID, sc->temp);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			if (((1 + t) * num_logical_subgroups) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+		}
+		res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadEnd(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendBarrierVkFFT(sc, 1);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadStart(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+		if (res != VKFFT_SUCCESS) return res;
+	}
+	uint64_t raderTranspose = ((sc->currentRaderContainer->containerFFTNum < 8) || (sc->currentRaderContainer->numStages == 1) || (strided)) ? 0 : 1;
+
+	// read x0 - to be used in the end
+	{
+		uint64_t locStageRadix = sc->currentRaderContainer->stageRadix[0];
+		uint64_t logicalStoragePerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix] * sc->registerBoost;// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread * sc->registerBoost : sc->min_registers_per_thread * sc->registerBoost;
+		//uint64_t logicalRegistersPerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
+		uint64_t locFFTDim = sc->currentRaderContainer->containerFFTDim; //different length due to all -1 cutoffs
+		//uint64_t locFFTsCombined = sc->currentRaderContainer->containerFFTNum * locFFTDim;
+		//uint64_t logicalGroupSize = (uint64_t)ceil(locFFTsCombined / (double)logicalStoragePerThread);
+		uint64_t subLogicalGroupSize = (uint64_t)ceil(locFFTDim / (double)logicalStoragePerThread); // hopefully it is not <1, will fix 
+
+		if (!raderTranspose) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, subLogicalGroupSize); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, subLogicalGroupSize); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		if (!raderTranspose) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		sc->tempLen = sprintf(sc->tempStr, "\
+	%s = %s;\n", sc->sdataID, sc->raderIDx2);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		if (strided) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+	%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			if (sc->localSize[1] > 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+	%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+		}
+		sc->tempLen = sprintf(sc->tempStr, "\
+	%s = sdata[%s];\n", sc->x0[0], sc->sdataID);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+
+		sc->tempLen = sprintf(sc->tempStr, "\
+	}\n");
+		res = VkAppendLine(sc);
+	}
+	// read x0 for x0+x1 - 0-element
+	{
+		uint64_t locStageRadix = sc->currentRaderContainer->stageRadix[sc->currentRaderContainer->numStages - 1];
+		uint64_t logicalStoragePerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix] * sc->registerBoost;// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread * sc->registerBoost : sc->min_registers_per_thread * sc->registerBoost;
+		//uint64_t logicalRegistersPerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
+		uint64_t locFFTDim = sc->currentRaderContainer->containerFFTDim; //different length due to all -1 cutoffs
+		//uint64_t locFFTsCombined = sc->currentRaderContainer->containerFFTNum * locFFTDim;
+		//uint64_t logicalGroupSize = (uint64_t)ceil(locFFTsCombined / (double)logicalStoragePerThread);
+		uint64_t subLogicalGroupSize = (uint64_t)ceil(locFFTDim / (double)logicalStoragePerThread); // hopefully it is not <1, will fix 
+		if (!raderTranspose) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, subLogicalGroupSize); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, subLogicalGroupSize); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		if (!raderTranspose) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+
+		sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s == 0) {\n", sc->raderIDx);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+
+		sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s;\n", sc->sdataID, sc->raderIDx2);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		if (strided) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			if (sc->localSize[1] > 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+		}
+		sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->x0[1], sc->sdataID);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+	}
+	if (sc->currentRaderContainer->numStages == 1) {
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadEnd(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendBarrierVkFFT(sc, 1);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadStart(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+		if (res != VKFFT_SUCCESS) return res;
+	}
+	uint64_t locStageSize = 1;
+	uint64_t locStageSizeSum = 0;
+	long double locStageAngle = -double_PI;
+	uint64_t shift = 0;
+	for (uint64_t rader_stage = 0; rader_stage < sc->currentRaderContainer->numStages; rader_stage++) {
+		uint64_t locStageRadix = sc->currentRaderContainer->stageRadix[rader_stage];
+		uint64_t logicalStoragePerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix] * sc->registerBoost;// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread * sc->registerBoost : sc->min_registers_per_thread * sc->registerBoost;
+		uint64_t logicalRegistersPerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
+		uint64_t locFFTDim = sc->currentRaderContainer->containerFFTDim; //different length due to all -1 cutoffs
+		uint64_t locFFTsCombined = sc->currentRaderContainer->containerFFTNum * locFFTDim;
+		//uint64_t logicalGroupSize = (uint64_t)ceil(locFFTsCombined / (double)logicalStoragePerThread);
+		uint64_t subLogicalGroupSize = (uint64_t)ceil(locFFTDim / (double)logicalStoragePerThread); // hopefully it is not <1, will fix 
+		uint64_t locFFTDimStride = locFFTDim;
+		if (shift <= sc->sharedShiftRaderFFT) locFFTDimStride = locFFTDim + shift;
+		//local radix
+		if ((rader_stage == 0) || (!raderTranspose)) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, subLogicalGroupSize); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, subLogicalGroupSize); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t k = 0; k < sc->registerBoost; k++) {
+			if ((rader_stage == 0) || (!raderTranspose)) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			for (uint64_t j = 0; j < logicalRegistersPerThread / locStageRadix; j++) {
+				if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) continue;
+				if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+					uint64_t current_group_cut = locFFTDim / locStageRadix - (j + k * logicalRegistersPerThread / locStageRadix) * subLogicalGroupSize;
+
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, current_group_cut);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s+ %" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->raderIDx, (j + k * logicalRegistersPerThread / locStageRadix) * subLogicalGroupSize, locStageSize);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (sc->LUT)
+					sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID + %" PRIu64 ";\n", locStageSizeSum + sc->currentRaderContainer->RaderRadixOffsetLUT);
+				else
+					sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)(locStageAngle), LFending);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				for (uint64_t i = 0; i < locStageRadix; i++) {
+					uint64_t g = sc->currentRaderContainer->generator;
+					if (rader_stage == 0) {
+						if (sc->inline_rader_g_pow == 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow_%" PRIu64 "[%s + %" PRIu64 "];\n", sc->sdataID, stageRadix, sc->raderIDx, j * subLogicalGroupSize + i * locFFTDim / locStageRadix);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else if (sc->inline_rader_g_pow == 2) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow[%s + %" PRIu64 "];\n", sc->sdataID, sc->raderIDx, j * subLogicalGroupSize + i * locFFTDim / locStageRadix + sc->currentRaderContainer->raderUintLUToffset);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "\
+			%s= (%s + %" PRIu64 ");\n\
+			%s=1;\n\
+			while (%s != 0)\n\
+			{\n\
+				%s = (%s * %" PRIu64 ") %% %" PRIu64 ";\n\
+				%s--;\n\
+			}\n", sc->inoutID, sc->raderIDx, j * subLogicalGroupSize + i * locFFTDim / locStageRadix, sc->sdataID, sc->inoutID, sc->sdataID, sc->sdataID, g, stageRadix, sc->inoutID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %s*%" PRIu64 ";\n", sc->sdataID, sc->raderIDx2, sc->sdataID, sc->currentRaderContainer->containerFFTNum);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						if (!raderTranspose) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %" PRIu64 " + %s*%" PRIu64 ";\n", sc->sdataID, sc->raderIDx, j * subLogicalGroupSize + i * locFFTDim / locStageRadix + sc->fftDim / stageRadix, sc->raderIDx2, locFFTDimStride);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s + %" PRIu64 ")*%" PRIu64 " + %s + %" PRIu64 ";\n", sc->sdataID, sc->raderIDx, j * subLogicalGroupSize + i * locFFTDim / locStageRadix, sc->currentRaderContainer->containerFFTNum, sc->raderIDx2, sc->fftDim / stageRadix);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+
+					uint64_t id = j + i * logicalRegistersPerThread / locStageRadix;
+					id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+					if (!strided) {
+						if (sc->resolveBankConflictFirstStages == 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+	%s = (%s / %" PRIu64 ") * %" PRIu64 " + %s %% %" PRIu64 ";", sc->sdataID, sc->sdataID, sc->numSharedBanks / 2, sc->numSharedBanks / 2 + 1, sc->sdataID, sc->numSharedBanks / 2);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					if (strided) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						if (sc->localSize[1] > 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->regIDs[id], sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				char** regID = (char**)malloc(sizeof(char*) * locStageRadix);
+				if (regID) {
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						regID[i] = (char*)malloc(sizeof(char) * 50);
+						if (!regID[i]) {
+							for (uint64_t p = 0; p < i; p++) {
+								free(regID[p]);
+								regID[p] = 0;
+							}
+							free(regID);
+							regID = 0;
+							return VKFFT_ERROR_MALLOC_FAILED;
+						}
+						uint64_t id = j + k * logicalRegistersPerThread / locStageRadix + i * logicalStoragePerThread / locStageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(regID[i], "%s", sc->regIDs[id]);
+					}
+					res = inlineRadixKernelVkFFT(sc, floatType, uintType, locStageRadix, locStageSize, locStageSizeSum, locStageAngle, regID);
+					if (res != VKFFT_SUCCESS) return res;
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						uint64_t id = j + k * logicalRegistersPerThread / locStageRadix + i * logicalStoragePerThread / locStageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(sc->regIDs[id], "%s", regID[i]);
+					}
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						free(regID[i]);
+						regID[i] = 0;
+					}
+					free(regID);
+					regID = 0;
+				}
+				else
+					return VKFFT_ERROR_MALLOC_FAILED;
+
+				if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+					sc->tempLen = sprintf(sc->tempStr, "		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		if (rader_stage != sc->currentRaderContainer->numStages - 1) {
+			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadEnd(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendBarrierVkFFT(sc, 1);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadStart(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		//local shuffle
+		char** tempID;
+		tempID = (char**)malloc(sizeof(char*) * sc->registers_per_thread * sc->registerBoost);
+		if (tempID) {
+			for (uint64_t i = 0; i < sc->registers_per_thread * sc->registerBoost; i++) {
+				tempID[i] = (char*)malloc(sizeof(char) * 50);
+				if (!tempID[i]) {
+					for (uint64_t j = 0; j < i; j++) {
+						free(tempID[j]);
+						tempID[j] = 0;
+					}
+					free(tempID);
+					tempID = 0;
+					return VKFFT_ERROR_MALLOC_FAILED;
+				}
+			}
+			for (uint64_t k = 0; k < sc->registerBoost; ++k) {
+				uint64_t t = 0;
+
+				if ((rader_stage == 0) || (!raderTranspose)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				//last stage - save x1
+				if (rader_stage == sc->currentRaderContainer->numStages - 1) {
+
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s == 0) {\n", sc->raderIDx);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					res = VkAddComplex(sc, sc->x0[1], sc->x0[1], sc->regIDs[0]);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (!strided) {
+					if (rader_stage != 0) {
+						shift = (subLogicalGroupSize > (locFFTDim % (sc->numSharedBanks / 2))) ? subLogicalGroupSize - locFFTDim % (sc->numSharedBanks / 2) : 0;
+						if (shift <= sc->sharedShiftRaderFFT) locFFTDimStride = locFFTDim + shift;
+					}
+					else {
+						if (sc->sharedShiftRaderFFT > 0) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+							if (res != VKFFT_SUCCESS) return res;
+							res = appendZeropadEnd(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "\
+		sharedStride = %" PRIu64 ";\n", sc->sharedStrideRaderFFT);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							res = appendZeropadStart(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+							if (res != VKFFT_SUCCESS) return res;
+							if ((rader_stage == 0) || (!raderTranspose)) {
+								sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						shift = ((locFFTDim % (sc->numSharedBanks / 2))) ? 0 : 1;
+						if (shift <= sc->sharedShiftRaderFFT) locFFTDimStride = locFFTDim + shift;
+					}
+				}
+				for (uint64_t j = 0; j < logicalRegistersPerThread / locStageRadix; j++) {
+					if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) <= locFFTDim) {
+						if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+							uint64_t current_group_cut = locFFTDim / locStageRadix - (j + k * logicalRegistersPerThread / locStageRadix) * subLogicalGroupSize;
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->raderIDx, current_group_cut);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sprintf(tempNum, "%" PRIu64 "", j * subLogicalGroupSize);
+						res = VkAddReal(sc, sc->stageInvocationID, sc->raderIDx, tempNum);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
+						if (res != VKFFT_SUCCESS) return res;
+						sprintf(tempNum, "%" PRIu64 "", locStageSize);
+						res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
+						if (res != VKFFT_SUCCESS) return res;
+						sprintf(tempNum, "%" PRIu64 "", locStageRadix);
+						res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkAddReal(sc, sc->inoutID, sc->inoutID, sc->stageInvocationID);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					/*sc->tempLen = sprintf(sc->tempStr, "\
+	stageInvocationID = (gl_LocalInvocationID.x + %" PRIu64 ") %% (%" PRIu64 ");\n\
+	blockInvocationID = (gl_LocalInvocationID.x + %" PRIu64 ") - stageInvocationID;\n\
+	inoutID = stageInvocationID + blockInvocationID * %" PRIu64 ";\n", j * logicalGroupSize, stageSize, j * logicalGroupSize, stageRadix);*/
+
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						uint64_t id = j + k * logicalRegistersPerThread / locStageRadix + i * logicalStoragePerThread / locStageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[id]);
+						t++;
+						if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) <= locFFTDim) {
+							sprintf(tempNum, "%" PRIu64 "", i * locStageSize);
+							res = VkAddReal(sc, sc->combinedID, sc->inoutID, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+
+							//last stage - mult rader kernel
+							if (rader_stage == sc->currentRaderContainer->numStages - 1) {
+								if (sc->inline_rader_kernel) {
+									sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = r_rader_kernel_%" PRIu64 "[%s];\n\
+		%s.y = i_rader_kernel_%" PRIu64 "[%s];\n", sc->w, stageRadix, sc->combinedID, sc->w, stageRadix, sc->combinedID);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								else {
+									sc->tempLen = sprintf(sc->tempStr, "\
+		%s = twiddleLUT[%s+%" PRIu64 "];\n", sc->w, sc->combinedID, sc->currentRaderContainer->RaderKernelOffsetLUT);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								/*sc->tempLen = sprintf(sc->tempStr, "\
+		printf(\"%%f %%f - %%f %%f\\n\", %s.x, %s.y, %s.x, %s.y);\n", sc->regIDs[id], sc->regIDs[id], sc->w, sc->w);
+					res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;*/
+								res = VkMulComplex(sc, sc->regIDs[id], sc->regIDs[id], sc->w, sc->temp);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (rader_stage != sc->currentRaderContainer->numStages - 1) {
+								if (!raderTranspose) {
+									sprintf(tempNum, "%" PRIu64 "", sc->fftDim / stageRadix);
+									res = VkAddReal(sc, sc->sdataID, sc->combinedID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									sprintf(tempNum, "%s * %" PRIu64 "", sc->raderIDx2, locFFTDimStride);
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								else {
+									sprintf(tempNum, "%" PRIu64 "", sc->currentRaderContainer->containerFFTNum);
+									res = VkMulReal(sc, sc->sdataID, sc->combinedID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									sprintf(tempNum, "%" PRIu64 "", sc->fftDim / stageRadix);
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									sprintf(tempNum, "%s", sc->raderIDx2);
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								if (!strided) {
+									if (0 && (locStageSize <= sc->numSharedBanks / 2) && (locFFTsCombined > sc->numSharedBanks / 2) && (sc->sharedStrideBankConflictFirstStages != locFFTDim / sc->registerBoost) && ((locFFTDim & (locFFTDim - 1)) == 0) && (locStageSize * locStageRadix != locFFTDim)) {
+										if (sc->resolveBankConflictFirstStages == 0) {
+											sc->resolveBankConflictFirstStages = 1;
+											sc->tempLen = sprintf(sc->tempStr, "\
+	%s = %" PRIu64 ";", sc->sharedStride, sc->sharedStrideBankConflictFirstStages);
+											res = VkAppendLine(sc);
+											if (res != VKFFT_SUCCESS) return res;
+										}
+										sc->tempLen = sprintf(sc->tempStr, "\
+	%s = (%s / %" PRIu64 ") * %" PRIu64 " + %s %% %" PRIu64 ";", sc->sdataID, sc->sdataID, sc->numSharedBanks / 2, sc->numSharedBanks / 2 + 1, sc->sdataID, sc->numSharedBanks / 2);
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+
+									}
+									else {
+										if (sc->resolveBankConflictFirstStages == 1) {
+											sc->resolveBankConflictFirstStages = 0;
+											sc->tempLen = sprintf(sc->tempStr, "\
+	%s = %" PRIu64 ";", sc->sharedStride, sc->sharedStrideReadWriteConflict);
+											res = VkAppendLine(sc);
+											if (res != VKFFT_SUCCESS) return res;
+										}
+									}
+								}
+								if (strided) {
+									res = VkMulReal(sc, sc->sdataID, sc->sdataID, sc->sharedStride);
+									if (res != VKFFT_SUCCESS) return res;
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								else {
+									if (sc->localSize[1] > 1) {
+										res = VkMulReal(sc, sc->combinedID, sc->gl_LocalInvocationID_y, sc->sharedStride);
+										if (res != VKFFT_SUCCESS) return res;
+										res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->combinedID);
+										if (res != VKFFT_SUCCESS) return res;
+									}
+								}
+								//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "", i * stageSize);
+								res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						/*sc->tempLen = sprintf(sc->tempStr, "\
+sdata[sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "] = temp%s%s;\n", i * stageSize, sc->regIDs[id], stageNormalization);*/
+					}
+					if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) <= locFFTDim) {
+						if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+							sc->tempLen = sprintf(sc->tempStr, "	}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				for (uint64_t j = logicalRegistersPerThread; j < sc->registers_per_thread; j++) {
+					sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[t + k * sc->registers_per_thread]);
+					t++;
+				}
+				t = 0;
+			}
+			if (rader_stage != sc->currentRaderContainer->numStages - 1) {
+				for (uint64_t i = 0; i < sc->registers_per_thread * sc->registerBoost; i++) {
+					//printf("0 - %s\n", resID[i]);
+					sprintf(sc->regIDs[i], "%s", tempID[i]);
+					//sprintf(resID[i], "%s", tempID[i]);
+					//printf("1 - %s\n", resID[i]);
+				}
+			}
+			for (uint64_t i = 0; i < sc->registers_per_thread * sc->registerBoost; i++) {
+				free(tempID[i]);
+				tempID[i] = 0;
+			}
+			free(tempID);
+			tempID = 0;
+		}
+		else
+			return VKFFT_ERROR_MALLOC_FAILED;
+
+		if (rader_stage > 0) {
+			switch (locStageRadix) {
+			case 2:
+				locStageSizeSum += locStageSize;
+				break;
+			case 3:
+				locStageSizeSum += locStageSize * 2;
+				break;
+			case 4:
+				locStageSizeSum += locStageSize * 2;
+				break;
+			case 5:
+				locStageSizeSum += locStageSize * 4;
+				break;
+			case 6:
+				locStageSizeSum += locStageSize * 5;
+				break;
+			case 7:
+				locStageSizeSum += locStageSize * 6;
+				break;
+			case 8:
+				locStageSizeSum += locStageSize * 3;
+				break;
+			case 9:
+				locStageSizeSum += locStageSize * 8;
+				break;
+			case 10:
+				locStageSizeSum += locStageSize * 9;
+				break;
+			case 11:
+				locStageSizeSum += locStageSize * 10;
+				break;
+			case 12:
+				locStageSizeSum += locStageSize * 11;
+				break;
+			case 13:
+				locStageSizeSum += locStageSize * 12;
+				break;
+			case 14:
+				locStageSizeSum += locStageSize * 13;
+				break;
+			case 15:
+				locStageSizeSum += locStageSize * 14;
+				break;
+			case 16:
+				locStageSizeSum += locStageSize * 4;
+				break;
+			case 32:
+				locStageSizeSum += locStageSize * 5;
+				break;
+			default:
+				locStageSizeSum += locStageSize * (locStageRadix);
+				break;
+			}
+		}
+		locStageSize *= locStageRadix;
+		locStageAngle /= locStageRadix;
+
+		if (rader_stage != sc->currentRaderContainer->numStages - 1) {
+			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadEnd(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendBarrierVkFFT(sc, 1);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadStart(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+	}
+
+	//iFFT
+	locStageSize = 1;
+	locStageAngle = double_PI;
+	locStageSizeSum = 0;
+	for (int64_t rader_stage = sc->currentRaderContainer->numStages - 1; rader_stage >= 0; rader_stage--) {
+		uint64_t locStageRadix = sc->currentRaderContainer->stageRadix[rader_stage];
+		uint64_t logicalStoragePerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix] * sc->registerBoost;// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread * sc->registerBoost : sc->min_registers_per_thread * sc->registerBoost;
+		uint64_t logicalRegistersPerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
+		uint64_t locFFTDim = sc->currentRaderContainer->containerFFTDim; //different length due to all -1 cutoffs
+		uint64_t locFFTsCombined = sc->currentRaderContainer->containerFFTNum * locFFTDim;
+		//uint64_t logicalGroupSize = (uint64_t)ceil(locFFTsCombined / (double)logicalStoragePerThread);
+		uint64_t subLogicalGroupSize = (uint64_t)ceil(locFFTDim / (double)logicalStoragePerThread); // hopefully it is not <1, will fix 
+		uint64_t locFFTDimStride = locFFTDim; //different length due to all -1 cutoffs
+		if (shift <= sc->sharedShiftRaderFFT) locFFTDimStride = locFFTDim + shift;
+		//local radix
+		if (!raderTranspose) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, subLogicalGroupSize); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, subLogicalGroupSize); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		for (uint64_t k = 0; k < sc->registerBoost; k++) {
+			if (!raderTranspose) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			for (uint64_t j = 0; j < logicalRegistersPerThread / locStageRadix; j++) {
+				if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) continue;
+				if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+					uint64_t current_group_cut = locFFTDim / locStageRadix - (j + k * logicalRegistersPerThread / locStageRadix) * subLogicalGroupSize;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->raderIDx, current_group_cut);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s+ %" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->raderIDx, (j + k * logicalRegistersPerThread / locStageRadix) * subLogicalGroupSize, locStageSize);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (sc->LUT)
+					sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID + %" PRIu64 ";\n", locStageSizeSum + sc->currentRaderContainer->RaderRadixOffsetLUTiFFT);
+				else
+					sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)(locStageAngle), LFending);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (rader_stage != (int64_t)sc->currentRaderContainer->numStages - 1) {
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						uint64_t id = j + i * logicalRegistersPerThread / locStageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						if (!raderTranspose) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s + %" PRIu64 ") + %s*%" PRIu64 ";\n", sc->sdataID, sc->raderIDx, j * subLogicalGroupSize + i * locFFTDim / locStageRadix + sc->fftDim / stageRadix, sc->raderIDx2, locFFTDimStride);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s + %" PRIu64 ")*%" PRIu64 " + %s + %" PRIu64 ";\n", sc->sdataID, sc->raderIDx, j * subLogicalGroupSize + i * locFFTDim / locStageRadix, sc->currentRaderContainer->containerFFTNum, sc->raderIDx2, sc->fftDim / stageRadix);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (!strided) {
+							if (sc->resolveBankConflictFirstStages == 1) {
+								sc->tempLen = sprintf(sc->tempStr, "\
+	%s = (%s / %" PRIu64 ") * %" PRIu64 " + %s %% %" PRIu64 ";", sc->sdataID, sc->sdataID, sc->numSharedBanks / 2, sc->numSharedBanks / 2 + 1, sc->sdataID, sc->numSharedBanks / 2);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						if (strided) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							if (sc->localSize[1] > 1) {
+								sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->regIDs[id], sc->sdataID);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				char** regID = (char**)malloc(sizeof(char*) * locStageRadix);
+				if (regID) {
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						regID[i] = (char*)malloc(sizeof(char) * 50);
+						if (!regID[i]) {
+							for (uint64_t p = 0; p < i; p++) {
+								free(regID[p]);
+								regID[p] = 0;
+							}
+							free(regID);
+							regID = 0;
+							return VKFFT_ERROR_MALLOC_FAILED;
+						}
+						uint64_t id = j + k * logicalRegistersPerThread / locStageRadix + i * logicalStoragePerThread / locStageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(regID[i], "%s", sc->regIDs[id]);
+					}
+					res = inlineRadixKernelVkFFT(sc, floatType, uintType, locStageRadix, locStageSize, locStageSizeSum, locStageAngle, regID);
+					if (res != VKFFT_SUCCESS) return res;
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						uint64_t id = j + k * logicalRegistersPerThread / locStageRadix + i * logicalStoragePerThread / locStageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(sc->regIDs[id], "%s", regID[i]);
+					}
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						free(regID[i]);
+						regID[i] = 0;
+					}
+					free(regID);
+					regID = 0;
+				}
+				else
+					return VKFFT_ERROR_MALLOC_FAILED;
+				if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+					sc->tempLen = sprintf(sc->tempStr, "		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadEnd(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendBarrierVkFFT(sc, 1);
+		if (res != VKFFT_SUCCESS) return res;
+		if (!strided) {
+			if (rader_stage == 0) {
+				if (sc->sharedStrideRaderFFT > 0) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		sharedStride = %" PRIu64 ";\n", sc->fftDim);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+		}
+		res = appendZeropadStart(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+		if (res != VKFFT_SUCCESS) return res;
+		//local shuffle
+		char** tempID;
+		tempID = (char**)malloc(sizeof(char*) * sc->registers_per_thread * sc->registerBoost);
+		if (tempID) {
+			for (uint64_t i = 0; i < sc->registers_per_thread * sc->registerBoost; i++) {
+				tempID[i] = (char*)malloc(sizeof(char) * 50);
+				if (!tempID[i]) {
+					for (uint64_t j = 0; j < i; j++) {
+						free(tempID[j]);
+						tempID[j] = 0;
+					}
+					free(tempID);
+					tempID = 0;
+					return VKFFT_ERROR_MALLOC_FAILED;
+				}
+			}
+			for (uint64_t k = 0; k < sc->registerBoost; ++k) {
+				uint64_t t = 0;
+				if (!raderTranspose) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (rader_stage == 0) {
+					res = VkMovReal(sc, sc->stageInvocationID, sc->raderIDx2);
+					if (res != VKFFT_SUCCESS) return res;
+					res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
+					if (res != VKFFT_SUCCESS) return res;
+					sprintf(tempNum, "%" PRIu64 "", stageSize);
+					res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
+					if (res != VKFFT_SUCCESS) return res;
+					res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
+					if (res != VKFFT_SUCCESS) return res;
+					sprintf(tempNum, "%" PRIu64 "", stageRadix);
+					res = VkMulReal(sc, sc->raderIDx2, sc->blockInvocationID, tempNum);
+					if (res != VKFFT_SUCCESS) return res;
+					res = VkAddReal(sc, sc->raderIDx2, sc->raderIDx2, sc->stageInvocationID);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (!strided) {
+					if (rader_stage != (int64_t)sc->currentRaderContainer->numStages - 1) {
+						shift = (subLogicalGroupSize > (locFFTDim % (sc->numSharedBanks / 2))) ? subLogicalGroupSize - locFFTDim % (sc->numSharedBanks / 2) : 0;
+						if (shift <= sc->sharedShiftRaderFFT) locFFTDimStride = locFFTDim + shift;
+					}
+					else {
+						shift = ((locFFTDim % (sc->numSharedBanks / 2))) ? 0 : 1;
+						if (shift <= sc->sharedShiftRaderFFT) locFFTDimStride = locFFTDim + shift;
+					}
+				}
+				for (uint64_t j = 0; j < logicalRegistersPerThread / locStageRadix; j++) {
+					if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) <= locFFTDim) {
+						if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+							uint64_t current_group_cut = locFFTDim / locStageRadix - (j + k * logicalRegistersPerThread / locStageRadix) * subLogicalGroupSize;
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->raderIDx, current_group_cut);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sprintf(tempNum, "%" PRIu64 "", j * subLogicalGroupSize);
+						res = VkAddReal(sc, sc->stageInvocationID, sc->raderIDx, tempNum);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
+						if (res != VKFFT_SUCCESS) return res;
+						sprintf(tempNum, "%" PRIu64 "", locStageSize);
+						res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
+						if (res != VKFFT_SUCCESS) return res;
+						sprintf(tempNum, "%" PRIu64 "", locStageRadix);
+						res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkAddReal(sc, sc->inoutID, sc->inoutID, sc->stageInvocationID);
+						if (res != VKFFT_SUCCESS) return res;
+
+					}
+					/*sc->tempLen = sprintf(sc->tempStr, "\
+	stageInvocationID = (gl_LocalInvocationID.x + %" PRIu64 ") %% (%" PRIu64 ");\n\
+	blockInvocationID = (gl_LocalInvocationID.x + %" PRIu64 ") - stageInvocationID;\n\
+	inoutID = stageInvocationID + blockInvocationID * %" PRIu64 ";\n", j * logicalGroupSize, stageSize, j * logicalGroupSize, stageRadix);*/
+
+					for (uint64_t i = 0; i < locStageRadix; i++) {
+						uint64_t id = j + k * logicalRegistersPerThread / locStageRadix + i * logicalStoragePerThread / locStageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[id]);
+						t++;
+						if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) <= locFFTDim) {
+							sprintf(tempNum, "%" PRIu64 "", i * locStageSize);
+							res = VkAddReal(sc, sc->combinedID, sc->inoutID, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+
+							if (rader_stage == 0) {
+								locFFTDimStride = locFFTDim;
+								//last stage - add x0
+
+								uint64_t g = sc->currentRaderContainer->generator;
+								if (sc->inline_rader_g_pow == 1) {
+									sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow_%" PRIu64 "[%" PRIu64 "-%s];\n", sc->combinedID, stageRadix, stageRadix - 1, sc->combinedID);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								else if (sc->inline_rader_g_pow == 2) {
+									sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow[%" PRIu64 "-%s];\n", sc->combinedID, stageRadix - 1 + sc->currentRaderContainer->raderUintLUToffset, sc->combinedID);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								else {
+									sc->tempLen = sprintf(sc->tempStr, "\
+			%s= (%" PRIu64 "-%s);\n\
+			%s=1;\n\
+			while (%s != 0)\n\
+			{\n\
+				%s = (%s * %" PRIu64 ") %% %" PRIu64 ";\n\
+				%s--;\n\
+			}\n", sc->inoutID, stageRadix - 1, sc->combinedID, sc->sdataID, sc->inoutID, sc->combinedID, sc->combinedID, g, stageRadix, sc->inoutID);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								if (sc->inverse) {
+									sprintf(tempNum, "(%" PRIu64 "-%s)*%" PRIu64 "", (stageRadix), sc->combinedID, stageSize);
+								}
+								else {
+									sprintf(tempNum, "%s*%" PRIu64 "", sc->combinedID, stageSize);
+								}
+								res = VkAddReal(sc, sc->sdataID, sc->raderIDx2, tempNum);
+								if (res != VKFFT_SUCCESS) return res;
+								//normalization is in kernel
+								/*sprintf(tempNum, "%.17e%s", 1.0 / locFFTDim, LFending);
+								res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], tempNum);
+								if (res != VKFFT_SUCCESS) return res;*/
+								res = VkAddComplex(sc, sc->regIDs[id], sc->regIDs[id], sc->x0[0]);
+								if (res != VKFFT_SUCCESS) return res;
+
+							}
+							else {
+								if (!raderTranspose) {
+									sprintf(tempNum, "%" PRIu64 "", sc->fftDim / stageRadix);
+									res = VkAddReal(sc, sc->sdataID, sc->combinedID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									sprintf(tempNum, "%s * %" PRIu64 "", sc->raderIDx2, locFFTDimStride);
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								else {
+									sprintf(tempNum, "%" PRIu64 "", sc->currentRaderContainer->containerFFTNum);
+									res = VkMulReal(sc, sc->sdataID, sc->combinedID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									sprintf(tempNum, "%" PRIu64 "", sc->fftDim / stageRadix);
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									sprintf(tempNum, "%s", sc->raderIDx2);
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+							}
+							if (!strided) {
+								if (0 && (locStageSize <= sc->numSharedBanks / 2) && (locFFTsCombined > sc->numSharedBanks / 2) && (sc->sharedStrideBankConflictFirstStages != locFFTDim / sc->registerBoost) && ((locFFTDim & (locFFTDim - 1)) == 0) && (locStageSize * locStageRadix != locFFTDim)) {
+									if (sc->resolveBankConflictFirstStages == 0) {
+										sc->resolveBankConflictFirstStages = 1;
+										sc->tempLen = sprintf(sc->tempStr, "\
+	%s = %" PRIu64 ";", sc->sharedStride, sc->sharedStrideBankConflictFirstStages);
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
+									sc->tempLen = sprintf(sc->tempStr, "\
+	%s = (%s / %" PRIu64 ") * %" PRIu64 " + %s %% %" PRIu64 ";", sc->sdataID, sc->sdataID, sc->numSharedBanks / 2, sc->numSharedBanks / 2 + 1, sc->sdataID, sc->numSharedBanks / 2);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+
+								}
+								else {
+									if (sc->resolveBankConflictFirstStages == 1) {
+										sc->resolveBankConflictFirstStages = 0;
+										sc->tempLen = sprintf(sc->tempStr, "\
+	%s = %" PRIu64 ";", sc->sharedStride, sc->sharedStrideReadWriteConflict);
+										res = VkAppendLine(sc);
+										if (res != VKFFT_SUCCESS) return res;
+									}
+								}
+							}
+							if (strided) {
+								res = VkMulReal(sc, sc->sdataID, sc->sdataID, sc->sharedStride);
+								if (res != VKFFT_SUCCESS) return res;
+								res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								if (sc->localSize[1] > 1) {
+									res = VkMulReal(sc, sc->combinedID, sc->gl_LocalInvocationID_y, sc->sharedStride);
+									if (res != VKFFT_SUCCESS) return res;
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->combinedID);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+							}
+							//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "", i * stageSize);
+							if ((((sc->actualInverse) && (sc->normalize)) || ((sc->convolutionStep || sc->useBluesteinFFT) && (stageAngle > 0))) && (rader_stage == 0)) {
+								if (strcmp(stageNormalization, "")) {
+									res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], stageNormalization);
+								}
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f \\n \", %s, %s.x, %s.y);\n\n", sc->sdataID, sc->regIDs[id], sc->regIDs[id]);
+							//res = VkAppendLine(sc);
+							//if (res != VKFFT_SUCCESS) return res;
+						}
+						/*sc->tempLen = sprintf(sc->tempStr, "\
+sdata[sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "] = temp%s%s;\n", i * stageSize, sc->regIDs[id], stageNormalization);*/
+					}
+					if (subLogicalGroupSize * ((j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) <= locFFTDim) {
+						if (subLogicalGroupSize * ((1 + j + k * logicalRegistersPerThread / locStageRadix) * locStageRadix) > locFFTDim) {
+							sc->tempLen = sprintf(sc->tempStr, "	}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				for (uint64_t j = logicalRegistersPerThread; j < sc->registers_per_thread; j++) {
+					sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[t + k * sc->registers_per_thread]);
+					t++;
+				}
+				t = 0;
+			}
+			for (uint64_t i = 0; i < sc->registers_per_thread * sc->registerBoost; i++) {
+				//printf("0 - %s\n", resID[i]);
+				sprintf(sc->regIDs[i], "%s", tempID[i]);
+				//sprintf(resID[i], "%s", tempID[i]);
+				//printf("1 - %s\n", resID[i]);
+			}
+			for (uint64_t i = 0; i < sc->registers_per_thread * sc->registerBoost; i++) {
+				free(tempID[i]);
+				tempID[i] = 0;
+			}
+			free(tempID);
+			tempID = 0;
+		}
+		else
+			return VKFFT_ERROR_MALLOC_FAILED;
+
+		if (rader_stage < (int64_t)sc->currentRaderContainer->numStages - 1) {
+			switch (locStageRadix) {
+			case 2:
+				locStageSizeSum += locStageSize;
+				break;
+			case 3:
+				locStageSizeSum += locStageSize * 2;
+				break;
+			case 4:
+				locStageSizeSum += locStageSize * 2;
+				break;
+			case 5:
+				locStageSizeSum += locStageSize * 4;
+				break;
+			case 6:
+				locStageSizeSum += locStageSize * 5;
+				break;
+			case 7:
+				locStageSizeSum += locStageSize * 6;
+				break;
+			case 8:
+				locStageSizeSum += locStageSize * 3;
+				break;
+			case 9:
+				locStageSizeSum += locStageSize * 8;
+				break;
+			case 10:
+				locStageSizeSum += locStageSize * 9;
+				break;
+			case 11:
+				locStageSizeSum += locStageSize * 10;
+				break;
+			case 12:
+				locStageSizeSum += locStageSize * 11;
+				break;
+			case 13:
+				locStageSizeSum += locStageSize * 12;
+				break;
+			case 14:
+				locStageSizeSum += locStageSize * 13;
+				break;
+			case 15:
+				locStageSizeSum += locStageSize * 14;
+				break;
+			case 16:
+				locStageSizeSum += locStageSize * 4;
+				break;
+			case 32:
+				locStageSizeSum += locStageSize * 5;
+				break;
+			default:
+				locStageSizeSum += locStageSize * (locStageRadix);
+				break;
+			}
+		}
+		locStageSize *= locStageRadix;
+		locStageAngle /= locStageRadix;
+		res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadEnd(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendBarrierVkFFT(sc, 1);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadStart(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+		if (res != VKFFT_SUCCESS) return res;
+	}
+
+	{
+		uint64_t locStageRadix = sc->currentRaderContainer->stageRadix[sc->currentRaderContainer->numStages - 1];
+		uint64_t logicalStoragePerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix] * sc->registerBoost;// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread * sc->registerBoost : sc->min_registers_per_thread * sc->registerBoost;
+		//uint64_t logicalRegistersPerThread = sc->currentRaderContainer->registers_per_thread_per_radix[locStageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
+		uint64_t locFFTDim = sc->currentRaderContainer->containerFFTDim; //different length due to all -1 cutoffs
+		//uint64_t locFFTsCombined = sc->currentRaderContainer->containerFFTNum * locFFTDim;
+		//uint64_t logicalGroupSize = (uint64_t)ceil(locFFTsCombined / (double)logicalStoragePerThread);
+		uint64_t subLogicalGroupSize = (uint64_t)ceil(locFFTDim / (double)logicalStoragePerThread); // hopefully it is not <1, will fix 
+		if (!raderTranspose) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, subLogicalGroupSize); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, subLogicalGroupSize); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s / %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //local id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s %% %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, sc->currentRaderContainer->containerFFTNum); //global prime id
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		if (!raderTranspose) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx2, sc->currentRaderContainer->containerFFTNum);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->raderIDx, subLogicalGroupSize);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s == 0) {\n", sc->raderIDx);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+
+		res = VkMovReal(sc, sc->stageInvocationID, sc->raderIDx2);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
+		if (res != VKFFT_SUCCESS) return res;
+		sprintf(tempNum, "%" PRIu64 "", stageSize);
+		res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
+		if (res != VKFFT_SUCCESS) return res;
+		sprintf(tempNum, "%" PRIu64 "", stageRadix);
+		res = VkMulReal(sc, sc->raderIDx2, sc->blockInvocationID, tempNum);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkAddReal(sc, sc->raderIDx2, sc->raderIDx2, sc->stageInvocationID);
+		if (res != VKFFT_SUCCESS) return res;
+
+		sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s;\n", sc->sdataID, sc->raderIDx2);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+
+		if (strided) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		else {
+			if (sc->localSize[1] > 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+		}
+		if (((sc->actualInverse) && (sc->normalize)) || ((sc->convolutionStep || sc->useBluesteinFFT) && (stageAngle > 0))) {
+			if (strcmp(stageNormalization, "")) {
+				res = VkMulComplexNumber(sc, sc->x0[1], sc->x0[1], stageNormalization);
+			}
+			if (res != VKFFT_SUCCESS) return res;
+		}
+
+		sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->sdataID, sc->x0[1]);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendZeropadEnd(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		res = appendBarrierVkFFT(sc, 1);
+		if (res != VKFFT_SUCCESS) return res;
+	}
+	return res;
+}
+static inline VkFFTResult appendMultRaderStage(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix, uint64_t stageID, uint64_t strided) {
+	VkFFTResult res = VKFFT_SUCCESS;
+	long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+	char vecType[30];
+	char LFending[4] = "";
+	char tempNum[50] = "";
+	if (!strcmp(floatType, "float")) sprintf(LFending, "f");
+#if(VKFFT_BACKEND==0)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "vec2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "dvec2");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+#elif(VKFFT_BACKEND==1)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "__cosf";
+	char sinDef[20] = "__sinf";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==2)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "__cosf";
+	char sinDef[20] = "__sinf";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "native_cos";
+	char sinDef[20] = "native_sin";
+	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+#endif
+	char stageNormalization[50] = "";
+	uint64_t normalizationValue = 1;
+	if ((((sc->actualInverse) && (sc->normalize)) || (sc->convolutionStep && (stageAngle > 0))) && (stageSize == 1) && (sc->axis_upload_id == 0) && (!(sc->useBluesteinFFT && (stageAngle < 0)))) {
+		if ((sc->performDCT) && (sc->actualInverse)) {
+			if (sc->performDCT == 1)
+				normalizationValue = (sc->sourceFFTSize - 1) * 2;
+			else
+				normalizationValue = sc->sourceFFTSize * 2;
+		}
+		else
+			normalizationValue = sc->sourceFFTSize;
+	}
+	if (sc->useBluesteinFFT && (stageAngle > 0) && (stageSize == 1) && (sc->axis_upload_id == 0)) {
+		normalizationValue *= sc->fft_dim_full;
+	}
+	if (normalizationValue != 1) {
+		sprintf(stageNormalization, "%.17e%s", 1.0 / (double)(normalizationValue), LFending);
+	}
+	char convolutionInverse[10] = "";
+	if (sc->convolutionStep) {
+		if (stageAngle < 0)
+			sprintf(convolutionInverse, ", 0");
+		else
+			sprintf(convolutionInverse, ", 1");
+	}
+	res = appendBarrierVkFFT(sc, 1);
+	if (res != VKFFT_SUCCESS) return res;
+	res = appendZeropadStart(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+	if (res != VKFFT_SUCCESS) return res;
+
+	uint64_t num_logical_subgroups = (strided) ? sc->localSize[1] / ((stageRadix + 1) / 2) : sc->localSize[0] / ((stageRadix + 1) / 2);
+	uint64_t num_logical_groups = (uint64_t)ceil((sc->fftDim / stageRadix) / (double)(num_logical_subgroups));
+	uint64_t require_cutoff_check = ((sc->fftDim == (num_logical_subgroups * num_logical_groups * stageRadix))) ? 0 : 1;
+	uint64_t require_cutoff_check2;
+	char* gl_LocalInvocationID = (strided) ? sc->gl_LocalInvocationID_y : sc->gl_LocalInvocationID_x;
+
+	if (strided) {
+		require_cutoff_check2 = ((sc->localSize[1] % ((stageRadix + 1) / 2)) == 0) ? 0 : 1;
+	}
+	else {
+		require_cutoff_check2 = ((sc->localSize[0] % ((stageRadix + 1) / 2)) == 0) ? 0 : 1;
+	}
+	sc->tempLen = sprintf(sc->tempStr, "	%s= %s %% %" PRIu64 ";\n", sc->raderIDx, gl_LocalInvocationID, (stageRadix + 1) / 2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "	%s= %s / %" PRIu64 ";\n", sc->raderIDx2, gl_LocalInvocationID, (stageRadix + 1) / 2);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+
+	for (uint64_t k = 0; k < sc->registerBoost; k++) {
+		for (uint64_t j = 0; j < 1; j++) {
+			if (stageSize > 1) {
+				if (require_cutoff_check2) {
+					if (strided) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->localSize[1] - sc->localSize[1] % ((stageRadix + 1) / 2));
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->localSize[0] - sc->localSize[0] % ((stageRadix + 1) / 2));
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				for (uint64_t t = 0; t < num_logical_groups; t++) {
+					if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s+%" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->raderIDx2, t * num_logical_subgroups, stageSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+
+					if (sc->LUT)
+						sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID*%" PRIu64 " + %" PRIu64 ";\n", stageRadix, stageSizeSum);
+					else
+						sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)stageAngle, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if (sc->LUT) {
+						sc->tempLen = sprintf(sc->tempStr, "		%s = twiddleLUT[LUTId+%s];\n\n", sc->w, sc->raderIDx);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if (!sc->inverse) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = -%s.y;\n", sc->w, sc->w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s(angle*%.17e%s*(%s));\n", sc->w, cosDef, 2.0 / stageRadix, LFending, sc->raderIDx);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s(angle*%.17e%s*(%s));\n", sc->w, sinDef, 2.0 / stageRadix, LFending, sc->raderIDx);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s = sincos_20(angle*%.17e%s*(%s));\n", sc->w, 2.0 / stageRadix, LFending, sc->raderIDx);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f \\n \", %s, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, sc->w, sc->w);
+					//res = VkAppendLine(sc);
+					//if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s) * %" PRIu64 " + %s + %" PRIu64 ";\n", sc->sdataID, sc->raderIDx, sc->fftDim / stageRadix, sc->raderIDx2, t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if (strided) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						if (sc->localSize[1] > 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->regIDs[0], sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+
+					res = VkMulComplex(sc, sc->temp, sc->regIDs[0], sc->w, 0);
+					if (res != VKFFT_SUCCESS) return res;
+
+					sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->sdataID, sc->temp);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx, (stageRadix - 1) / 2);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s+%" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->raderIDx2, t * num_logical_subgroups, stageSize);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+
+					if (sc->LUT)
+						sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID*%" PRIu64 " + %" PRIu64 ";\n", stageRadix, stageSizeSum);
+					else
+						sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)stageAngle, LFending);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if (sc->LUT) {
+						sc->tempLen = sprintf(sc->tempStr, "		%s = twiddleLUT[LUTId+%s+%" PRIu64 "];\n\n", sc->w, sc->raderIDx, (stageRadix + 1) / 2);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if (!sc->inverse) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = -%s.y;\n", sc->w, sc->w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					else {
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s(angle*%.17e%s*(%" PRIu64 " + %s));\n", sc->w, cosDef, 2.0 / stageRadix, LFending, (stageRadix + 1) / 2, sc->raderIDx);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s(angle*%.17e%s*(%" PRIu64 " + %s));\n", sc->w, sinDef, 2.0 / stageRadix, LFending, (stageRadix + 1) / 2, sc->raderIDx);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s = sincos_20(angle*%.17e%s*(%" PRIu64 " + %s));\n", sc->w, 2.0 / stageRadix, LFending, (stageRadix + 1) / 2, sc->raderIDx);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f \\n \", %s, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, sc->w, sc->w);
+					//res = VkAppendLine(sc);
+					//if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%" PRIu64 " + %s) * %" PRIu64 " + %s + %" PRIu64 ";\n", sc->sdataID, (stageRadix + 1) / 2, sc->raderIDx, sc->fftDim / stageRadix, sc->raderIDx2, t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if (strided) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						if (sc->localSize[1] > 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->regIDs[0], sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+
+					res = VkMulComplex(sc, sc->temp, sc->regIDs[0], sc->w, 0);
+					if (res != VKFFT_SUCCESS) return res;
+
+					sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->sdataID, sc->temp);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				if (require_cutoff_check2) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+				if (res != VKFFT_SUCCESS) return res;
+				res = appendZeropadEnd(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				res = appendBarrierVkFFT(sc, 1);
+				if (res != VKFFT_SUCCESS) return res;
+				res = appendZeropadStart(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			if (require_cutoff_check2) {
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->localSize[1] - sc->localSize[1] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->localSize[0] - sc->localSize[0] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			//save x0
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if (strided) {
+					if (sc->localSize[0] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s + %" PRIu64 ") * sharedStride + %s;\n", sc->sdataID, sc->raderIDx2, t * num_logical_subgroups, sc->gl_LocalInvocationID_x);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %" PRIu64 ";\n", sc->sdataID, sc->raderIDx2, t * num_logical_subgroups);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				else {
+					if (sc->localSize[1] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %" PRIu64 " + sharedStride * %s;\n", sc->sdataID, sc->raderIDx2, t * num_logical_subgroups, sc->gl_LocalInvocationID_y);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %" PRIu64 ";\n", sc->sdataID, sc->raderIDx2, t * num_logical_subgroups);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->x0[t], sc->sdataID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			//generator index + shuffle 
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s>0){\n", sc->raderIDx);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+
+			uint64_t g = sc->currentRaderContainer->generator;
+			if (sc->inline_rader_g_pow == 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow_%" PRIu64 "[%s-1];\n", sc->sdataID, stageRadix, sc->raderIDx);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else if (sc->inline_rader_g_pow == 2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow[%s-1+%" PRIu64 "];\n", sc->sdataID, sc->raderIDx, sc->currentRaderContainer->raderUintLUToffset);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= (%s-1);\n\
+			%s=1;\n\
+			while (%s != 0)\n\
+			{\n\
+				%s = (%s * %" PRIu64 ") %% %" PRIu64 ";\n\
+				%s--;\n\
+			}\n", sc->inoutID, sc->raderIDx, sc->sdataID, sc->inoutID, sc->sdataID, sc->sdataID, g, stageRadix, sc->inoutID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s * %" PRIu64 " + %s + %" PRIu64 ";\n", sc->combinedID, sc->sdataID, sc->fftDim / stageRadix, sc->raderIDx2, t * num_logical_subgroups);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s * sharedStride + %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_x);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					if (sc->localSize[1] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s + sharedStride * %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_y);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s = sdata[%s];\n", sc->regIDs[t * 2], sc->combinedID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			if (sc->inline_rader_g_pow == 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow_%" PRIu64 "[%s+ %" PRIu64 "];\n", sc->sdataID, stageRadix, sc->raderIDx, (stageRadix - 1) / 2 - 1);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else if (sc->inline_rader_g_pow == 2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow[%s+ %" PRIu64 "];\n", sc->sdataID, sc->raderIDx, (stageRadix - 1) / 2 - 1 + sc->currentRaderContainer->raderUintLUToffset);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= (%s+ %" PRIu64 ");\n\
+			%s=1;\n\
+			while (%s != 0)\n\
+			{\n\
+				%s = (%s * %" PRIu64 ") %% %" PRIu64 ";\n\
+				%s--;\n\
+			}\n", sc->inoutID, sc->raderIDx, (stageRadix - 1) / 2 - 1, sc->sdataID, sc->inoutID, sc->sdataID, sc->sdataID, g, stageRadix, sc->inoutID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s * %" PRIu64 " + %s + %" PRIu64 ";\n", sc->combinedID, sc->sdataID, sc->fftDim / stageRadix, sc->raderIDx2, t * num_logical_subgroups);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s * sharedStride + %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_x);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					if (sc->localSize[1] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s + sharedStride * %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_y);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s = sdata[%s];\n", sc->regIDs[2 * t + 1], sc->combinedID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+
+			if (require_cutoff_check2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+
+			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadEnd(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendBarrierVkFFT(sc, 1);
+			if (res != VKFFT_SUCCESS) return res;
+			//load deconv kernel
+			if (!sc->inline_rader_kernel) {
+				for (uint64_t t = 0; t < (uint64_t)ceil((stageRadix - 1) / ((double)(sc->localSize[0] * sc->localSize[1]))); t++) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %s *  %" PRIu64 " + %" PRIu64 ";\n", sc->combinedID, sc->gl_LocalInvocationID_x, sc->gl_LocalInvocationID_y, sc->localSize[0], t * sc->localSize[0] * sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if (t == ((uint64_t)ceil((stageRadix - 1) / ((double)(sc->localSize[0] * sc->localSize[1]))) - 1)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s < %" PRIu64 "){\n", sc->combinedID, (stageRadix - 1));
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					if (sc->LUT) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = twiddleLUT[%s+%" PRIu64 "];\n", sc->w, sc->combinedID, sc->currentRaderContainer->RaderKernelOffsetLUT);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if (sc->inverse) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s.y = -%s.y;\n", sc->w, sc->w);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s+%" PRIu64 "] = %s;\n", sc->combinedID, sc->RaderKernelOffsetShared[stageID], sc->w);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						if (sc->inline_rader_g_pow == 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow_%" PRIu64 "[%" PRIu64 " - %s];\n", sc->sdataID, stageRadix, stageRadix - 1, sc->combinedID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else if (sc->inline_rader_g_pow == 2) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow[%" PRIu64 " - %s];\n", sc->sdataID, stageRadix - 1 + sc->currentRaderContainer->raderUintLUToffset, sc->combinedID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "\
+			%s= (%" PRIu64 " - %s);\n\
+			%s=1;\n\
+			while (%s != 0)\n\
+			{\n\
+				%s = (%s * %" PRIu64 ") %% %" PRIu64 ";\n\
+				%s--;\n\
+			}\n", sc->inoutID, stageRadix - 1, sc->combinedID, sc->sdataID, sc->inoutID, sc->sdataID, sc->sdataID, g, stageRadix, sc->inoutID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (!strcmp(floatType, "float")) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s.x = %s(%.17e%s*%s);\n", sc->w, cosDef, (double)(2.0 * double_PI / stageRadix), LFending, sc->sdataID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							if (sc->inverse) {
+								sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s(%.17e%s*%s);\n", sc->w, sinDef, (double)(2.0 * double_PI / stageRadix), LFending, sc->sdataID);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								sc->tempLen = sprintf(sc->tempStr, "		%s.y = -%s(%.17e%s*%s);\n", sc->w, sinDef, (double)(2.0 * double_PI / stageRadix), LFending, sc->sdataID);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							//sc->tempLen = sprintf(sc->tempStr, "	w = %s(cos(angle*%.17e), sin(angle*%.17e));\n\n", vecType, 2.0 * i / radix, 2.0 * i / radix);
+						}
+						if (!strcmp(floatType, "double")) {
+							sc->tempLen = sprintf(sc->tempStr, "		%s = sincos_20(%.17e%s*%s);\n", sc->w, (double)(2.0 * double_PI / stageRadix), LFending, sc->sdataID);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+							if (!sc->inverse) {
+								sc->tempLen = sprintf(sc->tempStr, "		%s.y = -%s.y;\n", sc->w, sc->w);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s+%" PRIu64 "] = %s;\n", sc->combinedID, sc->RaderKernelOffsetShared[stageID], sc->w);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					if (t == ((uint64_t)ceil((stageRadix - 1) / ((double)(sc->localSize[0] * sc->localSize[1]))) - 1)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+			}
+			res = appendZeropadStart(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+			if (res != VKFFT_SUCCESS) return res;
+
+			if (require_cutoff_check2) {
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->localSize[1] - sc->localSize[1] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->localSize[0] - sc->localSize[0] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			//x0 is ready
+
+			//no subgroups
+			/* {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s==0){\n", sc->gl_LocalInvocationID_x);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = 0;\n\
+		%s.y = 0;\n", sc->regIDs[0], sc->regIDs[0]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = 0;\n", sc->combinedID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if (sc->localSize[1] > 1) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * (%s);\n", sc->sdataID, sc->combinedID, sc->gl_LocalInvocationID_y);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		while(%s<%" PRIu64 "){\n\
+		%s.x += sdata[%s].x;\n\
+		%s.y += sdata[%s].y;\n\
+		%s++; %s++;}\n", sc->combinedID, stageRadix, sc->regIDs[0], sc->sdataID, sc->regIDs[0], sc->sdataID, sc->combinedID, sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		while(%s<%" PRIu64 "){\n\
+		%s.x += sdata[%s].x;\n\
+		%s.y += sdata[%s].y;\n\
+		%s++;}\n", sc->combinedID, stageRadix, sc->regIDs[0], sc->combinedID, sc->regIDs[0], sc->combinedID, sc->combinedID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = 0;\n", sc->sdataID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if (sc->localSize[1] > 1) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * (%s);\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->sdataID, sc->regIDs[0]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}*/
+			//subgroups
+			/* {
+				uint64_t numGroupsQuant = ((((sc->localSize[0] * sc->localSize[1] * sc->localSize[2]) % sc->warpSize) == 0) || (sc->numSubgroups == 1)) ? sc->numSubgroups : sc->numSubgroups - 1;
+				if (numGroupsQuant != sc->numSubgroups) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_SubgroupID, numGroupsQuant);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				for (uint64_t t = 0; t < (uint64_t)ceil(sc->localSize[1] / (double)numGroupsQuant); t++) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = 0;\n", sc->regIDs[0]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s.y = 0;\n", sc->regIDs[0]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					uint64_t quant = (sc->warpSize < (sc->localSize[0] * sc->localSize[1] * sc->localSize[2])) ? sc->warpSize : (sc->localSize[0] * sc->localSize[1] * sc->localSize[2]);
+					for (uint64_t t2 = 0; t2 < (uint64_t)ceil(stageRadix / (double)quant); t2++) {
+						if ((t == (uint64_t)ceil(sc->localSize[1] / (double)numGroupsQuant) - 1) && (sc->localSize[1] > 1) && ((sc->localSize[1] % numGroupsQuant) != 0)) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_SubgroupID, sc->localSize[1] % numGroupsQuant);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (t2 == (uint64_t)ceil(stageRadix / (double)quant) - 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_SubgroupInvocationID, stageRadix % quant);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s+%" PRIu64 ") * %" PRIu64 ";\n", sc->sdataID, sc->gl_SubgroupInvocationID, t2 * quant, sc->fftDim / stageRadix);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+
+						if (sc->localSize[1] > 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * (%s+%" PRIu64 ");\n", sc->sdataID, sc->sdataID, sc->gl_SubgroupID, t * numGroupsQuant);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->regIDs[1], sc->sdataID);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkAddComplex(sc, sc->regIDs[0], sc->regIDs[0], sc->regIDs[1]);
+						if (res != VKFFT_SUCCESS) return res;
+						if (t2 == (uint64_t)ceil(stageRadix / (double)quant) - 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if ((t == (uint64_t)ceil(sc->localSize[1] / (double)numGroupsQuant) - 1) && (sc->localSize[1] > 1) && ((sc->localSize[1] % numGroupsQuant) != 0)) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+
+					res = VkSubgroupAdd(sc, sc->regIDs[0], sc->regIDs[0], 1);
+					if (res != VKFFT_SUCCESS) return res;
+
+					if ((t == (uint64_t)ceil(sc->localSize[1] / (double)numGroupsQuant) - 1) && (sc->localSize[1] > 1) && ((sc->localSize[1] % numGroupsQuant) != 0)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_SubgroupID, sc->localSize[1] % numGroupsQuant);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s==0){\n", sc->gl_SubgroupInvocationID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = 0;\n", sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+
+					if (sc->localSize[1] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * (%s+%" PRIu64 ");\n", sc->sdataID, sc->sdataID, sc->gl_SubgroupID, t * numGroupsQuant);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->sdataID, sc->regIDs[0]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if ((t == (uint64_t)ceil(sc->localSize[1] / (double)numGroupsQuant) - 1) && (sc->localSize[1] > 1) && ((sc->localSize[1] % numGroupsQuant) != 0)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				if (numGroupsQuant != sc->numSubgroups) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}*/
+
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s > 0){\n", sc->raderIDx);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = (%s) * %" PRIu64 " + %s + %" PRIu64 ";\n", sc->sdataID, sc->raderIDx, sc->fftDim / stageRadix, sc->raderIDx2, t * num_logical_subgroups);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %" PRIu64 " * sharedStride;\n", sc->combinedID, sc->sdataID, (stageRadix - 1) / 2 * sc->fftDim / stageRadix);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					if (sc->localSize[1] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + %" PRIu64 ";\n", sc->combinedID, sc->sdataID, (stageRadix - 1) / 2 * sc->fftDim / stageRadix);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = %s.x - %s.x;\n", sc->temp, sc->regIDs[2 * t], sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x += %s.x;\n", sc->regIDs[2 * t], sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.y = %s.y + %s.y;\n", sc->temp, sc->regIDs[2 * t], sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.y -= %s.y;\n", sc->regIDs[2 * t], sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->sdataID, sc->regIDs[2 * t]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s] = %s;\n", sc->combinedID, sc->temp);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f %%f %%f \\n \", %s, %s.x, %s.y, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, sc->regIDs[0], sc->regIDs[0], sc->temp, sc->temp);
+			//res = VkAppendLine(sc);
+			//if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+
+			if (require_cutoff_check2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadEnd(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendBarrierVkFFT(sc, 1);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadStart(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+			if (res != VKFFT_SUCCESS) return res;
+			if (require_cutoff_check2) {
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->localSize[1] - sc->localSize[1] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->localSize[0] - sc->localSize[0] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s < %" PRIu64 "){\n", sc->raderIDx, (stageRadix + 1) / 2);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = 0;\n", sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.y = 0;\n", sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s == %" PRIu64 "){\n", sc->raderIDx, (stageRadix - 1) / 2);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = 1; %s.y = 0;\n", sc->w, sc->w);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			for (uint64_t i = 0; i < (stageRadix - 1) / 2; i++) {
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s < %" PRIu64 "){\n", sc->raderIDx, (stageRadix - 1) / 2);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s = ((%" PRIu64 "+%s) %% %" PRIu64 ");\n", sc->sdataID, stageRadix - 1 - i, sc->raderIDx, (stageRadix - 1));
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (sc->inline_rader_kernel) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = r_rader_kernel_%" PRIu64 "[%s];\n\
+		%s.y = i_rader_kernel_%" PRIu64 "[%s];\n", sc->w, stageRadix, sc->sdataID, sc->w, stageRadix, sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s+%" PRIu64 "];\n", sc->w, sc->sdataID, sc->RaderKernelOffsetShared[stageID]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				for (uint64_t t = 0; t < num_logical_groups; t++) {
+#if(VKFFT_BACKEND != 2) //AMD compiler fix
+					if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+#endif
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s+ %" PRIu64 ";\n", sc->sdataID, sc->raderIDx2, t * num_logical_subgroups + (1 + i) * sc->fftDim / stageRadix);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if (strided) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s * sharedStride + %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						if (sc->localSize[1] > 1) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		%s = %s + sharedStride * %s;\n", sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_y);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->regIDs[0], sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					if (strided) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s += %" PRIu64 "*sharedStride;\n", sc->sdataID, (stageRadix - 1) / 2 * sc->fftDim / stageRadix);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					else {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		%s += %" PRIu64 " ;\n", sc->sdataID, (stageRadix - 1) / 2 * sc->fftDim / stageRadix);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		%s = sdata[%s];\n", sc->temp, sc->sdataID);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+#if(VKFFT_BACKEND == 2) //AMD compiler fix
+					if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s>=%" PRIu64 "){%s.x =0;%s.y=0;%s.x=0;%s.y=0;}\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups, sc->temp, sc->temp, sc->regIDs[0], sc->regIDs[0]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+#endif
+					sprintf(tempNum, "%s", sc->x0[t]);
+					res = VkFMA3Complex(sc, tempNum, sc->regIDs[2 * t + 1], sc->regIDs[0], sc->w, sc->temp);
+					if (res != VKFFT_SUCCESS) return res;
+#if(VKFFT_BACKEND != 2) //AMD compiler fix
+					if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+#endif
+#if(VKFFT_BACKEND == 2) //AMD compiler fix
+					if ((uint64_t)ceil((sc->localSize[0] * sc->localSize[1]) / ((double)sc->warpSize)) * sc->warpSize * (1 + sc->registers_per_thread + sc->usedLocRegs) > 2048) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+
+						if (require_cutoff_check2) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+
+						res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+						if (res != VKFFT_SUCCESS) return res;
+						res = appendZeropadEnd(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						res = appendBarrierVkFFT(sc, 1);
+						if (res != VKFFT_SUCCESS) return res;
+						res = appendZeropadStart(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+						if (res != VKFFT_SUCCESS) return res;
+
+						if (require_cutoff_check2) {
+							if (strided) {
+								sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->localSize[1] - sc->localSize[1] % ((stageRadix + 1) / 2));
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->localSize[0] - sc->localSize[0] % ((stageRadix + 1) / 2));
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s < %" PRIu64 "){\n", sc->raderIDx, (stageRadix + 1) / 2);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+#endif
+				}
+#if(VKFFT_BACKEND == 2) //AMD compiler fix
+				if ((uint64_t)ceil((sc->localSize[0] * sc->localSize[1]) / ((double)sc->warpSize)) * sc->warpSize * (1 + sc->registers_per_thread + sc->usedLocRegs) <= 2048) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+
+					if (require_cutoff_check2) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+
+					res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+					if (res != VKFFT_SUCCESS) return res;
+					res = appendZeropadEnd(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					res = appendBarrierVkFFT(sc, 1);
+					if (res != VKFFT_SUCCESS) return res;
+					res = appendZeropadStart(sc);
+					if (res != VKFFT_SUCCESS) return res;
+					res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+					if (res != VKFFT_SUCCESS) return res;
+
+					if (require_cutoff_check2) {
+						if (strided) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->localSize[1] - sc->localSize[1] % ((stageRadix + 1) / 2));
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->localSize[0] - sc->localSize[0] % ((stageRadix + 1) / 2));
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s < %" PRIu64 "){\n", sc->raderIDx, (stageRadix + 1) / 2);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+#endif
+			}
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sprintf(tempNum, "%s", sc->x0[t]);
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = %s.x-%s.x;\n\
+		%s.y = %s.y+%s.y;\n", sc->regIDs[2 * t], tempNum, sc->regIDs[2 * t + 1], sc->regIDs[2 * t], tempNum, sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		%s.x = %s.x+%s.x;\n\
+		%s.y = %s.y-%s.y;\n", sc->regIDs[2 * t + 1], tempNum, sc->regIDs[2 * t + 1], sc->regIDs[2 * t + 1], tempNum, sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+
+			if (require_cutoff_check2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+
+			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadEnd(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendBarrierVkFFT(sc, 1);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadStart(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
+			if (res != VKFFT_SUCCESS) return res;
+
+			if (require_cutoff_check2) {
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->localSize[1] - sc->localSize[1] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->localSize[0] - sc->localSize[0] % ((stageRadix + 1) / 2));
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s < %" PRIu64 "){\n", sc->raderIDx, (stageRadix - 1) / 2);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			//sc->tempLen = sprintf(sc->tempStr, "	printf(\"%%d %%f %%f \\n \", %s, %s.x, %s.y);\n\n", sc->gl_LocalInvocationID_x, sc->regIDs[1], sc->regIDs[1]);
+			//res = VkAppendLine(sc);
+			//if (res != VKFFT_SUCCESS) return res;
+			if (sc->inline_rader_g_pow == 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow_%" PRIu64 "[%" PRIu64 "-%s];\n", sc->sdataID, stageRadix, stageRadix - 1, sc->raderIDx);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else if (sc->inline_rader_g_pow == 2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow[%" PRIu64 "-%s];\n", sc->sdataID, stageRadix - 1 + sc->currentRaderContainer->raderUintLUToffset, sc->raderIDx);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= (%" PRIu64 "-%s);\n\
+			%s=1;\n\
+			while (%s != 0)\n\
+			{\n\
+				%s = (%s * %" PRIu64 ") %% %" PRIu64 ";\n\
+				%s--;\n\
+			}\n", sc->inoutID, stageRadix - 1, sc->raderIDx, sc->sdataID, sc->inoutID, sc->sdataID, sc->sdataID, g, stageRadix, sc->inoutID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}else{\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		%s = 0;\n", sc->sdataID);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				sprintf(tempNum, "%" PRIu64 "", t * num_logical_subgroups);
+				res = VkAddReal(sc, sc->combinedID, sc->raderIDx2, tempNum);
+				if (res != VKFFT_SUCCESS) return res;
+				sprintf(tempNum, "%" PRIu64 "", stageSize);
+				res = VkModReal(sc, sc->stageInvocationID, sc->combinedID, tempNum);
+				if (res != VKFFT_SUCCESS) return res;
+				res = VkSubReal(sc, sc->blockInvocationID, sc->combinedID, sc->stageInvocationID);
+				if (res != VKFFT_SUCCESS) return res;
+				sprintf(tempNum, "%" PRIu64 "", stageRadix);
+				res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
+				if (res != VKFFT_SUCCESS) return res;
+
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s + %s * %" PRIu64 " + %s;\n", sc->combinedID, sc->inoutID, sc->sdataID, stageSize, sc->stageInvocationID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s * sharedStride + %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_x);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					if (sc->localSize[1] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s + sharedStride * %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_y);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				if (((sc->actualInverse) && (sc->normalize)) || ((sc->convolutionStep || sc->useBluesteinFFT) && (stageAngle > 0))) {
+					if (strcmp(stageNormalization, "")) {
+						res = VkMulComplexNumber(sc, sc->regIDs[2 * t], sc->regIDs[2 * t], stageNormalization);
+					}
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s]=%s;\n", sc->combinedID, sc->regIDs[2 * t]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s < %" PRIu64 "){\n", sc->raderIDx, (stageRadix - 1) / 2);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			if (sc->inline_rader_g_pow == 1) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow_%" PRIu64 "[%" PRIu64 "-%s];\n", sc->sdataID, stageRadix, (stageRadix - 1) / 2, sc->raderIDx);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else if (sc->inline_rader_g_pow == 2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= g_pow[%" PRIu64 "-%s];\n", sc->sdataID, (stageRadix - 1) / 2 + sc->currentRaderContainer->raderUintLUToffset, sc->raderIDx);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			else {
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s= (%" PRIu64 "-%s);\n\
+			%s=1;\n\
+			while (%s != 0)\n\
+			{\n\
+				%s = (%s * %" PRIu64 ") %% %" PRIu64 ";\n\
+				%s--;\n\
+			}\n", sc->inoutID, (stageRadix - 1) / 2, sc->raderIDx, sc->sdataID, sc->inoutID, sc->sdataID, sc->sdataID, g, stageRadix, sc->inoutID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			for (uint64_t t = 0; t < num_logical_groups; t++) {
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if(%s<%" PRIu64 "){\n", sc->raderIDx2, sc->fftDim / stageRadix - t * num_logical_subgroups);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+
+				sprintf(tempNum, "%" PRIu64 "", t * num_logical_subgroups);
+				res = VkAddReal(sc, sc->combinedID, sc->raderIDx2, tempNum);
+				if (res != VKFFT_SUCCESS) return res;
+				sprintf(tempNum, "%" PRIu64 "", stageSize);
+				res = VkModReal(sc, sc->stageInvocationID, sc->combinedID, tempNum);
+				if (res != VKFFT_SUCCESS) return res;
+				res = VkSubReal(sc, sc->blockInvocationID, sc->combinedID, sc->stageInvocationID);
+				if (res != VKFFT_SUCCESS) return res;
+				sprintf(tempNum, "%" PRIu64 "", stageRadix);
+				res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
+				if (res != VKFFT_SUCCESS) return res;
+				sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s + %s * %" PRIu64 " + %s;\n", sc->combinedID, sc->inoutID, sc->sdataID, stageSize, sc->stageInvocationID);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+				if (strided) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s * sharedStride + %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_x);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				else {
+					if (sc->localSize[1] > 1) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+			%s = %s + sharedStride * %s;\n", sc->combinedID, sc->combinedID, sc->gl_LocalInvocationID_y);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+				}
+				if (((sc->actualInverse) && (sc->normalize)) || ((sc->convolutionStep || sc->useBluesteinFFT) && (stageAngle > 0))) {
+					if (strcmp(stageNormalization, "")) {
+						res = VkMulComplexNumber(sc, sc->regIDs[2 * t + 1], sc->regIDs[2 * t + 1], stageNormalization);
+					}
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				sc->tempLen = sprintf(sc->tempStr, "\
+		sdata[%s]=%s;\n", sc->combinedID, sc->regIDs[2 * t + 1]);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+
+				if ((require_cutoff_check) && (t == num_logical_groups - 1)) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+			}
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			if (require_cutoff_check2) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendZeropadEnd(sc);
+			if (res != VKFFT_SUCCESS) return res;
+			res = appendBarrierVkFFT(sc, 1);
+			if (res != VKFFT_SUCCESS) return res;
+		}
 	}
-	res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
-	if (res != VKFFT_SUCCESS) return res;
+
 	return res;
 }
 
-static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, double stageAngle, uint64_t stageRadix) {
+static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char vecType[30];
 	char LFending[4] = "";
@@ -13359,6 +18432,10 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
 
 	char convolutionInverse[10] = "";
@@ -13370,7 +18447,7 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 	}
 	uint64_t logicalStoragePerThread = sc->registers_per_thread_per_radix[stageRadix] * sc->registerBoost;// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread * sc->registerBoost : sc->min_registers_per_thread * sc->registerBoost;
 	uint64_t logicalRegistersPerThread = sc->registers_per_thread_per_radix[stageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
-	uint64_t logicalGroupSize = sc->fftDim / logicalStoragePerThread;
+	uint64_t logicalGroupSize = (uint64_t)ceil(sc->fftDim / (double)logicalStoragePerThread);
 	if ((!((sc->readToRegisters == 1) && (stageSize == 1) && (!(((sc->convolutionStep) || (sc->useBluesteinFFT && sc->BluesteinConvolutionStep)) && (stageAngle > 0) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)))))) && ((sc->localSize[0] * logicalStoragePerThread > sc->fftDim) || (stageSize > 1) || ((sc->localSize[1] > 1) && (!(sc->performR2C && (sc->actualInverse)))) || ((sc->convolutionStep) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)) && (stageAngle > 0)) || (sc->performDCT)))
 	{
 		res = appendBarrierVkFFT(sc, 1);
@@ -13381,13 +18458,6 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 	res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 	if (res != VKFFT_SUCCESS) return res;
 
-	if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim) {
-		sc->tempLen = sprintf(sc->tempStr, "\
-		if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalStoragePerThread, sc->fftDim);
-		res = VkAppendLine(sc);
-		if (res != VKFFT_SUCCESS) return res;
-	}
-
 	//upload second stage of LUT to sm
 	uint64_t numLUTelementsStage = 0;
 	switch (stageRadix) {
@@ -13407,16 +18477,34 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 		numLUTelementsStage = 5;
 		break;
 	default:
-		numLUTelementsStage = stageRadix - 1;
+		if (stageRadix < sc->fixMinRaderPrimeMult)
+			numLUTelementsStage = stageRadix - 1;
+		else
+			numLUTelementsStage = stageRadix;
 		break;
 	}
-	if ((sc->LUT) && (stageSize > 1) && ((((numLUTelementsStage >= 4)&&(sc->fftDim>=1024))||(((numLUTelementsStage >= 3) && (sc->fftDim < 1024))))|| (logicalRegistersPerThread / stageRadix > 1)) && (sc->registerBoost == 1) && (stageSize < sc->warpSize))
+	if ((sc->LUT) && (stageSize > 1) && ((((numLUTelementsStage >= 4) && (sc->fftDim >= 1024)) || (((numLUTelementsStage >= 3) && (sc->fftDim < 1024)))) || (logicalRegistersPerThread / stageRadix > 1)) && (sc->registerBoost == 1) && (stageSize < sc->warpSize))
 		sc->useCoalescedLUTUploadToSM = 1;
 	else
 		sc->useCoalescedLUTUploadToSM = 0;
 
 	for (uint64_t k = 0; k < sc->registerBoost; k++) {
+		if (logicalGroupSize != sc->localSize[0]) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalGroupSize);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
 		for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
+			if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) continue;
+			if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+				uint64_t current_group_cut = sc->fftDim / stageRadix - (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, current_group_cut);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+
 			sc->tempLen = sprintf(sc->tempStr, "\
 		%s = (%s+ %" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->gl_LocalInvocationID_x, (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize, stageSize);
 			res = VkAppendLine(sc);
@@ -13424,7 +18512,7 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 			if (sc->LUT)
 				sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID + %" PRIu64 ";\n", stageSizeSum);
 			else
-				sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", stageAngle, LFending);
+				sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)stageAngle, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			if ((!((sc->readToRegisters == 1) && (stageSize == 1) && (!(((sc->convolutionStep) || (sc->useBluesteinFFT && sc->BluesteinConvolutionStep)) && (stageAngle > 0) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)))))) && ((sc->registerBoost == 1) && ((sc->localSize[0] * logicalStoragePerThread > sc->fftDim) || (stageSize > 1) || ((sc->localSize[1] > 1) && (!(sc->performR2C && (sc->actualInverse)))) || ((sc->convolutionStep) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)) && (stageAngle > 0)) || (sc->performDCT)))) {
@@ -13463,9 +18551,9 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 					for (uint64_t i = 0; i < stageRadix; i++) {
 						regID[i] = (char*)malloc(sizeof(char) * 50);
 						if (!regID[i]) {
-							for (uint64_t j = 0; j < i; j++) {
-								free(regID[j]);
-								regID[j] = 0;
+							for (uint64_t p = 0; p < i; p++) {
+								free(regID[p]);
+								regID[p] = 0;
 							}
 							free(regID);
 							regID = 0;
@@ -13497,15 +18585,20 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 				else
 					return VKFFT_ERROR_MALLOC_FAILED;
 			}
-		}
 
-		if (sc->useCoalescedLUTUploadToSM) {
-			if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim) {
-				sc->tempLen = sprintf(sc->tempStr, "\
-		}\n");
+			if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "		}\n");
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
+		}
+		if (logicalGroupSize != sc->localSize[0]) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		if (sc->useCoalescedLUTUploadToSM) {
 			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendZeropadEnd(sc);
@@ -13555,36 +18648,43 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 			if (res != VKFFT_SUCCESS) return res;
 			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 			if (res != VKFFT_SUCCESS) return res;
-
-			if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim) {
+			if (logicalGroupSize != sc->localSize[0]) {
 				sc->tempLen = sprintf(sc->tempStr, "\
-		if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalStoragePerThread, sc->fftDim);
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalGroupSize);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
-			char** regID = (char**)malloc(sizeof(char*) * stageRadix);
-			if (regID) {
-				for (uint64_t i = 0; i < stageRadix; i++) {
-					regID[i] = (char*)malloc(sizeof(char) * 50);
-					if (!regID[i]) {
-						for (uint64_t j = 0; j < i; j++) {
-							free(regID[j]);
-							regID[j] = 0;
+				if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) continue;
+				if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+					uint64_t current_group_cut = sc->fftDim / stageRadix - (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, current_group_cut);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				char** regID = (char**)malloc(sizeof(char*) * stageRadix);
+				if (regID) {
+					for (uint64_t i = 0; i < stageRadix; i++) {
+						regID[i] = (char*)malloc(sizeof(char) * 50);
+						if (!regID[i]) {
+							for (uint64_t p = 0; p < i; p++) {
+								free(regID[p]);
+								regID[p] = 0;
+							}
+							free(regID);
+							regID = 0;
+							return VKFFT_ERROR_MALLOC_FAILED;
 						}
-						free(regID);
-						regID = 0;
-						return VKFFT_ERROR_MALLOC_FAILED;
-					}
-					uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
-					id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
-					sprintf(regID[i], "%s", sc->regIDs[id]);
-					/*if(j + i * logicalStoragePerThread / stageRadix < logicalRegistersPerThread)
-						sprintf(regID[i], "%s", sc->regIDs[j + i * logicalStoragePerThread / stageRadix]);
-					else
-						sprintf(regID[i], "%" PRIu64 "[%" PRIu64 "]", (j + i * logicalStoragePerThread / stageRadix)/ logicalRegistersPerThread, (j + i * logicalStoragePerThread / stageRadix) % logicalRegistersPerThread);*/
+						uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(regID[i], "%s", sc->regIDs[id]);
+						/*if(j + i * logicalStoragePerThread / stageRadix < logicalRegistersPerThread)
+							sprintf(regID[i], "%s", sc->regIDs[j + i * logicalStoragePerThread / stageRadix]);
+						else
+							sprintf(regID[i], "%" PRIu64 "[%" PRIu64 "]", (j + i * logicalStoragePerThread / stageRadix)/ logicalRegistersPerThread, (j + i * logicalStoragePerThread / stageRadix) % logicalRegistersPerThread);*/
 
-				}
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 		%s = (%s+ %" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->gl_LocalInvocationID_x, (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize, stageSize);
 					res = VkAppendLine(sc);
@@ -13593,59 +18693,66 @@ static inline VkFFTResult appendRadixStageNonStrided(VkFFTSpecializationConstant
 						if (sc->LUT)
 							sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID + %" PRIu64 ";\n", stageSizeSum);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", stageAngle, LFending);
+							sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)stageAngle, LFending);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					res = inlineRadixKernelVkFFT(sc, floatType, uintType, stageRadix, stageSize, stageSizeSum, stageAngle, regID);
-				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 0; i < stageRadix; i++) {
-					uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
-					id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
-					sprintf(sc->regIDs[id], "%s", regID[i]);
+					if (res != VKFFT_SUCCESS) return res;
+					for (uint64_t i = 0; i < stageRadix; i++) {
+						uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(sc->regIDs[id], "%s", regID[i]);
+					}
+					for (uint64_t i = 0; i < stageRadix; i++) {
+						free(regID[i]);
+						regID[i] = 0;
+					}
+					free(regID);
+					regID = 0;
 				}
-				for (uint64_t i = 0; i < stageRadix; i++) {
-					free(regID[i]);
-					regID[i] = 0;
+				else
+					return VKFFT_ERROR_MALLOC_FAILED;
+				if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+					sc->tempLen = sprintf(sc->tempStr, "		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-				free(regID);
-				regID = 0;
 			}
-			else
-				return VKFFT_ERROR_MALLOC_FAILED;
-		}
-		if ((stageSize == 1) && (sc->cacheShuffle)) {
-			for (uint64_t i = 0; i < logicalRegistersPerThread; i++) {
-				uint64_t id = i + k * logicalRegistersPerThread;
-				id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+			if (logicalGroupSize != sc->localSize[0]) {
 				sc->tempLen = sprintf(sc->tempStr, "\
-		shuffle[%" PRIu64 "]=%s;\n", i, sc->regIDs[id]);
+		}\n");
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
-			for (uint64_t i = 0; i < logicalRegistersPerThread; i++) {
-				uint64_t id = i + k * logicalRegistersPerThread;
-				id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
-				sc->tempLen = sprintf(sc->tempStr, "\
+			if ((stageSize == 1) && (sc->cacheShuffle)) {
+				for (uint64_t i = 0; i < logicalRegistersPerThread; i++) {
+					uint64_t id = i + k * logicalRegistersPerThread;
+					id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		shuffle[%" PRIu64 "]=%s;\n", i, sc->regIDs[id]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				for (uint64_t i = 0; i < logicalRegistersPerThread; i++) {
+					uint64_t id = i + k * logicalRegistersPerThread;
+					id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+					sc->tempLen = sprintf(sc->tempStr, "\
 		%s=shuffle[(%" PRIu64 "+tshuffle)%%(%" PRIu64 ")];\n", sc->regIDs[id], i, logicalRegistersPerThread);
-				res = VkAppendLine(sc);
-				if (res != VKFFT_SUCCESS) return res;
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
 			}
 		}
 	}
-	if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim) {
-		sc->tempLen = sprintf(sc->tempStr, "		}\n");
-		res = VkAppendLine(sc);
-		if (res != VKFFT_SUCCESS) return res;
-	}
+
 	res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 	if (res != VKFFT_SUCCESS) return res;
 	res = appendZeropadEnd(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
 }
-static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, double stageAngle, uint64_t stageRadix) {
+static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char vecType[30];
 	char LFending[4] = "";
@@ -13666,6 +18773,10 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
 
 	char convolutionInverse[10] = "";
@@ -13677,7 +18788,7 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 	}
 	uint64_t logicalStoragePerThread = sc->registers_per_thread_per_radix[stageRadix] * sc->registerBoost;// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread * sc->registerBoost : sc->min_registers_per_thread * sc->registerBoost;
 	uint64_t logicalRegistersPerThread = sc->registers_per_thread_per_radix[stageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
-	uint64_t logicalGroupSize = sc->fftDim / logicalStoragePerThread;
+	uint64_t logicalGroupSize = (uint64_t)ceil(sc->fftDim / (double)logicalStoragePerThread);
 	if ((!((sc->readToRegisters == 1) && (stageSize == 1) && (!(((sc->convolutionStep) || (sc->useBluesteinFFT && sc->BluesteinConvolutionStep)) && (stageAngle > 0) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)))))) && (((sc->axis_id == 0) && (sc->axis_upload_id == 0) && (!(sc->performR2C && (sc->actualInverse)))) || (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) || (stageSize > 1) || ((sc->convolutionStep) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)) && (stageAngle > 0)) || (sc->performDCT)))
 	{
 		res = appendBarrierVkFFT(sc, 1);
@@ -13687,12 +18798,7 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 	if (res != VKFFT_SUCCESS) return res;
 	res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 	if (res != VKFFT_SUCCESS) return res;
-	if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) {
-		sc->tempLen = sprintf(sc->tempStr, "\
-		if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalStoragePerThread, sc->fftDim);
-		res = VkAppendLine(sc);
-		if (res != VKFFT_SUCCESS) return res;
-	}
+
 
 	//upload second stage of LUT to sm
 	uint64_t numLUTelementsStage = 0;
@@ -13713,7 +18819,10 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 		numLUTelementsStage = 5;
 		break;
 	default:
-		numLUTelementsStage = stageRadix - 1;
+		if (stageRadix < sc->fixMinRaderPrimeMult)
+			numLUTelementsStage = stageRadix - 1;
+		else
+			numLUTelementsStage = stageRadix;
 		break;
 	}
 	if ((sc->LUT) && (stageSize > 1) && ((((numLUTelementsStage >= 4) && (sc->fftDim >= 1024)) || (((numLUTelementsStage >= 3) && (sc->fftDim < 1024)))) || (logicalRegistersPerThread / stageRadix > 1)) && (sc->registerBoost == 1) && (stageSize < sc->warpSize))
@@ -13723,7 +18832,21 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 
 
 	for (uint64_t k = 0; k < sc->registerBoost; k++) {
+		if (logicalGroupSize != sc->localSize[1]) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalGroupSize);
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
 		for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
+			if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) continue;
+			if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+				uint64_t current_group_cut = sc->fftDim / stageRadix - (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize;
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, current_group_cut);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
 			sc->tempLen = sprintf(sc->tempStr, "\
 		%s = (%s+ %" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->gl_LocalInvocationID_y, (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize, stageSize);
 			res = VkAppendLine(sc);
@@ -13731,7 +18854,7 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 			if (sc->LUT)
 				sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID + %" PRIu64 ";\n", stageSizeSum);
 			else
-				sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", stageAngle, LFending);
+				sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)stageAngle, LFending);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 			if ((!((sc->readToRegisters == 1) && (stageSize == 1) && (!(((sc->convolutionStep) || (sc->useBluesteinFFT && sc->BluesteinConvolutionStep)) && (stageAngle > 0) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)))))) && ((sc->registerBoost == 1) && (((sc->axis_id == 0) && (sc->axis_upload_id == 0) && (!(sc->performR2C && (sc->actualInverse)))) || (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) || (stageSize > 1) || ((sc->convolutionStep) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)) && (stageAngle > 0)) || (sc->performDCT)))) {
@@ -13750,9 +18873,9 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 					for (uint64_t i = 0; i < stageRadix; i++) {
 						regID[i] = (char*)malloc(sizeof(char) * 50);
 						if (!regID[i]) {
-							for (uint64_t j = 0; j < i; j++) {
-								free(regID[j]);
-								regID[j] = 0;
+							for (uint64_t p = 0; p < i; p++) {
+								free(regID[p]);
+								regID[p] = 0;
 							}
 							free(regID);
 							regID = 0;
@@ -13784,15 +18907,20 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 				else
 					return VKFFT_ERROR_MALLOC_FAILED;
 			}
-		}
-
-		//upload second stage of LUT to sm
-		if (sc->useCoalescedLUTUploadToSM) {
-			if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) {
+			if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
 				sc->tempLen = sprintf(sc->tempStr, "		}\n");
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
+		}
+		if (logicalGroupSize != sc->localSize[1]) {
+			sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+			res = VkAppendLine(sc);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+		//upload second stage of LUT to sm
+		if (sc->useCoalescedLUTUploadToSM) {
 			res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendZeropadEnd(sc);
@@ -13840,36 +18968,43 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 			if (res != VKFFT_SUCCESS) return res;
 			res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 			if (res != VKFFT_SUCCESS) return res;
-
-			if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) {
+			if (logicalGroupSize != sc->localSize[1]) {
 				sc->tempLen = sprintf(sc->tempStr, "\
-		if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalStoragePerThread, sc->fftDim);
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalGroupSize);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 			}
 			for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
-			char** regID = (char**)malloc(sizeof(char*) * stageRadix);
-			if (regID) {
-				for (uint64_t i = 0; i < stageRadix; i++) {
-					regID[i] = (char*)malloc(sizeof(char) * 50);
-					if (!regID[i]) {
-						for (uint64_t j = 0; j < i; j++) {
-							free(regID[j]);
-							regID[j] = 0;
+				if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) continue;
+				if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+					uint64_t current_group_cut = sc->fftDim / stageRadix - (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize;
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, current_group_cut);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				char** regID = (char**)malloc(sizeof(char*) * stageRadix);
+				if (regID) {
+					for (uint64_t i = 0; i < stageRadix; i++) {
+						regID[i] = (char*)malloc(sizeof(char) * 50);
+						if (!regID[i]) {
+							for (uint64_t p = 0; p < i; p++) {
+								free(regID[p]);
+								regID[p] = 0;
+							}
+							free(regID);
+							regID = 0;
+							return VKFFT_ERROR_MALLOC_FAILED;
 						}
-						free(regID);
-						regID = 0;
-						return VKFFT_ERROR_MALLOC_FAILED;
-					}
-					uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
-					id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
-					sprintf(regID[i], "%s", sc->regIDs[id]);
-					/*if (j + i * logicalStoragePerThread / stageRadix < logicalRegistersPerThread)
-						sprintf(regID[i], "_%" PRIu64 "", j + i * logicalStoragePerThread / stageRadix);
-					else
-						sprintf(regID[i], "%" PRIu64 "[%" PRIu64 "]", (j + i * logicalStoragePerThread / stageRadix) / logicalRegistersPerThread, (j + i * logicalStoragePerThread / stageRadix) % logicalRegistersPerThread);*/
+						uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(regID[i], "%s", sc->regIDs[id]);
+						/*if (j + i * logicalStoragePerThread / stageRadix < logicalRegistersPerThread)
+							sprintf(regID[i], "_%" PRIu64 "", j + i * logicalStoragePerThread / stageRadix);
+						else
+							sprintf(regID[i], "%" PRIu64 "[%" PRIu64 "]", (j + i * logicalStoragePerThread / stageRadix) / logicalRegistersPerThread, (j + i * logicalStoragePerThread / stageRadix) % logicalRegistersPerThread);*/
 
-				}
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 		%s = (%s+ %" PRIu64 ") %% (%" PRIu64 ");\n", sc->stageInvocationID, sc->gl_LocalInvocationID_y, (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize, stageSize);
 					res = VkAppendLine(sc);
@@ -13877,33 +19012,39 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 					if (sc->LUT)
 						sc->tempLen = sprintf(sc->tempStr, "		LUTId = stageInvocationID + %" PRIu64 ";\n", stageSizeSum);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", stageAngle, LFending);
+						sc->tempLen = sprintf(sc->tempStr, "		angle = stageInvocationID * %.17e%s;\n", (double)stageAngle, LFending);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					res = inlineRadixKernelVkFFT(sc, floatType, uintType, stageRadix, stageSize, stageSizeSum, stageAngle, regID);
-				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 0; i < stageRadix; i++) {
-					uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
-					id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
-					sprintf(sc->regIDs[id], "%s", regID[i]);
+					if (res != VKFFT_SUCCESS) return res;
+					for (uint64_t i = 0; i < stageRadix; i++) {
+						uint64_t id = j + k * logicalRegistersPerThread / stageRadix + i * logicalStoragePerThread / stageRadix;
+						id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
+						sprintf(sc->regIDs[id], "%s", regID[i]);
+					}
+					for (uint64_t i = 0; i < stageRadix; i++) {
+						free(regID[i]);
+						regID[i] = 0;
+					}
+					free(regID);
+					regID = 0;
 				}
-				for (uint64_t i = 0; i < stageRadix; i++) {
-					free(regID[i]);
-					regID[i] = 0;
+				else
+					return VKFFT_ERROR_MALLOC_FAILED;
+				if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+					sc->tempLen = sprintf(sc->tempStr, "		}\n");
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
 				}
-				free(regID);
-				regID = 0;
 			}
-			else
-				return VKFFT_ERROR_MALLOC_FAILED;
+			if (logicalGroupSize != sc->localSize[1]) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
 			}
 		}
 	}
-	if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) {
-		sc->tempLen = sprintf(sc->tempStr, "		}\n");
-		res = VkAppendLine(sc);
-		if (res != VKFFT_SUCCESS) return res;
-	}
 	res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 	if (res != VKFFT_SUCCESS) return res;
 	res = appendZeropadEnd(sc);
@@ -13915,26 +19056,67 @@ static inline VkFFTResult appendRadixStageStrided(VkFFTSpecializationConstantsLa
 	}
 	return res;
 }
-static inline VkFFTResult appendRadixStage(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, double stageAngle, uint64_t stageRadix, uint64_t shuffleType) {
+static inline VkFFTResult appendRadixStage(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix, uint64_t stageID, uint64_t shuffleType) {
 	VkFFTResult res = VKFFT_SUCCESS;
-	switch (shuffleType) {
-	case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144: {
-		res = appendRadixStageNonStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix);
-		if (res != VKFFT_SUCCESS) return res;
-		//appendBarrierVkFFT(sc, 1);
-		break;
-	}
-	case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145: {
-		res = appendRadixStageStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix);
-		if (res != VKFFT_SUCCESS) return res;
-		//appendBarrierVkFFT(sc, 1);
-		break;
+	if (sc->rader_generator[stageID]) {
+		for (uint64_t i = 0; i < sc->numRaderPrimes; i++) {
+			if (sc->raderContainer[i].prime == stageRadix) {
+				sc->currentRaderContainer = &sc->raderContainer[i];
+			}
+		}
+		if (sc->currentRaderContainer->type) {
+			switch (shuffleType) {
+			case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144: {
+				res = appendMultRaderStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageID, 0);
+				if (res != VKFFT_SUCCESS) return res;
+				//appendBarrierVkFFT(sc, 1);
+				break;
+			}
+			case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145: {
+				res = appendMultRaderStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageID, 1);
+				if (res != VKFFT_SUCCESS) return res;
+				//appendBarrierVkFFT(sc, 1);
+				break;
+			}
+			}
+		}
+		else {
+			switch (shuffleType) {
+			case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144: {
+				res = appendFFTRaderStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageID, 0);
+				if (res != VKFFT_SUCCESS) return res;
+				//appendBarrierVkFFT(sc, 1);
+				break;
+			}
+			case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145: {
+				res = appendFFTRaderStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageID, 1);
+				if (res != VKFFT_SUCCESS) return res;
+				//appendBarrierVkFFT(sc, 1);
+				break;
+			}
+			}
+		}
 	}
+	else {
+		switch (shuffleType) {
+		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144: {
+			res = appendRadixStageNonStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix);
+			if (res != VKFFT_SUCCESS) return res;
+			//appendBarrierVkFFT(sc, 1);
+			break;
+		}
+		case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145: {
+			res = appendRadixStageStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix);
+			if (res != VKFFT_SUCCESS) return res;
+			//appendBarrierVkFFT(sc, 1);
+			break;
+		}
+		}
 	}
 	return res;
 }
 
-static inline VkFFTResult appendRegisterBoostShuffle(VkFFTSpecializationConstantsLayout* sc, const char* floatType, uint64_t stageSize, uint64_t stageRadixPrev, uint64_t stageRadix, double stageAngle) {
+static inline VkFFTResult appendRegisterBoostShuffle(VkFFTSpecializationConstantsLayout* sc, const char* floatType, uint64_t stageSize, uint64_t stageRadixPrev, uint64_t stageRadix, long double stageAngle) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	/*if (((sc->actualInverse) && (sc->normalize)) || ((sc->convolutionStep || sc->useBluesteinFFT) && (stageAngle > 0))) {
 		uint64_t bluesteinInverseNormalize = 1;
@@ -13959,7 +19141,7 @@ static inline VkFFTResult appendRegisterBoostShuffle(VkFFTSpecializationConstant
 	return res;
 }
 
-static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, double stageAngle, uint64_t stageRadix, uint64_t stageRadixNext) {
+static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix, uint64_t stageRadixNext) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char vecType[30];
 	char LFending[4] = "";
@@ -13979,6 +19161,10 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
 	char stageNormalization[50] = "";
 	uint64_t normalizationValue = 1;
@@ -14005,8 +19191,8 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 	uint64_t logicalRegistersPerThread = sc->registers_per_thread_per_radix[stageRadix];// (sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
 	uint64_t logicalRegistersPerThreadNext = sc->registers_per_thread_per_radix[stageRadixNext];// (sc->registers_per_thread % stageRadixNext == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
 
-	uint64_t logicalGroupSize = sc->fftDim / logicalStoragePerThread;
-	uint64_t logicalGroupSizeNext = sc->fftDim / logicalStoragePerThreadNext;
+	uint64_t logicalGroupSize = (uint64_t)ceil(sc->fftDim / (double)logicalStoragePerThread);
+	uint64_t logicalGroupSizeNext = (uint64_t)ceil(sc->fftDim / (double)logicalStoragePerThreadNext);
 	if ((!((sc->writeFromRegisters == 1) && (stageSize == sc->fftDim / stageRadix) && (!(((sc->convolutionStep) || (sc->useBluesteinFFT && sc->BluesteinConvolutionStep)) && (stageAngle < 0) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)))))) && (((sc->registerBoost == 1) && ((sc->localSize[0] * logicalStoragePerThread > sc->fftDim) || (stageSize < sc->fftDim / stageRadix) || ((sc->reorderFourStep) && (sc->fftDim < sc->fft_dim_full) && (sc->localSize[1] > 1)) || (sc->localSize[1] > 1) || ((sc->performR2C) && (!sc->actualInverse) && (sc->axis_id == 0)) || ((sc->convolutionStep) && ((sc->matrixConvolution > 1) || (sc->numKernels > 1)) && (stageAngle < 0)))) || (sc->performDCT)))
 	{
 		res = appendBarrierVkFFT(sc, 1);
@@ -14034,12 +19220,7 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 				if (res != VKFFT_SUCCESS) return res;
 				res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 				if (res != VKFFT_SUCCESS) return res;
-				if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim) {
-					sc->tempLen = sprintf(sc->tempStr, "\
-	if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalStoragePerThread, sc->fftDim);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
-				}
+
 				for (uint64_t k = 0; k < sc->registerBoost; ++k) {
 					uint64_t t = 0;
 					if (k > 0) {
@@ -14049,29 +19230,44 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 						if (res != VKFFT_SUCCESS) return res;
 						res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 						if (res != VKFFT_SUCCESS) return res;
-						if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim) {
+						if (logicalGroupSize * logicalStoragePerThread > sc->fftDim) {
 							sc->tempLen = sprintf(sc->tempStr, "\
 	if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalStoragePerThread, sc->fftDim);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
-					for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
-						sprintf(tempNum, "%" PRIu64 "", j * logicalGroupSize);
-						res = VkAddReal(sc, sc->stageInvocationID, sc->gl_LocalInvocationID_x, tempNum);
-						if (res != VKFFT_SUCCESS) return res;
-						res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
-						if (res != VKFFT_SUCCESS) return res;
-						sprintf(tempNum, "%" PRIu64 "", stageSize);
-						res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
-						if (res != VKFFT_SUCCESS) return res;
-						res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
-						if (res != VKFFT_SUCCESS) return res;
-						sprintf(tempNum, "%" PRIu64 "", stageRadix);
-						res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
-						if (res != VKFFT_SUCCESS) return res;
-						res = VkAddReal(sc, sc->inoutID, sc->inoutID, sc->stageInvocationID);
+					if (logicalGroupSize != sc->localSize[0]) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalGroupSize);
+						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+					}
+					for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
+						if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) <= sc->fftDim) {
+							if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+								uint64_t current_group_cut = sc->fftDim / stageRadix - (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize;
+								sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, current_group_cut);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							sprintf(tempNum, "%" PRIu64 "", j * logicalGroupSize);
+							res = VkAddReal(sc, sc->stageInvocationID, sc->gl_LocalInvocationID_x, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
+							if (res != VKFFT_SUCCESS) return res;
+							sprintf(tempNum, "%" PRIu64 "", stageSize);
+							res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
+							if (res != VKFFT_SUCCESS) return res;
+							sprintf(tempNum, "%" PRIu64 "", stageRadix);
+							res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkAddReal(sc, sc->inoutID, sc->inoutID, sc->stageInvocationID);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						/*sc->tempLen = sprintf(sc->tempStr, "\
 		stageInvocationID = (gl_LocalInvocationID.x + %" PRIu64 ") %% (%" PRIu64 ");\n\
 		blockInvocationID = (gl_LocalInvocationID.x + %" PRIu64 ") - stageInvocationID;\n\
@@ -14082,31 +19278,33 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 								id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
 								sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[id]);
 								t++;
-								sprintf(tempNum, "%" PRIu64 "", i);
-								res = VkAddReal(sc, sc->sdataID, tempNum, sc->tshuffle);
-								if (res != VKFFT_SUCCESS) return res;
-								sprintf(tempNum, "%" PRIu64 "", logicalRegistersPerThread);
-								res = VkModReal(sc, sc->sdataID, sc->sdataID, tempNum);
-								if (res != VKFFT_SUCCESS) return res;
-								sprintf(tempNum, "%" PRIu64 "", stageSize);
-								res = VkMulReal(sc, sc->sdataID, sc->sdataID, tempNum);
-								if (res != VKFFT_SUCCESS) return res;
-								if (sc->localSize[1] > 1) {
-									res = VkMulReal(sc, sc->combinedID, sc->gl_LocalInvocationID_y, sc->sharedStride);
+								if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) <= sc->fftDim) {
+									sprintf(tempNum, "%" PRIu64 "", i);
+									res = VkAddReal(sc, sc->sdataID, tempNum, sc->tshuffle);
 									if (res != VKFFT_SUCCESS) return res;
-									res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->combinedID);
+									sprintf(tempNum, "%" PRIu64 "", logicalRegistersPerThread);
+									res = VkModReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									sprintf(tempNum, "%" PRIu64 "", stageSize);
+									res = VkMulReal(sc, sc->sdataID, sc->sdataID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									if (sc->localSize[1] > 1) {
+										res = VkMulReal(sc, sc->combinedID, sc->gl_LocalInvocationID_y, sc->sharedStride);
+										if (res != VKFFT_SUCCESS) return res;
+										res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->combinedID);
+										if (res != VKFFT_SUCCESS) return res;
+									}
+									res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->inoutID);
 									if (res != VKFFT_SUCCESS) return res;
-								}
-								res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->inoutID);
-								if (res != VKFFT_SUCCESS) return res;
 
-								//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + ((%" PRIu64 "+tshuffle) %% (%" PRIu64 "))*%" PRIu64 "", i, logicalRegistersPerThread, stageSize);
-								if (strcmp(stageNormalization, "")) {
-									res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], stageNormalization);
+									//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + ((%" PRIu64 "+tshuffle) %% (%" PRIu64 "))*%" PRIu64 "", i, logicalRegistersPerThread, stageSize);
+									if (strcmp(stageNormalization, "")) {
+										res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], stageNormalization);
+										if (res != VKFFT_SUCCESS) return res;
+									}
+									res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
 									if (res != VKFFT_SUCCESS) return res;
 								}
-								res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
-								if (res != VKFFT_SUCCESS) return res;
 								/*sc->tempLen = sprintf(sc->tempStr, "\
 	sdata[sharedStride * gl_LocalInvocationID.y + inoutID + ((%" PRIu64 "+tshuffle) %% (%" PRIu64 "))*%" PRIu64 "] = temp%s%s;\n", i, logicalRegistersPerThread, stageSize, sc->regIDs[id], stageNormalization);*/
 							}
@@ -14117,49 +19315,64 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 								id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
 								sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[id]);
 								t++;
-								sprintf(tempNum, "%" PRIu64 "", i * stageSize);
-								res = VkAddReal(sc, sc->sdataID, sc->inoutID, tempNum);
-								if (res != VKFFT_SUCCESS) return res;
-								if ((stageSize <= sc->numSharedBanks / 2) && (sc->fftDim > sc->numSharedBanks / 2) && (sc->sharedStrideBankConflictFirstStages != sc->fftDim / sc->registerBoost) && ((sc->fftDim & (sc->fftDim - 1)) == 0) && (stageSize * stageRadix != sc->fftDim)) {
-									if (sc->resolveBankConflictFirstStages == 0) {
-										sc->resolveBankConflictFirstStages = 1;
-										sc->tempLen = sprintf(sc->tempStr, "\
+								if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) <= sc->fftDim) {
+									sprintf(tempNum, "%" PRIu64 "", i * stageSize);
+									res = VkAddReal(sc, sc->sdataID, sc->inoutID, tempNum);
+									if (res != VKFFT_SUCCESS) return res;
+									if ((stageSize <= sc->numSharedBanks / 2) && (sc->fftDim > sc->numSharedBanks / 2) && (sc->sharedStrideBankConflictFirstStages != sc->fftDim / sc->registerBoost) && ((sc->fftDim & (sc->fftDim - 1)) == 0) && (stageSize * stageRadix != sc->fftDim)) {
+										if (sc->resolveBankConflictFirstStages == 0) {
+											sc->resolveBankConflictFirstStages = 1;
+											sc->tempLen = sprintf(sc->tempStr, "\
 	%s = %" PRIu64 ";", sc->sharedStride, sc->sharedStrideBankConflictFirstStages);
+											res = VkAppendLine(sc);
+											if (res != VKFFT_SUCCESS) return res;
+										}
+										sc->tempLen = sprintf(sc->tempStr, "\
+	%s = (%s / %" PRIu64 ") * %" PRIu64 " + %s %% %" PRIu64 ";", sc->sdataID, sc->sdataID, sc->numSharedBanks / 2, sc->numSharedBanks / 2 + 1, sc->sdataID, sc->numSharedBanks / 2);
 										res = VkAppendLine(sc);
 										if (res != VKFFT_SUCCESS) return res;
-									}
-									sc->tempLen = sprintf(sc->tempStr, "\
-	%s = (%s / %" PRIu64 ") * %" PRIu64 " + %s %% %" PRIu64 ";", sc->sdataID, sc->sdataID, sc->numSharedBanks / 2, sc->numSharedBanks / 2 + 1, sc->sdataID, sc->numSharedBanks / 2);
-									res = VkAppendLine(sc);
-									if (res != VKFFT_SUCCESS) return res;
 
-								}
-								else {
-									if (sc->resolveBankConflictFirstStages == 1) {
-										sc->resolveBankConflictFirstStages = 0;
-										sc->tempLen = sprintf(sc->tempStr, "\
+									}
+									else {
+										if (sc->resolveBankConflictFirstStages == 1) {
+											sc->resolveBankConflictFirstStages = 0;
+											sc->tempLen = sprintf(sc->tempStr, "\
 	%s = %" PRIu64 ";", sc->sharedStride, sc->sharedStrideReadWriteConflict);
-										res = VkAppendLine(sc);
+											res = VkAppendLine(sc);
+											if (res != VKFFT_SUCCESS) return res;
+										}
+									}
+									if (sc->localSize[1] > 1) {
+										res = VkMulReal(sc, sc->combinedID, sc->gl_LocalInvocationID_y, sc->sharedStride);
+										if (res != VKFFT_SUCCESS) return res;
+										res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->combinedID);
 										if (res != VKFFT_SUCCESS) return res;
 									}
-								}
-								if (sc->localSize[1] > 1) {
-									res = VkMulReal(sc, sc->combinedID, sc->gl_LocalInvocationID_y, sc->sharedStride);
-									if (res != VKFFT_SUCCESS) return res;
-									res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->combinedID);
-									if (res != VKFFT_SUCCESS) return res;
-								}
-								//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "", i * stageSize);
-								if (strcmp(stageNormalization, "")) {
-									res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], stageNormalization);
+									//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "", i * stageSize);
+									if (strcmp(stageNormalization, "")) {
+										res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], stageNormalization);
+										if (res != VKFFT_SUCCESS) return res;
+									}
+									res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
 									if (res != VKFFT_SUCCESS) return res;
 								}
-								res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
-								if (res != VKFFT_SUCCESS) return res;
 								/*sc->tempLen = sprintf(sc->tempStr, "\
 	sdata[sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "] = temp%s%s;\n", i * stageSize, sc->regIDs[id], stageNormalization);*/
 							}
 						}
+						if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) <= sc->fftDim) {
+							if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "	}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+					if (logicalGroupSize != sc->localSize[0]) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
 					}
 					for (uint64_t j = logicalRegistersPerThread; j < sc->registers_per_thread; j++) {
 						sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[t + k * sc->registers_per_thread]);
@@ -14167,7 +19380,7 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 					}
 					t = 0;
 					if (sc->registerBoost > 1) {
-						if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim)
+						if (logicalGroupSize * logicalStoragePerThread > sc->fftDim)
 						{
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
@@ -14183,7 +19396,7 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 						if (res != VKFFT_SUCCESS) return res;
 						res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 						if (res != VKFFT_SUCCESS) return res;
-						if (sc->localSize[0] * logicalStoragePerThreadNext > sc->fftDim) {
+						if (logicalGroupSize * logicalStoragePerThreadNext > sc->fftDim) {
 							sc->tempLen = sprintf(sc->tempStr, "\
 	if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalStoragePerThreadNext, sc->fftDim);
 							res = VkAppendLine(sc);
@@ -14218,7 +19431,7 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 							}
 
 						}
-						if (sc->localSize[0] * logicalStoragePerThreadNext > sc->fftDim)
+						if (logicalGroupSize * logicalStoragePerThreadNext > sc->fftDim)
 						{
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
@@ -14230,12 +19443,6 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim)
-						{
-							sc->tempLen = sprintf(sc->tempStr, "	}\n");
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
 						res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 						if (res != VKFFT_SUCCESS) return res;
 						res = appendZeropadEnd(sc);
@@ -14306,12 +19513,7 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 		if (res != VKFFT_SUCCESS) return res;
 		res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 		if (res != VKFFT_SUCCESS) return res;
-		if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim) {
-			sc->tempLen = sprintf(sc->tempStr, "\
-	if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, logicalStoragePerThread, sc->fftDim);
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
-		}
+
 		if (((sc->actualInverse) && (sc->normalize)) || ((sc->convolutionStep || sc->useBluesteinFFT) && (stageAngle > 0))) {
 			for (uint64_t i = 0; i < logicalStoragePerThread; i++) {
 				if (strcmp(stageNormalization, "")) {
@@ -14322,12 +19524,7 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 	temp%s = temp%s%s;\n", sc->regIDs[(i / logicalRegistersPerThread) * sc->registers_per_thread + i % logicalRegistersPerThread], sc->regIDs[(i / logicalRegistersPerThread) * sc->registers_per_thread + i % logicalRegistersPerThread], stageNormalization);*/
 			}
 		}
-		if (sc->localSize[0] * logicalStoragePerThread > sc->fftDim)
-		{
-			sc->tempLen = sprintf(sc->tempStr, "	}\n");
-			res = VkAppendLine(sc);
-			if (res != VKFFT_SUCCESS) return res;
-		}
+
 		res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 		if (res != VKFFT_SUCCESS) return res;
 		res = appendZeropadEnd(sc);
@@ -14335,7 +19532,7 @@ static inline VkFFTResult appendRadixShuffleNonStrided(VkFFTSpecializationConsta
 	}
 	return res;
 }
-static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, double stageAngle, uint64_t stageRadix, uint64_t stageRadixNext) {
+static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix, uint64_t stageRadixNext) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char vecType[30];
 	char LFending[4] = "";
@@ -14355,6 +19552,10 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 #endif
 
 	char tempNum[50] = "";
@@ -14364,8 +19565,8 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 	uint64_t logicalRegistersPerThread = sc->registers_per_thread_per_radix[stageRadix];//(sc->registers_per_thread % stageRadix == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
 	uint64_t logicalRegistersPerThreadNext = sc->registers_per_thread_per_radix[stageRadixNext];//(sc->registers_per_thread % stageRadixNext == 0) ? sc->registers_per_thread : sc->min_registers_per_thread;
 
-	uint64_t logicalGroupSize = sc->fftDim / logicalStoragePerThread;
-	uint64_t logicalGroupSizeNext = sc->fftDim / logicalStoragePerThreadNext;
+	uint64_t logicalGroupSize = (uint64_t)ceil(sc->fftDim / (double)logicalStoragePerThread);
+	uint64_t logicalGroupSizeNext = (uint64_t)ceil(sc->fftDim / (double)logicalStoragePerThreadNext);
 	char stageNormalization[50] = "";
 	uint64_t normalizationValue = 1;
 	if ((((sc->actualInverse) && (sc->normalize)) || (sc->convolutionStep && (stageAngle > 0))) && (stageSize == 1) && (sc->axis_upload_id == 0) && (!(sc->useBluesteinFFT && (stageAngle < 0)))) {
@@ -14417,12 +19618,7 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 				if (res != VKFFT_SUCCESS) return res;
 				res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 				if (res != VKFFT_SUCCESS) return res;
-				if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) {
-					sc->tempLen = sprintf(sc->tempStr, "\
-	if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalStoragePerThread, sc->fftDim);
-					res = VkAppendLine(sc);
-					if (res != VKFFT_SUCCESS) return res;
-				}
+
 				for (uint64_t k = 0; k < sc->registerBoost; ++k) {
 					uint64_t t = 0;
 					if (k > 0) {
@@ -14432,29 +19628,44 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 						if (res != VKFFT_SUCCESS) return res;
 						res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 						if (res != VKFFT_SUCCESS) return res;
-						if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim) {
+						if (logicalGroupSize * logicalStoragePerThread > sc->fftDim) {
 							sc->tempLen = sprintf(sc->tempStr, "\
 	if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalStoragePerThread, sc->fftDim);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 					}
-					for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
-						sprintf(tempNum, "%" PRIu64 "", j * logicalGroupSize);
-						res = VkAddReal(sc, sc->stageInvocationID, sc->gl_LocalInvocationID_y, tempNum);
-						if (res != VKFFT_SUCCESS) return res;
-						res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
-						if (res != VKFFT_SUCCESS) return res;
-						sprintf(tempNum, "%" PRIu64 "", stageSize);
-						res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
-						if (res != VKFFT_SUCCESS) return res;
-						res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
-						if (res != VKFFT_SUCCESS) return res;
-						sprintf(tempNum, "%" PRIu64 "", stageRadix);
-						res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
-						if (res != VKFFT_SUCCESS) return res;
-						res = VkAddReal(sc, sc->inoutID, sc->inoutID, sc->stageInvocationID);
+					if (logicalGroupSize != sc->localSize[1]) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalGroupSize);
+						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+					}
+					for (uint64_t j = 0; j < logicalRegistersPerThread / stageRadix; j++) {
+						if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) <= sc->fftDim) {
+							if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+								uint64_t current_group_cut = sc->fftDim / stageRadix - (j + k * logicalRegistersPerThread / stageRadix) * logicalGroupSize;
+								sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s  < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, current_group_cut);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							sprintf(tempNum, "%" PRIu64 "", j * logicalGroupSize);
+							res = VkAddReal(sc, sc->stageInvocationID, sc->gl_LocalInvocationID_y, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkMovReal(sc, sc->blockInvocationID, sc->stageInvocationID);
+							if (res != VKFFT_SUCCESS) return res;
+							sprintf(tempNum, "%" PRIu64 "", stageSize);
+							res = VkModReal(sc, sc->stageInvocationID, sc->stageInvocationID, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkSubReal(sc, sc->blockInvocationID, sc->blockInvocationID, sc->stageInvocationID);
+							if (res != VKFFT_SUCCESS) return res;
+							sprintf(tempNum, "%" PRIu64 "", stageRadix);
+							res = VkMulReal(sc, sc->inoutID, sc->blockInvocationID, tempNum);
+							if (res != VKFFT_SUCCESS) return res;
+							res = VkAddReal(sc, sc->inoutID, sc->inoutID, sc->stageInvocationID);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						/*sc->tempLen = sprintf(sc->tempStr, "\
 		stageInvocationID = (gl_LocalInvocationID.y + %" PRIu64 ") %% (%" PRIu64 ");\n\
 		blockInvocationID = (gl_LocalInvocationID.y + %" PRIu64 ") - stageInvocationID;\n\
@@ -14464,23 +19675,38 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 							id = (id / logicalRegistersPerThread) * sc->registers_per_thread + id % logicalRegistersPerThread;
 							sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[id]);
 							t++;
-							sprintf(tempNum, "%" PRIu64 "", i * stageSize);
-							res = VkAddReal(sc, sc->sdataID, sc->inoutID, tempNum);
-							if (res != VKFFT_SUCCESS) return res;
-							res = VkMulReal(sc, sc->sdataID, sc->sharedStride, sc->sdataID);
-							if (res != VKFFT_SUCCESS) return res;
-							res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
-							if (res != VKFFT_SUCCESS) return res;
-							//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "", i * stageSize);
-							if (strcmp(stageNormalization, "")) {
-								res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], stageNormalization);
+							if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) <= sc->fftDim) {
+								sprintf(tempNum, "%" PRIu64 "", i * stageSize);
+								res = VkAddReal(sc, sc->sdataID, sc->inoutID, tempNum);
+								if (res != VKFFT_SUCCESS) return res;
+								res = VkMulReal(sc, sc->sdataID, sc->sharedStride, sc->sdataID);
+								if (res != VKFFT_SUCCESS) return res;
+								res = VkAddReal(sc, sc->sdataID, sc->sdataID, sc->gl_LocalInvocationID_x);
+								if (res != VKFFT_SUCCESS) return res;
+								//sprintf(sc->sdataID, "sharedStride * gl_LocalInvocationID.y + inoutID + %" PRIu64 "", i * stageSize);
+								if (strcmp(stageNormalization, "")) {
+									res = VkMulComplexNumber(sc, sc->regIDs[id], sc->regIDs[id], stageNormalization);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							res = VkSharedStore(sc, sc->sdataID, sc->regIDs[id]);
-							if (res != VKFFT_SUCCESS) return res;
 							/*sc->tempLen = sprintf(sc->tempStr, "\
 		sdata[gl_WorkGroupSize.x*(inoutID+%" PRIu64 ")+gl_LocalInvocationID.x] = temp%s%s;\n", i * stageSize, sc->regIDs[id], stageNormalization);*/
 						}
+						if (logicalGroupSize * ((j + k * logicalRegistersPerThread / stageRadix) * stageRadix) <= sc->fftDim) {
+							if (logicalGroupSize * ((1 + j + k * logicalRegistersPerThread / stageRadix) * stageRadix) > sc->fftDim) {
+								sc->tempLen = sprintf(sc->tempStr, "	}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+					if (logicalGroupSize != sc->localSize[1]) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
 					}
 					for (uint64_t j = logicalRegistersPerThread; j < sc->registers_per_thread; j++) {
 						sprintf(tempID[t + k * sc->registers_per_thread], "%s", sc->regIDs[t + k * sc->registers_per_thread]);
@@ -14488,12 +19714,6 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 					}
 					t = 0;
 					if (sc->registerBoost > 1) {
-						if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim)
-						{
-							sc->tempLen = sprintf(sc->tempStr, "	}\n");
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
 						res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 						if (res != VKFFT_SUCCESS) return res;
 						res = appendZeropadEnd(sc);
@@ -14504,7 +19724,7 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 						if (res != VKFFT_SUCCESS) return res;
 						res = VkAppendLineFromInput(sc, sc->disableThreadsStart);
 						if (res != VKFFT_SUCCESS) return res;
-						if (sc->localSize[1] * logicalStoragePerThreadNext > sc->fftDim) {
+						if (logicalGroupSize * logicalStoragePerThreadNext > sc->fftDim) {
 							sc->tempLen = sprintf(sc->tempStr, "\
 	if (%s * %" PRIu64 " < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, logicalStoragePerThreadNext, sc->fftDim);
 							res = VkAppendLine(sc);
@@ -14529,8 +19749,7 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 								t++;
 							}
 						}
-						if (sc->localSize[1] * logicalStoragePerThreadNext > sc->fftDim)
-						{
+						if (logicalGroupSize * logicalStoragePerThreadNext > sc->fftDim) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -14541,12 +19760,6 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 						if (res != VKFFT_SUCCESS) return res;
 					}
 					else {
-						if (sc->localSize[1] * logicalStoragePerThread > sc->fftDim)
-						{
-							sc->tempLen = sprintf(sc->tempStr, "	}\n");
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
 						res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 						if (res != VKFFT_SUCCESS) return res;
 						res = appendZeropadEnd(sc);
@@ -14641,21 +19854,23 @@ static inline VkFFTResult appendRadixShuffleStrided(VkFFTSpecializationConstants
 	}
 	return res;
 }
-static inline VkFFTResult appendRadixShuffle(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, double stageAngle, uint64_t stageRadix, uint64_t stageRadixNext, uint64_t shuffleType) {
+static inline VkFFTResult appendRadixShuffle(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t stageSize, uint64_t stageSizeSum, long double stageAngle, uint64_t stageRadix, uint64_t stageRadixNext, uint64_t stageID, uint64_t shuffleType) { // Picks the non-strided or strided shuffle generator by shuffleType and forwards the stage parameters to it; stageID indexes sc->rader_generator. Returns the helper's VkFFTResult, or VKFFT_SUCCESS if nothing was dispatched.
 	VkFFTResult res = VKFFT_SUCCESS;
-	switch (shuffleType) {
-	case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144: {
-		res = appendRadixShuffleNonStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageRadixNext);
-		if (res != VKFFT_SUCCESS) return res;
-		//appendBarrierVkFFT(sc, 1);
-		break;
-	}
-	case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145: {
-		res = appendRadixShuffleStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageRadixNext);
-		if (res != VKFFT_SUCCESS) return res;
-		//appendBarrierVkFFT(sc, 1);
-		break;
-	}
+	if (sc->rader_generator[stageID] == 0) { // dispatch only when this stage's rader_generator entry is zero; otherwise fall through and return VKFFT_SUCCESS without calling either helper
+		switch (shuffleType) {
+		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144: { // shuffleType values routed to the non-strided generator
+			res = appendRadixShuffleNonStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageRadixNext);
+			if (res != VKFFT_SUCCESS) return res;
+			//appendBarrierVkFFT(sc, 1);
+			break;
+		}
+		case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145: { // shuffleType values routed to the strided generator
+			res = appendRadixShuffleStrided(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, stageRadix, stageRadixNext);
+			if (res != VKFFT_SUCCESS) return res;
+			//appendBarrierVkFFT(sc, 1);
+			break;
+		}
+		} // end switch: shuffleType values not listed above call neither helper
 	}
 	return res;
 }
@@ -14843,6 +20058,7 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		switch (readType) {
 		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144://single_c2c
 		{
+			uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendZeropadStart(sc);
@@ -14854,11 +20070,23 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		%s = sdata[sharedStride * %s + %s];\n", sc->regIDs[0], sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) {
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 		%s = sdata[sharedStride * %s + %s + %" PRIu64 " * %s];\n", sc->regIDs[i], sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, i, sc->gl_WorkGroupSize_x);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				//appendBarrierVkFFT(sc, 3);
 			}
@@ -14872,11 +20100,23 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		%s = sdata[sharedStride * %s + %s];\n", sc->regIDs[0], sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) {
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 		%s = sdata[sharedStride * %s + %s + %" PRIu64 " * %s];\n", sc->regIDs[i], sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, i, sc->gl_WorkGroupSize_x);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				//appendBarrierVkFFT(sc, 3);
 				sc->tempLen = sprintf(sc->tempStr, "			break;\n");
@@ -14891,11 +20131,23 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		%s_%" PRIu64 " = sdata[sharedStride * %s + %s];\n", sc->regIDs[0], i, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					for (uint64_t j = 1; j < sc->min_registers_per_thread; j++) {
+					for (uint64_t j = 1; j < used_registers_read; j++) {
+						if (sc->localSize[0] * (j + 1) > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * j);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "\
 		%s_%" PRIu64 " = sdata[sharedStride * %s + %s + %" PRIu64 " * %s];\n", sc->regIDs[j], i, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, j, sc->gl_WorkGroupSize_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if (sc->localSize[0] * (j + 1) > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					//appendBarrierVkFFT(sc, 3);
 					sc->tempLen = sprintf(sc->tempStr, "			break;\n");
@@ -14914,6 +20166,7 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		}
 		case 1: case 111: case 121: case 131: case 141: case 143: case 145://grouped_c2c
 		{
+			uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendZeropadStart(sc);
@@ -14925,11 +20178,23 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		%s = sdata[%s*(%s)+%s];\n", sc->regIDs[0], sc->sharedStride, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) {
+					if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 		%s = sdata[%s*(%s+%" PRIu64 "*%s)+%s];\n", sc->regIDs[i], sc->sharedStride, sc->gl_LocalInvocationID_y, i, sc->gl_WorkGroupSize_y, sc->gl_LocalInvocationID_x);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				//appendBarrierVkFFT(sc, 3);
 			}
@@ -14943,11 +20208,23 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		%s = sdata[%s*(%s)+%s];\n", sc->regIDs[0], sc->sharedStride, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) {
+					if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 		%s = sdata[%s*(%s+%" PRIu64 "*%s)+%s];\n", sc->regIDs[i], sc->sharedStride, sc->gl_LocalInvocationID_y, i, sc->gl_WorkGroupSize_y, sc->gl_LocalInvocationID_x);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				//appendBarrierVkFFT(sc, 3);
 				sc->tempLen = sprintf(sc->tempStr, "			break;\n");
@@ -14962,11 +20239,23 @@ static inline VkFFTResult appendCoordinateRegisterStore(VkFFTSpecializationConst
 		%s_%" PRIu64 " = sdata[%s*(%s)+%s];\n", sc->regIDs[0], i, sc->sharedStride, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					for (uint64_t j = 1; j < sc->min_registers_per_thread; j++) {
+					for (uint64_t j = 1; j < used_registers_read; j++) {
+						if (sc->localSize[1] * (j + 1) > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * j);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "\
 		%s_%" PRIu64 " = sdata[%s*(%s+%" PRIu64 "*%s)+%s];\n", sc->regIDs[j], i, sc->sharedStride, sc->gl_LocalInvocationID_y, j, sc->gl_WorkGroupSize_y, sc->gl_LocalInvocationID_x);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if (sc->localSize[1] * (j + 1) > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					//appendBarrierVkFFT(sc, 3);
 					sc->tempLen = sprintf(sc->tempStr, "			break;\n");
@@ -14993,6 +20282,7 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 		switch (readType) {
 		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144://single_c2c
 		{
+			uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendZeropadStart(sc);
@@ -15004,11 +20294,23 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 			sdata[sharedStride * %s + %s] = %s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, sc->regIDs[0]);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) {
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 			sdata[sharedStride * %s + %s + %" PRIu64 " * %s] = %s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, i, sc->gl_WorkGroupSize_x, sc->regIDs[i]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				//appendBarrierVkFFT(sc, 3);
 			}
@@ -15022,11 +20324,23 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 			sdata[sharedStride * %s + %s] = %s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, sc->regIDs[0]);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) {
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 			sdata[sharedStride * %s + %s + %" PRIu64 " * %s] = %s;\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, i, sc->gl_WorkGroupSize_x, sc->regIDs[i]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				//appendBarrierVkFFT(sc, 3);
 				sc->tempLen = sprintf(sc->tempStr, "			break;\n");
@@ -15041,11 +20355,23 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 			sdata[sharedStride * %s + %s] = %s_%" PRIu64 ";\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, sc->regIDs[0], i);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					for (uint64_t j = 1; j < sc->min_registers_per_thread; j++) {
+					for (uint64_t j = 1; j < used_registers_read; j++) { // j indexes regIDs; bound is ceil(fftDim / localSize[0]) instead of min_registers_per_thread
+						if (sc->localSize[0] * (j + 1) > sc->fftDim) { // slot j reaches past fftDim: emit an opening thread-index guard around the store
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * j);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "\
 			sdata[sharedStride * %s + %s + %" PRIu64 " * %s] = %s_%" PRIu64 ";\n", sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, j, sc->gl_WorkGroupSize_x, sc->regIDs[j], i);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if (sc->localSize[0] * (j + 1) > sc->fftDim) { // must test j, the same index as the opening guard, so every emitted "{" gets its matching "}"
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					//appendBarrierVkFFT(sc, 3);
 					sc->tempLen = sprintf(sc->tempStr, "			break;\n");
@@ -15064,6 +20390,7 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 		}
 		case 1: case 111: case 121: case 131: case 141: case 143: case 145://grouped_c2c
 		{
+			uint64_t used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
 			res = appendZeropadStart(sc);
@@ -15075,11 +20402,23 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 		sdata[%s*(%s)+%s] = %s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, sc->regIDs[0]);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) {
+					if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					sc->tempLen = sprintf(sc->tempStr, "\
 		sdata[%s*(%s+%" PRIu64 "*%s)+%s] = %s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, i, sc->gl_WorkGroupSize_y, sc->gl_LocalInvocationID_x, sc->regIDs[i]);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
+					if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+						sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 				}
 				//appendBarrierVkFFT(sc, 3);
 			}
@@ -15093,7 +20432,20 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 		sdata[%s*(%s)+%s] = %s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, sc->regIDs[0]);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-				for (uint64_t i = 1; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 1; i < used_registers_read; i++) { // i indexes regIDs; bound is ceil(fftDim / localSize[1]) instead of min_registers_per_thread
+					if (sc->localSize[1] * (i + 1) > sc->fftDim) { // slot i reaches past fftDim: emit an opening thread-index guard
+						sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * i);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if (sc->localSize[1] * (i + 1) > sc->fftDim) { // NOTE(review): this emits the closing "}" immediately after the opening guard, before the sdata store below is appended, so the generated guard is empty and the store is unguarded; the sibling loops emit "}" after the store - confirm and move
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+
 					sc->tempLen = sprintf(sc->tempStr, "\
 		sdata[%s*(%s+%" PRIu64 "*%s)+%s] = %s;\n", sc->sharedStride, sc->gl_LocalInvocationID_y, i, sc->gl_WorkGroupSize_y, sc->gl_LocalInvocationID_x, sc->regIDs[i]);
 					res = VkAppendLine(sc);
@@ -15112,11 +20464,23 @@ static inline VkFFTResult appendCoordinateRegisterPull(VkFFTSpecializationConsta
 		sdata[%s*(%s)+%s] = %s_%" PRIu64 ";\n", sc->sharedStride, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, sc->regIDs[0], i);
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
-					for (uint64_t j = 1; j < sc->min_registers_per_thread; j++) {
+					for (uint64_t j = 1; j < used_registers_read; j++) {
+						if (sc->localSize[1] * (j + 1) > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * j);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "\
 		sdata[%s*(%s+%" PRIu64 "*%s)+%s] = %s_%" PRIu64 ";\n", sc->sharedStride, sc->gl_LocalInvocationID_y, j, sc->gl_WorkGroupSize_y, sc->gl_LocalInvocationID_x, sc->regIDs[j], i);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if (sc->localSize[1] * (j + 1) > sc->fftDim) {
+							sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 					}
 					//appendBarrierVkFFT(sc, 3);
 					sc->tempLen = sprintf(sc->tempStr, "			break;\n");
@@ -15152,6 +20516,9 @@ static inline VkFFTResult appendPreparationBatchedKernelConvolution(VkFFTSpecial
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+#elif(VKFFT_BACKEND==5)
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
 #endif
 	char separateRegisterStore[100] = "_store";
 
@@ -15181,7 +20548,7 @@ static inline VkFFTResult appendPreparationBatchedKernelConvolution(VkFFTSpecial
 	if (res != VKFFT_SUCCESS) return res;
 	return res;
 }
-static inline VkFFTResult appendBluesteinConvolution(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* floatTypeMemory, const char* uintType, uint64_t dataType) {
+static inline VkFFTResult appendBluesteinConvolution(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* uintType, uint64_t dataType) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	char shiftX[500] = "";
 	if (sc->performWorkGroupShift[0])
@@ -15192,8 +20559,6 @@ static inline VkFFTResult appendBluesteinConvolution(VkFFTSpecializationConstant
 			sprintf(requestCoordinate, "0");
 		}
 	}
-	char index_x[2000] = "";
-	char index_y[2000] = "";
 	char requestBatch[100] = "";
 	char separateRegisterStore[100] = "";
 	if (sc->convolutionStep) {
@@ -15214,10 +20579,25 @@ static inline VkFFTResult appendBluesteinConvolution(VkFFTSpecializationConstant
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 	}
-	for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+	uint64_t used_registers_read = 1;
+	switch (dataType) {
+	case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		break;
+	case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		break;
+	}
+	for (uint64_t i = 0; i < used_registers_read; i++) {
 		switch (dataType) {
 		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
 		{
+			if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * i);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
 			if (sc->fftDim == sc->fft_dim_full) {
 				sc->tempLen = sprintf(sc->tempStr, "		%s = %s + %" PRIu64 ";\n", sc->inoutID, sc->gl_LocalInvocationID_x, i * sc->localSize[0]);
 				res = VkAppendLine(sc);
@@ -15231,8 +20611,14 @@ static inline VkFFTResult appendBluesteinConvolution(VkFFTSpecializationConstant
 			}
 			break;
 		}
-		case 1: case 111: case 121: case 131: case 141: case 143: case 145:
+		case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
 		{
+			if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * i);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
 			if (sc->fftDim == sc->fft_dim_full) {
 				sc->tempLen = sprintf(sc->tempStr, "			%s = %s + %" PRIu64 ";\n", sc->inoutID, sc->gl_LocalInvocationID_y, i * sc->localSize[1]);
 				res = VkAppendLine(sc);
@@ -15268,6 +20654,28 @@ static inline VkFFTResult appendBluesteinConvolution(VkFFTSpecializationConstant
 		sc->tempLen = sprintf(sc->tempStr, "		%s.y = temp_imag0;\n", sc->regIDs[i]);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
+		switch (dataType) {
+		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		{
+			if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		{
+			if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		}
 	}
 	res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 	if (res != VKFFT_SUCCESS) return res;
@@ -15293,6 +20701,9 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(convTypeLeft, "(float)");
 		//sprintf(convTypeRight, "");
+#elif(VKFFT_BACKEND==5)
+		sprintf(convTypeLeft, "float(");
+		sprintf(convTypeRight, ")");
 #endif
 	}
 	if ((!strcmp(floatType, "double")) && (strcmp(floatTypeMemory, "double"))) {
@@ -15308,6 +20719,9 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(convTypeLeft, "(double)");
 		//sprintf(convTypeRight, "");
+#elif(VKFFT_BACKEND==5)
+		sprintf(convTypeLeft, "double(");
+		sprintf(convTypeRight, ")");
 #endif
 	}
 
@@ -15342,7 +20756,16 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 	}
-	for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+	uint64_t used_registers_read = 1;
+	switch (dataType) {
+	case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		break;
+	case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		used_registers_read = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		break;
+	}
+	for (uint64_t i = 0; i < used_registers_read; i++) {
 		if (i > 0) {
 			for (uint64_t j = 0; j < sc->matrixConvolution; j++) {
 				sc->tempLen = sprintf(sc->tempStr, "		temp_real%" PRIu64 " = 0;\n", j);
@@ -15363,7 +20786,11 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 					sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, i * sc->localSize[0] * sc->localSize[1]);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
-
+				if ((1 + i) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+					sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				if (sc->inputStride[0] > 1) {
 					sc->tempLen = sprintf(sc->tempStr, "			%s = ", sc->inoutID);
 					res = VkAppendLine(sc);
@@ -15402,6 +20829,12 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 				}
 			}
 			else {
+				if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+					sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_x, sc->fftDim - sc->localSize[0] * i);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				sc->tempLen = sprintf(sc->tempStr, "			%s = ", sc->inoutID);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
@@ -15423,6 +20856,12 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 		}
 		case 1:
 		{
+			if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		if (%s < %" PRIu64 ") {\n", sc->gl_LocalInvocationID_y, sc->fftDim - sc->localSize[1] * i);
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
 			sc->tempLen = sprintf(sc->tempStr, "			%s = ", sc->inoutID);
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
@@ -15512,6 +20951,8 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 				sc->tempLen = sprintf(sc->tempStr, "		w.x = rsqrt(temp_real0*temp_real0+temp_imag0*temp_imag0);\n");
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 				sc->tempLen = sprintf(sc->tempStr, "		w.x = rsqrt(temp_real0*temp_real0+temp_imag0*temp_imag0);\n");
+#elif(VKFFT_BACKEND==5)
+				sc->tempLen = sprintf(sc->tempStr, "        w.x = rsqrt(temp_real0*temp_real0+temp_imag0*temp_imag0);\n");
 #endif
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
@@ -15540,6 +20981,8 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 					sc->tempLen = sprintf(sc->tempStr, "		w.x = rsqrt(temp_real%" PRIu64 "*temp_real%" PRIu64 "+temp_imag%" PRIu64 "*temp_imag%" PRIu64 ");\n", l, l, l, l);
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 					sc->tempLen = sprintf(sc->tempStr, "		w.x = rsqrt(temp_real%" PRIu64 "*temp_real%" PRIu64 "+temp_imag%" PRIu64 "*temp_imag%" PRIu64 ");\n", l, l, l, l);
+#elif(VKFFT_BACKEND==5)
+					sc->tempLen = sprintf(sc->tempStr, "        w.x = rsqrt(temp_real%" PRIu64 "*temp_real%" PRIu64 "+temp_imag%" PRIu64 "*temp_imag%" PRIu64 ");\n", l, l, l, l);
 #endif
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
@@ -15617,6 +21060,28 @@ static inline VkFFTResult appendKernelConvolution(VkFFTSpecializationConstantsLa
 				if (res != VKFFT_SUCCESS) return res;
 			}
 		}
+		switch (dataType) {
+		case 0: case 5: case 6: case 110: case 120: case 130: case 140: case 142: case 144:
+		{
+			if (sc->localSize[0] * (i + 1) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		case 1: case 2: case 111: case 121: case 131: case 141: case 143: case 145:
+		{
+			if (sc->localSize[1] * (i + 1) > sc->fftDim) {
+				sc->tempLen = sprintf(sc->tempStr, "\
+		}\n");
+				res = VkAppendLine(sc);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			break;
+		}
+		}
 	}
 	res = VkAppendLineFromInput(sc, sc->disableThreadsEnd);
 	if (res != VKFFT_SUCCESS) return res;
@@ -15629,7 +21094,7 @@ static inline VkFFTResult setWriteFromRegisters(VkFFTSpecializationConstantsLayo
 	switch (writeType) {
 	case 0: //single_c2c
 	{
-		if ((sc->localSize[1] > 1) || (sc->localSize[0] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim)) {
+		if ((sc->localSize[1] > 1) || (sc->localSize[0] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim) || (sc->rader_generator[sc->numStages - 1] > 0)) {
 			sc->writeFromRegisters = 0;
 		}
 		else
@@ -15638,7 +21103,7 @@ static inline VkFFTResult setWriteFromRegisters(VkFFTSpecializationConstantsLayo
 	}
 	case 1: //grouped_c2c
 	{
-		if (sc->localSize[1] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim) {
+		if ((sc->localSize[1] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim) || (sc->rader_generator[sc->numStages - 1] > 0)) {
 			sc->writeFromRegisters = 0;
 			res = appendBarrierVkFFT(sc, 1);
 			if (res != VKFFT_SUCCESS) return res;
@@ -15649,7 +21114,7 @@ static inline VkFFTResult setWriteFromRegisters(VkFFTSpecializationConstantsLayo
 	}
 	case 2: //single_c2c_strided
 	{
-		if (sc->localSize[1] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim) {
+		if ((sc->localSize[1] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim) || (sc->rader_generator[sc->numStages - 1] > 0)) {
 			sc->writeFromRegisters = 0;
 		}
 		else
@@ -15663,7 +21128,7 @@ static inline VkFFTResult setWriteFromRegisters(VkFFTSpecializationConstantsLayo
 	}
 	case 6: //single_c2r
 	{
-		if ((sc->axisSwapped) || (sc->localSize[1] > 1) || (sc->localSize[0] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim)) {
+		if ((sc->axisSwapped) || (sc->localSize[1] > 1) || (sc->localSize[0] * sc->stageRadix[sc->numStages - 1] * (sc->registers_per_thread_per_radix[sc->stageRadix[sc->numStages - 1]] / sc->stageRadix[sc->numStages - 1]) > sc->fftDim) || (sc->rader_generator[sc->numStages - 1] > 0)) {
 			sc->writeFromRegisters = 0;
 		}
 		else
@@ -15680,7 +21145,7 @@ static inline VkFFTResult setWriteFromRegisters(VkFFTSpecializationConstantsLayo
 }
 static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* floatTypeMemory, const char* uintType, uint64_t writeType) {
 	VkFFTResult res = VKFFT_SUCCESS;
-	double double_PI = 3.1415926535897932384626433832795;
+	long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
 	char vecType[30];
 	char outputsStruct[20] = "";
 	char LFending[4] = "";
@@ -15716,6 +21181,13 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
 	char cosDef[20] = "native_cos";
 	char sinDef[20] = "native_sin";
+#elif(VKFFT_BACKEND==5)
+	sprintf(outputsStruct, "outputs");
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
 #endif
 	char convTypeLeft[20] = "";
 	char convTypeRight[20] = "";
@@ -15743,6 +21215,9 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "(float)");
 			//sprintf(convTypeRight, "");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "float(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 		else {
@@ -15758,6 +21233,9 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "conv_float2(");
 			sprintf(convTypeRight, ")");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "conv_float2(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 	}
@@ -15775,6 +21253,9 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "(double)");
 			//sprintf(convTypeRight, "");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "double(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 		else {
@@ -15790,6 +21271,9 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 			sprintf(convTypeLeft, "conv_double2(");
 			sprintf(convTypeRight, ")");
+#elif(VKFFT_BACKEND==5)
+			sprintf(convTypeLeft, "conv_double2(");
+			sprintf(convTypeRight, ")");
 #endif
 		}
 	}
@@ -15863,14 +21347,249 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 			res = VkAppendLine(sc);
 			if (res != VKFFT_SUCCESS) return res;
 		}
+		uint64_t used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if (sc->reorderFourStep) {
 			if (sc->fftDim == sc->fft_dim_full) {
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+					for (uint64_t i = 0; i < used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+
+						if (sc->outputStride[0] > 1)
+							sc->tempLen = sprintf(sc->tempStr, "		inoutID = (combinedID %% %" PRIu64 ") * %" PRIu64 " + (combinedID / %" PRIu64 ") * %" PRIu64 ";\n", sc->fftDim, sc->outputStride[0], sc->fftDim, sc->outputStride[1]);
+						else
+							sc->tempLen = sprintf(sc->tempStr, "		inoutID = (combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * %" PRIu64 ";\n", sc->fftDim, sc->fftDim, sc->outputStride[1]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if (sc->axisSwapped) {
+							if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[0], sc->size[sc->axis_id + 1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						else {
+							if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[1], sc->size[sc->axis_id + 1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						if (sc->zeropadBluestein[1]) {
+							sc->tempLen = sprintf(sc->tempStr, "		if((combinedID %% %" PRIu64 ") < %" PRIu64 "){\n", sc->fft_dim_full, sc->fft_zeropad_Bluestein_left_write[sc->axis_id]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (sc->zeropad[1]) {
+							sc->tempLen = sprintf(sc->tempStr, "		if((inoutID %% %" PRIu64 " < %" PRIu64 ")||(inoutID %% %" PRIu64 " >= %" PRIu64 ")){\n", sc->outputStride[1], sc->fft_zeropad_left_write[sc->axis_id], sc->outputStride[1], sc->fft_zeropad_right_write[sc->axis_id]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sc->tempLen = sprintf(sc->tempStr, "			%s = ", sc->inoutID);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						res = indexOutputVkFFT(sc, uintType, writeType, sc->inoutID, 0, requestCoordinate, requestBatch);
+						if (res != VKFFT_SUCCESS) return res;
+						sc->tempLen = sprintf(sc->tempStr, ";\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						res = appendZeropadStartReadWriteStage(sc, 0);
+						if (res != VKFFT_SUCCESS) return res;
+						if (sc->writeFromRegisters) {
+							if (sc->outputBufferBlockNum == 1)
+								sc->tempLen = sprintf(sc->tempStr, "		%s[%s] = %s%s%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
+							else
+								sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %s%s%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							if (sc->axisSwapped) {
+								if (sc->outputBufferBlockNum == 1)
+									sc->tempLen = sprintf(sc->tempStr, "		%s[%s] = %ssdata[(combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ")]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
+								else
+									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ")]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							else {
+								if (sc->outputBufferBlockNum == 1)
+									sc->tempLen = sprintf(sc->tempStr, "		%s[%s] = %ssdata[(combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
+								else
+									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						res = appendZeropadEndReadWriteStage(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if (sc->zeropad[1]) {
+							sc->tempLen = sprintf(sc->tempStr, "	}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (sc->zeropadBluestein[1]) {
+							sc->tempLen = sprintf(sc->tempStr, "	}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (sc->axisSwapped) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
+								sc->tempLen = sprintf(sc->tempStr, "		}");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						else {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
+								sc->tempLen = sprintf(sc->tempStr, "		}");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+				}
+			}
+			else {
+				for (uint64_t k = 0; k < sc->registerBoost; k++) {
+					for (uint64_t i = 0; i < used_registers_write; i++) {
+						if (sc->localSize[1] == 1)
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
+						else
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						if (sc->axisSwapped) {
+							sc->tempLen = sprintf(sc->tempStr, "		inoutID = combinedID %% %" PRIu64 " + ((%s%s) / %" PRIu64 ")*%" PRIu64 " + ((combinedID/%" PRIu64 ") * %" PRIu64 ")+ ((%s%s) %% %" PRIu64 ") * %" PRIu64 ";\n", sc->localSize[0], sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0], sc->localSize[0], sc->fft_dim_full / sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							if (sc->localSize[1] == 1)
+								sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s%s)/%" PRIu64 "+ (combinedID * %" PRIu64 ")+ ((%s%s) %% %" PRIu64 ") * %" PRIu64 ";\n", sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize);
+							else
+								sc->tempLen = sprintf(sc->tempStr, "		inoutID = combinedID %% %" PRIu64 " + ((%s%s) / %" PRIu64 ")*%" PRIu64 " + ((combinedID/%" PRIu64 ") * %" PRIu64 ")+ ((%s%s) %% %" PRIu64 ") * %" PRIu64 ";\n", sc->localSize[1], sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1], sc->localSize[1], sc->fft_dim_full / sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						if (sc->zeropad[1]) {
+							sc->tempLen = sprintf(sc->tempStr, "		if((inoutID %% %" PRIu64 " < %" PRIu64 ")||(inoutID %% %" PRIu64 " >= %" PRIu64 ")){\n", sc->fft_dim_full, sc->fft_zeropad_left_write[sc->axis_id], sc->fft_dim_full, sc->fft_zeropad_right_write[sc->axis_id]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						sc->tempLen = sprintf(sc->tempStr, "			%s = ", sc->inoutID);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						res = indexOutputVkFFT(sc, uintType, writeType, sc->inoutID, 0, requestCoordinate, requestBatch);
+						if (res != VKFFT_SUCCESS) return res;
+						sc->tempLen = sprintf(sc->tempStr, ";\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						res = appendZeropadStartReadWriteStage(sc, 0);
+						if (res != VKFFT_SUCCESS) return res;
+						if (sc->writeFromRegisters) {
+							//not used
+							if (sc->outputBufferBlockNum == 1)
+								sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %s%s%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
+							else
+								sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %s%s%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+						else {
+							if (sc->axisSwapped) {
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+									sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								if (sc->outputBufferBlockNum == 1)
+									sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %ssdata[(combinedID %% %s)+(combinedID/%s)*sharedStride]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->gl_WorkGroupSize_x, sc->gl_WorkGroupSize_x, convTypeRight);
+								else
+									sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %s)+(combinedID/%s)*sharedStride]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->gl_WorkGroupSize_x, sc->gl_WorkGroupSize_x, convTypeRight);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+									sc->tempLen = sprintf(sc->tempStr, "		}\n");
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+							}
+							else {
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+									sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+								if (sc->outputBufferBlockNum == 1)
+									sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %ssdata[(combinedID %% %s)*sharedStride+combinedID/%s]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->gl_WorkGroupSize_y, sc->gl_WorkGroupSize_y, convTypeRight);
+								else
+									sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %s)*sharedStride+combinedID/%s]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->gl_WorkGroupSize_y, sc->gl_WorkGroupSize_y, convTypeRight);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+									sc->tempLen = sprintf(sc->tempStr, "		}\n");
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
+							}
+						}
+						res = appendZeropadEndReadWriteStage(sc);
+						if (res != VKFFT_SUCCESS) return res;
+						/*
+						if (sc->outputBufferBlockNum == 1)
+							if (sc->localSize[1] == 1)
+								sc->tempLen = sprintf(sc->tempStr, "		%s[indexOutput(inoutID%s%s)] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, convTypeLeft, i, convTypeRight);
+							else
+								sc->tempLen = sprintf(sc->tempStr, "			%s[indexOutput(inoutID%s%s)] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, convTypeLeft, i, convTypeRight);
+						else
+							if (sc->localSize[1] == 1)
+								sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[indexOutput(inoutID%s%s) / %" PRIu64 "]%s[indexOutput(inoutID%s%s) %% %" PRIu64 "] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, sc->outputBufferBlockSize, outputsStruct, requestCoordinate, requestBatch, sc->outputBufferBlockSize, convTypeLeft, i, convTypeRight);
+							else
+								sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[indexOutput(inoutID%s%s) / %" PRIu64 "]%s[indexOutput(inoutID%s%s) %% %" PRIu64 "] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, sc->outputBufferBlockSize, outputsStruct, requestCoordinate, requestBatch, sc->outputBufferBlockSize, convTypeLeft, i, convTypeRight);
+						*/
+						if (sc->zeropad[1]) {
+							sc->tempLen = sprintf(sc->tempStr, "	}");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
+					}
+				}
+			}
+		}
+		else {
+			if (sc->fftDim == sc->fft_dim_full) {
+				for (uint64_t k = 0; k < sc->registerBoost; k++) {
+					for (uint64_t i = 0; i < used_registers_write; i++) {
+						if (sc->localSize[1] == 1)
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
+						else
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 
@@ -15882,14 +21601,24 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->axisSwapped) {
 							if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
-								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[0], sc->size[sc->axis_id + 1]);
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + %s*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, sc->localSize[0], sc->size[sc->axis_id + 1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 						}
 						else {
 							if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
-								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + (%s%s)*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, shiftY2, sc->localSize[1], sc->size[sc->axis_id + 1]);
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + %s*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, sc->localSize[1], sc->size[sc->axis_id + 1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
@@ -15953,205 +21682,23 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						if (sc->axisSwapped) {
-							if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
-								sc->tempLen = sprintf(sc->tempStr, "		}");
-								res = VkAppendLine(sc);
-								if (res != VKFFT_SUCCESS) return res;
-							}
-						}
-						else {
-							if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
-								sc->tempLen = sprintf(sc->tempStr, "		}");
-								res = VkAppendLine(sc);
-								if (res != VKFFT_SUCCESS) return res;
-							}
-						}
-					}
-				}
-			}
-			else {
-				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
-						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						if (sc->axisSwapped) {
-							sc->tempLen = sprintf(sc->tempStr, "		inoutID = combinedID %% %" PRIu64 " + ((%s%s) / %" PRIu64 ")*%" PRIu64 " + ((combinedID/%" PRIu64 ") * %" PRIu64 ")+ ((%s%s) %% %" PRIu64 ") * %" PRIu64 ";\n", sc->localSize[0], sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0], sc->localSize[0], sc->fft_dim_full / sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						else {
-							if (sc->localSize[1] == 1)
-								sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s%s)/%" PRIu64 "+ (combinedID * %" PRIu64 ")+ ((%s%s) %% %" PRIu64 ") * %" PRIu64 ";\n", sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize);
-							else
-								sc->tempLen = sprintf(sc->tempStr, "		inoutID = combinedID %% %" PRIu64 " + ((%s%s) / %" PRIu64 ")*%" PRIu64 " + ((combinedID/%" PRIu64 ") * %" PRIu64 ")+ ((%s%s) %% %" PRIu64 ") * %" PRIu64 ";\n", sc->localSize[1], sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1], sc->localSize[1], sc->fft_dim_full / sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						if (sc->zeropad[1]) {
-							sc->tempLen = sprintf(sc->tempStr, "		if((inoutID %% %" PRIu64 " < %" PRIu64 ")||(inoutID %% %" PRIu64 " >= %" PRIu64 ")){\n", sc->fft_dim_full, sc->fft_zeropad_left_write[sc->axis_id], sc->fft_dim_full, sc->fft_zeropad_right_write[sc->axis_id]);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						sc->tempLen = sprintf(sc->tempStr, "			%s = ", sc->inoutID);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						res = indexOutputVkFFT(sc, uintType, writeType, sc->inoutID, 0, requestCoordinate, requestBatch);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, ";\n");
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						res = appendZeropadStartReadWriteStage(sc, 0);
-						if (res != VKFFT_SUCCESS) return res;
-						if (sc->writeFromRegisters) {
-							if (sc->outputBufferBlockNum == 1)
-								sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %s%s%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
-							else
-								sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %s%s%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						else {
-							if (sc->axisSwapped) {
-								if (sc->outputBufferBlockNum == 1)
-									sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %ssdata[(combinedID %% %s)+(combinedID/%s)*sharedStride]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->gl_WorkGroupSize_x, sc->gl_WorkGroupSize_x, convTypeRight);
-								else
-									sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %s)+(combinedID/%s)*sharedStride]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->gl_WorkGroupSize_x, sc->gl_WorkGroupSize_x, convTypeRight);
-								res = VkAppendLine(sc);
-								if (res != VKFFT_SUCCESS) return res;
-							}
-							else {
-								if (sc->outputBufferBlockNum == 1)
-									sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %ssdata[(combinedID %% %s)*sharedStride+combinedID/%s]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->gl_WorkGroupSize_y, sc->gl_WorkGroupSize_y, convTypeRight);
-								else
-									sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %s)*sharedStride+combinedID/%s]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->gl_WorkGroupSize_y, sc->gl_WorkGroupSize_y, convTypeRight);
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-						}
-						res = appendZeropadEndReadWriteStage(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						/*
-						if (sc->outputBufferBlockNum == 1)
-							if (sc->localSize[1] == 1)
-								sc->tempLen = sprintf(sc->tempStr, "		%s[indexOutput(inoutID%s%s)] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, convTypeLeft, i, convTypeRight);
-							else
-								sc->tempLen = sprintf(sc->tempStr, "			%s[indexOutput(inoutID%s%s)] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, convTypeLeft, i, convTypeRight);
-						else
-							if (sc->localSize[1] == 1)
-								sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[indexOutput(inoutID%s%s) / %" PRIu64 "]%s[indexOutput(inoutID%s%s) %% %" PRIu64 "] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, sc->outputBufferBlockSize, outputsStruct, requestCoordinate, requestBatch, sc->outputBufferBlockSize, convTypeLeft, i, convTypeRight);
-							else
-								sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[indexOutput(inoutID%s%s) / %" PRIu64 "]%s[indexOutput(inoutID%s%s) %% %" PRIu64 "] = %stemp_%" PRIu64 "%s;\n", requestCoordinate, requestBatch, sc->outputBufferBlockSize, outputsStruct, requestCoordinate, requestBatch, sc->outputBufferBlockSize, convTypeLeft, i, convTypeRight);
-						*/
-						if (sc->zeropad[1]) {
-							sc->tempLen = sprintf(sc->tempStr, "	}");
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-					}
-				}
-			}
-		}
-		else {
-			if (sc->fftDim == sc->fft_dim_full) {
-				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
-						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-
-						if (sc->outputStride[0] > 1)
-							sc->tempLen = sprintf(sc->tempStr, "		inoutID = (combinedID %% %" PRIu64 ") * %" PRIu64 " + (combinedID / %" PRIu64 ") * %" PRIu64 ";\n", sc->fftDim, sc->outputStride[0], sc->fftDim, sc->outputStride[1]);
-						else
-							sc->tempLen = sprintf(sc->tempStr, "		inoutID = (combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * %" PRIu64 ";\n", sc->fftDim, sc->fftDim, sc->outputStride[1]);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						if (sc->axisSwapped) {
 							if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
-								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + %s*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, sc->localSize[0], sc->size[sc->axis_id + 1]);
+								sc->tempLen = sprintf(sc->tempStr, "		}");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 						}
 						else {
-							if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
-								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + %s*%" PRIu64 "< %" PRIu64 "){", sc->fftDim, sc->gl_WorkGroupID_y, sc->localSize[1], sc->size[sc->axis_id + 1]);
-								res = VkAppendLine(sc);
-								if (res != VKFFT_SUCCESS) return res;
-							}
-						}
-						if (sc->zeropadBluestein[1]) {
-							sc->tempLen = sprintf(sc->tempStr, "		if((combinedID %% %" PRIu64 ") < %" PRIu64 "){\n", sc->fft_dim_full, sc->fft_zeropad_Bluestein_left_write[sc->axis_id]);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						if (sc->zeropad[1]) {
-							sc->tempLen = sprintf(sc->tempStr, "		if((inoutID %% %" PRIu64 " < %" PRIu64 ")||(inoutID %% %" PRIu64 " >= %" PRIu64 ")){\n", sc->outputStride[1], sc->fft_zeropad_left_write[sc->axis_id], sc->outputStride[1], sc->fft_zeropad_right_write[sc->axis_id]);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						sc->tempLen = sprintf(sc->tempStr, "			%s = ", sc->inoutID);
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						res = indexOutputVkFFT(sc, uintType, writeType, sc->inoutID, 0, requestCoordinate, requestBatch);
-						if (res != VKFFT_SUCCESS) return res;
-						sc->tempLen = sprintf(sc->tempStr, ";\n");
-						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						res = appendZeropadStartReadWriteStage(sc, 0);
-						if (res != VKFFT_SUCCESS) return res;
-						if (sc->writeFromRegisters) {
-							if (sc->outputBufferBlockNum == 1)
-								sc->tempLen = sprintf(sc->tempStr, "		%s[%s] = %s%s%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
-							else
-								sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %s%s%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						else {
-							if (sc->axisSwapped) {
-								if (sc->outputBufferBlockNum == 1)
-									sc->tempLen = sprintf(sc->tempStr, "		%s[%s] = %ssdata[(combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ")]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
-								else
-									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %" PRIu64 ") * sharedStride + (combinedID / %" PRIu64 ")]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
-								res = VkAppendLine(sc);
-								if (res != VKFFT_SUCCESS) return res;
-							}
-							else {
-								if (sc->outputBufferBlockNum == 1)
-									sc->tempLen = sprintf(sc->tempStr, "		%s[%s] = %ssdata[(combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
-								else
-									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[(combinedID %% %" PRIu64 ") + (combinedID / %" PRIu64 ") * sharedStride]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->fftDim, sc->fftDim, convTypeRight);
-								res = VkAppendLine(sc);
-								if (res != VKFFT_SUCCESS) return res;
-							}
-						}
-						res = appendZeropadEndReadWriteStage(sc);
-						if (res != VKFFT_SUCCESS) return res;
-						if (sc->zeropad[1]) {
-							sc->tempLen = sprintf(sc->tempStr, "	}\n");
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						if (sc->zeropadBluestein[1]) {
-							sc->tempLen = sprintf(sc->tempStr, "	}\n");
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-						}
-						if (sc->axisSwapped) {
-							if (sc->size[sc->axis_id + 1] % sc->localSize[0] != 0) {
-								sc->tempLen = sprintf(sc->tempStr, "		}");
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-						}
-						else {
 							if (sc->size[sc->axis_id + 1] % sc->localSize[1] != 0) {
 								sc->tempLen = sprintf(sc->tempStr, "		}");
 								res = VkAppendLine(sc);
@@ -16163,24 +21710,24 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 			}
 			else {
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+					for (uint64_t i = 0; i < used_registers_write; i++) {
+						/*if (sc->localSize[1] == 1)
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else {
 							if (!sc->axisSwapped)
-								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 							else
-								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 " * numActiveThreads;\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread));
+								sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 " * numActiveThreads;\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write));
 						}
 						res = VkAppendLine(sc);
-						if (res != VKFFT_SUCCESS) return res;
+						if (res != VKFFT_SUCCESS) return res;*/
 						if (sc->axisSwapped) {
 							sc->tempLen = sprintf(sc->tempStr, "		inoutID = (combinedID %% %" PRIu64 ")+(combinedID / %" PRIu64 ") * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ");", sc->fftDim, sc->fftDim, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[0] * sc->firstStageStartSize);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-							sc->tempLen = sprintf(sc->tempStr, "		inoutID = %s+%" PRIu64 "+%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ");", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0], sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize);
+							sc->tempLen = sprintf(sc->tempStr, "		inoutID = %s+%" PRIu64 "+%s * %" PRIu64 " + (((%s%s) %% %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") * %" PRIu64 ");", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0], sc->gl_LocalInvocationID_y, sc->firstStageStartSize, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->fftDim, sc->gl_WorkGroupID_x, shiftX, sc->firstStageStartSize / sc->fftDim, sc->localSize[1] * sc->firstStageStartSize);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
@@ -16201,6 +21748,7 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 						res = appendZeropadStartReadWriteStage(sc, 0);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->writeFromRegisters) {
+							//not used
 							if (sc->outputBufferBlockNum == 1)
 								sc->tempLen = sprintf(sc->tempStr, "		%s[inoutID]=%s%s%s;\n", outputsStruct, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
 							else
@@ -16210,20 +21758,40 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 						}
 						else {
 							if (sc->axisSwapped) {
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+									sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_write) * sc->localSize[1]);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
 								if (sc->outputBufferBlockNum == 1)
-									sc->tempLen = sprintf(sc->tempStr, "		%s[inoutID]=%ssdata[%s + sharedStride*(%s + %" PRIu64 ")]%s;\n", outputsStruct, convTypeLeft, sc->gl_LocalInvocationID_x, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], convTypeRight);
+									sc->tempLen = sprintf(sc->tempStr, "		%s[inoutID]=%ssdata[%s + sharedStride*(%s + %" PRIu64 ")]%s;\n", outputsStruct, convTypeLeft, sc->gl_LocalInvocationID_x, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], convTypeRight);
 								else
-									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] = %ssdata[%s + sharedStride*(%s + %" PRIu64 ")]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->gl_LocalInvocationID_x, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], convTypeRight);
+									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] = %ssdata[%s + sharedStride*(%s + %" PRIu64 ")]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->gl_LocalInvocationID_x, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], convTypeRight);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+									sc->tempLen = sprintf(sc->tempStr, "		}\n");
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
 							}
 							else {
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+									sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_x, sc->fftDim - (i + k * used_registers_write) * sc->localSize[0]);
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
 								if (sc->outputBufferBlockNum == 1)
-									sc->tempLen = sprintf(sc->tempStr, "		%s[inoutID]=%ssdata[sharedStride*%s + (%s + %" PRIu64 ")]%s;\n", outputsStruct, convTypeLeft, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0], convTypeRight);
+									sc->tempLen = sprintf(sc->tempStr, "		%s[inoutID]=%ssdata[sharedStride*%s + (%s + %" PRIu64 ")]%s;\n", outputsStruct, convTypeLeft, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0], convTypeRight);
 								else
-									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] = %ssdata[sharedStride*%s + (%s + %" PRIu64 ")]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0], convTypeRight);
+									sc->tempLen = sprintf(sc->tempStr, "		outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] = %ssdata[sharedStride*%s + (%s + %" PRIu64 ")]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->gl_LocalInvocationID_y, sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0], convTypeRight);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
+								if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+									sc->tempLen = sprintf(sc->tempStr, "		}\n");
+									res = VkAppendLine(sc);
+									if (res != VKFFT_SUCCESS) return res;
+								}
 							}
 						}
 						appendZeropadEndReadWriteStage(sc);
@@ -16256,10 +21824,12 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 		sc->tempLen = sprintf(sc->tempStr, "		if (((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->fftDim * sc->stageStartSize, sc->size[sc->axis_id]);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
+		uint64_t used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if ((sc->reorderFourStep) && (sc->stageStartSize == 1)) {
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-					sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s + %" PRIu64 ") * (%" PRIu64 ") + (((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")) * (%" PRIu64 ") + ((%s%s) / %" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->fft_dim_full / sc->fftDim, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * (sc->firstStageStartSize / sc->fftDim));
+				for (uint64_t i = 0; i < used_registers_write; i++) {
+					sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s + %" PRIu64 ") * (%" PRIu64 ") + (((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")) * (%" PRIu64 ") + ((%s%s) / %" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->fft_dim_full / sc->fftDim, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->firstStageStartSize / sc->fftDim, sc->fft_dim_full / sc->firstStageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * (sc->firstStageStartSize / sc->fftDim));
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 
@@ -16279,6 +21849,11 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 					if (res != VKFFT_SUCCESS) return res;
 					res = appendZeropadStartReadWriteStage(sc, 0);
 					if (res != VKFFT_SUCCESS) return res;
+					if ((1 + i + k * used_registers_write) * sc->localSize[1] >= (sc->fftDim)) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_write) * sc->localSize[1]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->writeFromRegisters) {
 						if (sc->outputBufferBlockNum == 1)
 							sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %s%s%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
@@ -16289,13 +21864,18 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 					}
 					else {
 						if (sc->outputBufferBlockNum == 1)
-							sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "			%s[%s] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", outputsStruct, sc->inoutID, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[%s / %" PRIu64 "]%s[%s %% %" PRIu64 "] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", sc->inoutID, sc->outputBufferBlockSize, outputsStruct, sc->inoutID, sc->outputBufferBlockSize, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 
 					}
+					if ((1 + i + k * used_registers_write) * sc->localSize[1] >= (sc->fftDim)) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					res = appendZeropadEndReadWriteStage(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					if (sc->zeropad[1]) {
@@ -16309,9 +21889,9 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 		}
 		else {
 			for (uint64_t k = 0; k < sc->registerBoost; k++) {
-				for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+				for (uint64_t i = 0; i < used_registers_write; i++) {
 					if (sc->zeropadBluestein[1]) {
-						sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s + %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim);
+						sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s + %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						sc->tempLen = sprintf(sc->tempStr, "		if((inoutID %% %" PRIu64 ") < %" PRIu64 "){\n", sc->fft_dim_full, sc->fft_zeropad_Bluestein_left_write[sc->axis_id]);
@@ -16320,7 +21900,7 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 					}
 					if (sc->zeropad[1]) {
 						if (!sc->zeropadBluestein[1]) {
-							sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s + %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim);
+							sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s + %" PRIu64 ") * %" PRIu64 " + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
@@ -16332,7 +21912,7 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 					sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x);
-					sprintf(index_y, "%" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ")", sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim);
+					sprintf(index_y, "%" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ")", sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim);
 					res = indexOutputVkFFT(sc, uintType, writeType, index_x, index_y, requestCoordinate, requestBatch);
 					if (res != VKFFT_SUCCESS) return res;
 					sc->tempLen = sprintf(sc->tempStr, ";\n");
@@ -16340,7 +21920,12 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 					if (res != VKFFT_SUCCESS) return res;
 					res = appendZeropadStartReadWriteStage(sc, 0);
 					if (res != VKFFT_SUCCESS) return res;
-					//sc->tempLen = sprintf(sc->tempStr, "		inoutID = indexOutput((%s%s) %% (%" PRIu64 "), %" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ")%s%s);\n", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim, requestCoordinate, requestBatch);
+					//sc->tempLen = sprintf(sc->tempStr, "		inoutID = indexOutput((%s%s) %% (%" PRIu64 "), %" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") %% (%" PRIu64 ")+((%s%s) / %" PRIu64 ") * (%" PRIu64 ")%s%s);\n", sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x, sc->stageStartSize, sc->gl_GlobalInvocationID_x, shiftX, sc->fft_dim_x * sc->stageStartSize, sc->stageStartSize * sc->fftDim, requestCoordinate, requestBatch);
+					if ((1 + i + k * used_registers_write) * sc->localSize[1] >= (sc->fftDim)) {
+						sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_write) * sc->localSize[1]);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
 					if (sc->writeFromRegisters) {
 						if (sc->outputBufferBlockNum == 1)
 							sc->tempLen = sprintf(sc->tempStr, "			%s[inoutID] = %s%s%s;\n", outputsStruct, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
@@ -16351,9 +21936,14 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 					}
 					else {
 						if (sc->outputBufferBlockNum == 1)
-							sc->tempLen = sprintf(sc->tempStr, "			%s[inoutID] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", outputsStruct, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "			%s[inoutID] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", outputsStruct, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] =  %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+							sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] =  %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+						res = VkAppendLine(sc);
+						if (res != VKFFT_SUCCESS) return res;
+					}
+					if ((1 + i + k * used_registers_write) * sc->localSize[1] >= (sc->fftDim)) {
+						sc->tempLen = sprintf(sc->tempStr, "		}\n");
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 					}
@@ -16392,9 +21982,11 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 		sc->tempLen = sprintf(sc->tempStr, "		if (((%s%s) / %" PRIu64 ") * (%" PRIu64 ") < %" PRIu64 ") {\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize * sc->fftDim, sc->fft_dim_full);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
+		uint64_t used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		for (uint64_t k = 0; k < sc->registerBoost; k++) {
-			for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-				sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s%s) %% (%" PRIu64 ") + %" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize * sc->fftDim);
+			for (uint64_t i = 0; i < used_registers_write; i++) {
+				sc->tempLen = sprintf(sc->tempStr, "		inoutID = (%s%s) %% (%" PRIu64 ") + %" PRIu64 " * (%s + %" PRIu64 ") + ((%s%s) / %" PRIu64 ") * (%" PRIu64 ");\n", sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_GlobalInvocationID_x, shiftX, sc->stageStartSize, sc->stageStartSize * sc->fftDim);
 				res = VkAppendLine(sc);
 				if (res != VKFFT_SUCCESS) return res;
 				if (sc->zeropadBluestein[1]) {
@@ -16417,6 +22009,11 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 				if (res != VKFFT_SUCCESS) return res;
 				res = appendZeropadStartReadWriteStage(sc, 0);
 				if (res != VKFFT_SUCCESS) return res;
+				if ((1 + i + k * used_registers_write) * sc->localSize[1] >= (sc->fftDim)) {
+					sc->tempLen = sprintf(sc->tempStr, "		if(%s < %" PRIu64 "){\n", sc->gl_LocalInvocationID_y, sc->fftDim - (i + k * used_registers_write) * sc->localSize[1]);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
 				if (sc->writeFromRegisters) {
 					if (sc->outputBufferBlockNum == 1)
 						sc->tempLen = sprintf(sc->tempStr, "			%s[inoutID] = %s%s%s;\n", outputsStruct, convTypeLeft, sc->regIDs[i + k * sc->registers_per_thread], convTypeRight);
@@ -16427,9 +22024,14 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 				}
 				else {
 					if (sc->outputBufferBlockNum == 1)
-						sc->tempLen = sprintf(sc->tempStr, "			%s[inoutID] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", outputsStruct, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+						sc->tempLen = sprintf(sc->tempStr, "			%s[inoutID] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", outputsStruct, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
 					else
-						sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+						sc->tempLen = sprintf(sc->tempStr, "			outputBlocks[inoutID / %" PRIu64 "]%s[inoutID %% %" PRIu64 "] = %ssdata[%s*(%s+%" PRIu64 ") + %s]%s;\n", sc->outputBufferBlockSize, outputsStruct, sc->outputBufferBlockSize, convTypeLeft, sc->sharedStride, sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[1], sc->gl_LocalInvocationID_x, convTypeRight);
+					res = VkAppendLine(sc);
+					if (res != VKFFT_SUCCESS) return res;
+				}
+				if ((1 + i + k * used_registers_write) * sc->localSize[1] >= (sc->fftDim)) {
+					sc->tempLen = sprintf(sc->tempStr, "		}\n");
 					res = VkAppendLine(sc);
 					if (res != VKFFT_SUCCESS) return res;
 				}
@@ -16512,7 +22114,7 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 						}
 					}
 					uint64_t num_out = (sc->axisSwapped) ? (uint64_t)ceil(mult * (sc->fftDim / 2 + 1) / (double)sc->localSize[1]) : (uint64_t)ceil(mult * (sc->fftDim / 2 + 1) / (double)sc->localSize[0]);
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
 					for (uint64_t i = 0; i < num_out; i++) {
 						if (sc->localSize[1] == 1)
 							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * num_out) * sc->localSize[0]);
@@ -16538,7 +22140,7 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){", mult * (sc->fftDim / 2 + 1) * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -16550,7 +22152,7 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[1]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[1]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){", mult * (sc->fftDim / 2 + 1) * sc->localSize[1]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -16667,14 +22269,14 @@ static inline VkFFTResult appendWriteDataVkFFT(VkFFTSpecializationConstantsLayou
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						if (sc->axisSwapped) {
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 						}
 						else {
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[1])
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[1])
 							{
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
@@ -16731,6 +22333,8 @@ if (%s==%" PRIu64 ") \n\
 			if (res != VKFFT_SUCCESS) return res;
 		}
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		//res = appendZeropadStart(sc);
 		//if (res != VKFFT_SUCCESS) return res;
 		if (sc->reorderFourStep) {
@@ -16739,11 +22343,11 @@ if (%s==%" PRIu64 ") \n\
 		else {
 			if (sc->fftDim == sc->fft_dim_full) {
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+					for (uint64_t i = 0; i < used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 
@@ -16760,6 +22364,11 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						else {
 							if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
@@ -16767,6 +22376,11 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						if (sc->zeropadBluestein[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if((combinedID %% %" PRIu64 ") < %" PRIu64 "){\n", sc->fft_dim_full, sc->fft_zeropad_Bluestein_left_write[sc->axis_id]);
@@ -16861,6 +22475,11 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						if (sc->axisSwapped) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[0] != 0) {
 								sc->tempLen = sprintf(sc->tempStr, "		}");
 								res = VkAppendLine(sc);
@@ -16868,6 +22487,11 @@ if (%s==%" PRIu64 ") \n\
 							}
 						}
 						else {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
 								sc->tempLen = sprintf(sc->tempStr, "		}");
 								res = VkAppendLine(sc);
@@ -16945,7 +22569,7 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 					uint64_t num_out = (sc->axisSwapped) ? (uint64_t)ceil((sc->fftDim) / (double)sc->localSize[1]) : (uint64_t)ceil((sc->fftDim) / (double)sc->localSize[0]);
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
 					for (uint64_t i = 0; i < num_out; i++) {
 						if (sc->localSize[1] == 1)
 							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * num_out) * sc->localSize[0]);
@@ -16970,7 +22594,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim) * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -16982,7 +22606,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[1]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[1]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim) * sc->localSize[1]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -17088,14 +22712,14 @@ if (%s==%" PRIu64 ") \n\
 						if (res != VKFFT_SUCCESS) return res;
 
 						if (sc->axisSwapped) {
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 						}
 						else {
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[1])
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[1])
 							{
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
@@ -17171,7 +22795,7 @@ if (%s==%" PRIu64 ") \n\
 						//if (res != VKFFT_SUCCESS) return res;
 					}
 					uint64_t num_out = (uint64_t)ceil(mult * (sc->fftDim) / (double)sc->localSize[1]);
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
 					for (uint64_t i = 0; i < num_out; i++) {
 						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * num_out) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
@@ -17185,7 +22809,7 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim) * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -17274,7 +22898,7 @@ if (%s==%" PRIu64 ") \n\
 						}
 						res = appendZeropadEndReadWriteStage(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -17355,7 +22979,7 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 					uint64_t num_out = (sc->axisSwapped) ? (uint64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[1]) : (uint64_t)ceil((sc->fftDim / 2 + 1) / (double)sc->localSize[0]);
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
 					for (uint64_t i = 0; i < num_out; i++) {
 						if (sc->localSize[1] == 1)
 							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * num_out) * sc->localSize[0]);
@@ -17380,7 +23004,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -17392,7 +23016,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[1]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[1]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[1]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -17424,12 +23048,25 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-							sc->tempLen = sprintf(sc->tempStr, "		mult.x = 2*%s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", cosDef, -double_PI / 2 / sc->fftDim, LFending, sc->fftDim / 2 + 1);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-							sc->tempLen = sprintf(sc->tempStr, "		mult.y = 2*%s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", sinDef, -double_PI / 2 / sc->fftDim, LFending, sc->fftDim / 2 + 1);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
+							if (!strcmp(floatType, "float")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult.x = 2*%s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", cosDef, (double)(-double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.y = 2*%s(%.17e%s * (combinedID %% %" PRIu64 ") );\n", sinDef, (double)(-double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "double")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (combinedID %% %" PRIu64 ") );\n", (double)(-double_PI / 2 / sc->fftDim), LFending, sc->fftDim / 2 + 1);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.x = 2*mult.x;\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.y = 2*mult.y;\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						if (sc->mergeSequencesR2C) {
 							if (sc->axisSwapped) {
@@ -17678,14 +23315,14 @@ if (%s==%" PRIu64 ") \n\
 						if (res != VKFFT_SUCCESS) return res;
 
 						if (sc->axisSwapped) {
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 						}
 						else {
-							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[1])
+							if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[1])
 							{
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
@@ -17773,7 +23410,7 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim / 2 + 1) * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -17804,12 +23441,25 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-							sc->tempLen = sprintf(sc->tempStr, "		mult.x = 2*%s(%.17e%s * (combinedID / %" PRIu64 ") );\n", cosDef, -double_PI / 2 / sc->fftDim, LFending, sc->localSize[0]);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-							sc->tempLen = sprintf(sc->tempStr, "		mult.y = 2*%s(%.17e%s * (combinedID / %" PRIu64 ") );\n", sinDef, -double_PI / 2 / sc->fftDim, LFending, sc->localSize[0]);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
+							if (!strcmp(floatType, "float")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult.x = 2*%s(%.17e%s * (combinedID / %" PRIu64 ") );\n", cosDef, (double)(-double_PI / 2 / sc->fftDim), LFending, sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.y = 2*%s(%.17e%s * (combinedID / %" PRIu64 ") );\n", sinDef, (double)(-double_PI / 2 / sc->fftDim), LFending, sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "double")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (combinedID / %" PRIu64 ") );\n", (double)(-double_PI / 2 / sc->fftDim), LFending, sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.x = 2*mult.x;\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.y = 2*mult.y;\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 
 						if (sc->mergeSequencesR2C) {
@@ -17913,7 +23563,7 @@ if (%s==%" PRIu64 ") \n\
 						}
 						res = appendZeropadEndReadWriteStage(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -17951,6 +23601,8 @@ if (%s==%" PRIu64 ") \n\
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY2, " + consts.workGroupShiftY ");
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if (sc->reorderFourStep) {
 			//Not implemented
 		}
@@ -17958,7 +23610,10 @@ if (%s==%" PRIu64 ") \n\
 			//appendBarrierVkFFT(sc, 1);
 			//appendZeropadStart(sc);
 			if (sc->fftDim == sc->fft_dim_full) {
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+					used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+				}
 				uint64_t maxBluesteinCutOff = 1;
 				if (sc->zeropadBluestein[1]) {
 					if (sc->axisSwapped)
@@ -17967,12 +23622,12 @@ if (%s==%" PRIu64 ") \n\
 						maxBluesteinCutOff = sc->fftDim * sc->localSize[1];
 				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
+					for (uint64_t i = 0; i < used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->zeropadBluestein[1]) {
@@ -17991,6 +23646,11 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						if (sc->axisSwapped) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[0] != 0) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + %s*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, sc->localSize[0], (uint64_t)ceil(sc->size[1] / (double)mult));
 								res = VkAppendLine(sc);
@@ -17999,6 +23659,11 @@ if (%s==%" PRIu64 ") \n\
 
 						}
 						else {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID / %" PRIu64 " + %s*%" PRIu64 "< %" PRIu64 "){\n", sc->fftDim, sc->gl_WorkGroupID_y, sc->localSize[1], (uint64_t)ceil(sc->size[1] / (double)mult));
 								res = VkAppendLine(sc);
@@ -18022,12 +23687,12 @@ if (%s==%" PRIu64 ") \n\
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->mergeSequencesR2C) {
 							if (sc->axisSwapped) {
-								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
+								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (-2*((combinedID %% %" PRIu64 ") %% 2)+1) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 							else {
-								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) + (combinedID / %" PRIu64 ")* sharedStride;\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
+								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (-2*((combinedID %% %" PRIu64 ") %% 2)+1) * ((combinedID %% %" PRIu64 ")/2)) + (combinedID / %" PRIu64 ")* sharedStride;\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
@@ -18050,7 +23715,7 @@ if (%s==%" PRIu64 ") \n\
 						}
 						else {
 							if (!sc->axisSwapped) {
-								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) + (combinedID / %" PRIu64 ") * sharedStride;\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
+								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (-2*((combinedID %% %" PRIu64 ") %% 2)+1) * ((combinedID %% %" PRIu64 ")/2)) + (combinedID / %" PRIu64 ") * sharedStride;\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 								if (sc->outputBufferBlockNum == 1)
@@ -18061,7 +23726,7 @@ if (%s==%" PRIu64 ") \n\
 								if (res != VKFFT_SUCCESS) return res;
 							}
 							else {
-								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
+								sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (-2*((combinedID %% %" PRIu64 ") %% 2)+1) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 								if (sc->outputBufferBlockNum == 1)
@@ -18085,6 +23750,11 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						else {
 							if ((uint64_t)ceil(sc->size[1] / (double)mult) % sc->localSize[1] != 0) {
@@ -18092,6 +23762,11 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						if (sc->zeropadBluestein[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
@@ -18100,7 +23775,10 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 				}
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_dim_full;
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_dim_full;
+					used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+				}
 
 			}
 			else {
@@ -18127,6 +23805,8 @@ if (%s==%" PRIu64 ") \n\
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY2, " + consts.workGroupShiftY ");
 		//uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if (sc->reorderFourStep) {
 			//Not implemented
 		}
@@ -18134,7 +23814,10 @@ if (%s==%" PRIu64 ") \n\
 			//appendBarrierVkFFT(sc, 1);
 			//appendZeropadStart(sc);
 			if (sc->fftDim == sc->fft_dim_full) {
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+					used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
 					if (sc->mergeSequencesR2C) {
 						sc->tempLen = sprintf(sc->tempStr, "\
@@ -18151,9 +23834,9 @@ if (%s==%" PRIu64 ") \n\
 						//res = appendZeropadStart(sc);
 						//if (res != VKFFT_SUCCESS) return res;
 					}
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
+					for (uint64_t i = 0; i < used_registers_write; i++) {
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->zeropadBluestein[1]) {
@@ -18161,6 +23844,11 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "			%s = %s%s + ((combinedID/%" PRIu64 ") * %" PRIu64 ");\n", sc->inoutID, sc->gl_GlobalInvocationID_x, shiftX, sc->localSize[0], sc->outputStride[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -18169,7 +23857,7 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						/*if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						/*if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", mult * (sc->fftDim / 2 + 1) * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -18257,7 +23945,7 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						/*if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] >= mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
+						/*if ((1 + i + k * num_out) * sc->localSize[0] * sc->localSize[1] > mult * (sc->fftDim / 2 + 1) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -18267,6 +23955,11 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if (sc->zeropadBluestein[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
@@ -18274,7 +23967,10 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 				}
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_dim_full;
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_dim_full;
+					used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+				}
 			}
 			else {
 
@@ -18812,6 +24508,8 @@ if (%s==%" PRIu64 ") \n\
 		char shiftY2[500] = "";
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY2, " + consts.workGroupShiftY ");
+		uint64_t used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if (sc->reorderFourStep) {
 			//Not implemented
 		}
@@ -18819,7 +24517,10 @@ if (%s==%" PRIu64 ") \n\
 			//appendBarrierVkFFT(sc, 1);
 			//appendZeropadStart(sc);
 			if (sc->fftDim == sc->fft_dim_full) {
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+					used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+				}
 				uint64_t maxBluesteinCutOff = 1;
 				if (sc->zeropadBluestein[1]) {
 					if (sc->axisSwapped)
@@ -18828,12 +24529,12 @@ if (%s==%" PRIu64 ") \n\
 						maxBluesteinCutOff = sc->fftDim * sc->localSize[1];
 				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
+					for (uint64_t i = 0; i < used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->zeropadBluestein[1]) {
@@ -18842,11 +24543,21 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						if (sc->axisSwapped) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) * sharedStride + (combinedID / %" PRIu64 ");\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[1]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 							sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID %% %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID %% %" PRIu64 ") %% 2)) * ((combinedID %% %" PRIu64 ")/2)) + (combinedID / %" PRIu64 ")* sharedStride;\n", sc->fftDim, sc->fftDim - 1, sc->fftDim, sc->fftDim, sc->fftDim);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -18858,6 +24569,20 @@ if (%s==%" PRIu64 ") \n\
 						sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s.y * (1.0%s - 2 * ((combinedID %% %" PRIu64 ")%%2));\n", sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], LFending, sc->fftDim);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if (sc->axisSwapped) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[0])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+						else {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim * sc->localSize[1])) {
+								sc->tempLen = sprintf(sc->tempStr, "		}\n");
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
 						if (sc->zeropadBluestein[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
@@ -18866,11 +24591,11 @@ if (%s==%" PRIu64 ") \n\
 					}
 				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+					for (uint64_t i = 0; i < used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->axisSwapped) {
@@ -18879,7 +24604,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim) * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -18891,7 +24616,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[1]) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[1]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim) * sc->localSize[1]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -18913,12 +24638,19 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-							sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (2*(combinedID %% %" PRIu64 ")+1) );\n", cosDef, -double_PI / 8 / sc->fftDim, LFending, sc->fftDim);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-							sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (2*(combinedID %% %" PRIu64 ")+1) );\n", sinDef, -double_PI / 8 / sc->fftDim, LFending, sc->fftDim);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
+							if (!strcmp(floatType, "float")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (2*(combinedID %% %" PRIu64 ")+1) );\n", cosDef, (double)(-double_PI / 8 / sc->fftDim), LFending, sc->fftDim);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (2*(combinedID %% %" PRIu64 ")+1) );\n", sinDef, (double)(-double_PI / 8 / sc->fftDim), LFending, sc->fftDim);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "double")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (2*(combinedID %% %" PRIu64 ")+1) );\n", (double)(-double_PI / 8 / sc->fftDim), LFending, sc->fftDim);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						if (sc->zeropad[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if((inoutID %% %" PRIu64 " < %" PRIu64 ")||(inoutID %% %" PRIu64 " >= %" PRIu64 ")){\n", sc->outputStride[1], sc->fft_zeropad_left_write[sc->axis_id], sc->outputStride[1], sc->fft_zeropad_right_write[sc->axis_id]);
@@ -18962,14 +24694,14 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						if (sc->axisSwapped) {
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 						}
 						else {
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[1])
+							if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[1])
 							{
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
@@ -18992,7 +24724,10 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 				}
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_dim_full;
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_dim_full;
+					used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+				}
 			}
 			else {
 
@@ -19020,6 +24755,8 @@ if (%s==%" PRIu64 ") \n\
 		char shiftY2[500] = "";
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY2, " + consts.workGroupShiftY ");
+		uint64_t used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if (sc->reorderFourStep) {
 			//Not implemented
 		}
@@ -19027,14 +24764,17 @@ if (%s==%" PRIu64 ") \n\
 			//appendBarrierVkFFT(sc, 1);
 			//appendZeropadStart(sc);
 			if (sc->fftDim == sc->fft_dim_full) {
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+					used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
+					for (uint64_t i = 0; i < used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->zeropadBluestein[1]) {
@@ -19042,6 +24782,11 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
+							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim) * sc->localSize[0]);
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						sc->tempLen = sprintf(sc->tempStr, "		sdataID = (((combinedID / %" PRIu64 ") %% 2) * %" PRIu64 " + (1-2*((combinedID / %" PRIu64 ") %% 2)) * ((combinedID / %" PRIu64 ")/2)) * sharedStride + (combinedID %% %" PRIu64 ");\n", sc->localSize[0], sc->fftDim - 1, sc->localSize[0], sc->localSize[0], sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
@@ -19052,6 +24797,11 @@ if (%s==%" PRIu64 ") \n\
 						sc->tempLen = sprintf(sc->tempStr, "		%s.y = %s.y * (1.0%s - 2 * ((combinedID / %" PRIu64 ")%%2));\n", sc->regIDs[i + k * sc->registers_per_thread], sc->regIDs[i + k * sc->registers_per_thread], LFending, sc->localSize[0]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
+							sc->tempLen = sprintf(sc->tempStr, "		}\n");
+							res = VkAppendLine(sc);
+							if (res != VKFFT_SUCCESS) return res;
+						}
 						if (sc->zeropadBluestein[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		}\n");
 							res = VkAppendLine(sc);
@@ -19060,11 +24810,11 @@ if (%s==%" PRIu64 ") \n\
 					}
 				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
+					for (uint64_t i = 0; i < used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						if (sc->zeropadBluestein[1]) {
@@ -19077,7 +24827,7 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", (sc->fftDim) * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -19086,7 +24836,7 @@ if (%s==%" PRIu64 ") \n\
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX2, sc->fft_dim_x);
-						sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[1]);
+						sprintf(index_y, "(%s + %" PRIu64 ")", sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_write) * sc->localSize[1]);
 						res = indexOutputVkFFT(sc, uintType, writeType, index_x, index_y, requestCoordinate, requestBatch);
 						if (res != VKFFT_SUCCESS) return res;
 						sc->tempLen = sprintf(sc->tempStr, ";\n");
@@ -19100,12 +24850,19 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						else {
-							sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (2*(combinedID / %" PRIu64 ")+1) );\n", cosDef, -double_PI / 8 / sc->fftDim, LFending, sc->localSize[0]);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
-							sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (2*(combinedID / %" PRIu64 ")+1) );\n", sinDef, -double_PI / 8 / sc->fftDim, LFending, sc->localSize[0]);
-							res = VkAppendLine(sc);
-							if (res != VKFFT_SUCCESS) return res;
+							if (!strcmp(floatType, "float")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult.x = %s(%.17e%s * (2*(combinedID / %" PRIu64 ")+1) );\n", cosDef, (double)(-double_PI / 8 / sc->fftDim), LFending, sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+								sc->tempLen = sprintf(sc->tempStr, "		mult.y = %s(%.17e%s * (2*(combinedID / %" PRIu64 ")+1) );\n", sinDef, (double)(-double_PI / 8 / sc->fftDim), LFending, sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+							if (!strcmp(floatType, "double")) {
+								sc->tempLen = sprintf(sc->tempStr, "		mult = sincos_20(%.17e%s * (2*(combinedID / %" PRIu64 ")+1) );\n", (double)(-double_PI / 8 / sc->fftDim), LFending, sc->localSize[0]);
+								res = VkAppendLine(sc);
+								if (res != VKFFT_SUCCESS) return res;
+							}
 						}
 						if (sc->zeropad[1]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if((%s %% %" PRIu64 " < %" PRIu64 ")||(%s %% %" PRIu64 " >= %" PRIu64 ")){\n", index_y, sc->fft_dim_full, sc->fft_zeropad_left_read[sc->axis_id], index_y, sc->fft_dim_full, sc->fft_zeropad_right_read[sc->axis_id]);
@@ -19127,7 +24884,7 @@ if (%s==%" PRIu64 ") \n\
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 						sprintf(index_x, "(%s%s) %% (%" PRIu64 ")", sc->gl_GlobalInvocationID_x, shiftX2, sc->fft_dim_x);
-						sprintf(index_y, "(%" PRIu64 " - (%s + %" PRIu64 "))", 2 * sc->fftDim - 1, sc->gl_LocalInvocationID_y, (i + k * 2 * sc->min_registers_per_thread) * sc->localSize[1]);
+						sprintf(index_y, "(%" PRIu64 " - (%s + %" PRIu64 "))", 2 * sc->fftDim - 1, sc->gl_LocalInvocationID_y, (i + k * 2 * used_registers_write) * sc->localSize[1]);
 						res = indexOutputVkFFT(sc, uintType, writeType, index_x, index_y, requestCoordinate, requestBatch);
 						if (res != VKFFT_SUCCESS) return res;
 						sc->tempLen = sprintf(sc->tempStr, ";\n");
@@ -19151,7 +24908,7 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= (sc->fftDim) * sc->localSize[0]) {
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > (sc->fftDim) * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -19168,7 +24925,10 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 				}
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_dim_full;
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_dim_full;
+					used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+				}
 			}
 			else {
 
@@ -19194,6 +24954,8 @@ if (%s==%" PRIu64 ") \n\
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY2, " + consts.workGroupShiftY ");
 		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if (sc->reorderFourStep) {
 			//Not implemented
 		}
@@ -19201,7 +24963,10 @@ if (%s==%" PRIu64 ") \n\
 			//appendBarrierVkFFT(sc, 1);
 			//appendZeropadStart(sc);
 			if (sc->fftDim == sc->fft_dim_full) {
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+					used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
 					if (sc->mergeSequencesR2C) {
 						if (sc->axisSwapped) {
@@ -19236,12 +25001,12 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 					//uint64_t num_out = (sc->axisSwapped) ? (uint64_t)ceil(mult * (sc->fftDim / 2 + 1) / (double)sc->localSize[1]) : (uint64_t)ceil(mult * (sc->fftDim / 2 + 1) / (double)sc->localSize[0]);
-					//num_out = (uint64_t)ceil(num_out / (double)sc->min_registers_per_thread);
-					for (uint64_t i = 0; i < mult * sc->min_registers_per_thread; i++) {
+					//num_out = (uint64_t)ceil(num_out / (double)used_registers_write);
+					for (uint64_t i = 0; i < mult * used_registers_write; i++) {
 						if (sc->localSize[1] == 1)
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * sc->min_registers_per_thread) * sc->localSize[0]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = %s + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, (i + k * used_registers_write) * sc->localSize[0]);
 						else
-							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+							sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 
@@ -19255,7 +25020,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= mult * sc->fftDim * sc->localSize[0]) {
+							if ((1 + i + mult * k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > mult * sc->fftDim * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", mult * sc->fftDim * sc->localSize[0]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -19267,7 +25032,7 @@ if (%s==%" PRIu64 ") \n\
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= mult * sc->fftDim * sc->localSize[1]) {
+							if ((1 + i + mult * k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > mult * sc->fftDim * sc->localSize[1]) {
 								sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", mult * sc->fftDim * sc->localSize[1]);
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
@@ -19620,14 +25385,14 @@ if (%s==%" PRIu64 ") \n\
 							if (res != VKFFT_SUCCESS) return res;
 						}
 						if (sc->axisSwapped) {
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= mult * sc->fftDim * sc->localSize[0]) {
+							if ((1 + i + mult * k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > mult * sc->fftDim * sc->localSize[0]) {
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
 								if (res != VKFFT_SUCCESS) return res;
 							}
 						}
 						else {
-							if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= mult * sc->fftDim * sc->localSize[1])
+							if ((1 + i + mult * k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > mult * sc->fftDim * sc->localSize[1])
 							{
 								sc->tempLen = sprintf(sc->tempStr, "	}\n");
 								res = VkAppendLine(sc);
@@ -19650,7 +25415,10 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 				}
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_dim_full;
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_dim_full;
+					used_registers_write = (sc->axisSwapped) ? (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]) : (uint64_t)ceil(sc->fftDim / (double)sc->localSize[0]);
+				}
 				/*for (uint64_t k = 0; k < sc->registerBoost; k++) {
 					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
 						if (sc->localSize[1] == 1)
@@ -19728,7 +25496,9 @@ if (%s==%" PRIu64 ") \n\
 		char shiftY2[500] = "";
 		if (sc->performWorkGroupShift[1])
 			sprintf(shiftY2, " + consts.workGroupShiftY ");
-		uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		//uint64_t mult = (sc->mergeSequencesR2C) ? 2 : 1;
+		uint64_t used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+		if (sc->registerBoost > 1) used_registers_write /= sc->registerBoost;
 		if (sc->reorderFourStep) {
 			//Not implemented
 		}
@@ -19736,10 +25506,13 @@ if (%s==%" PRIu64 ") \n\
 			//appendBarrierVkFFT(sc, 1);
 			//appendZeropadStart(sc);
 			if (sc->fftDim == sc->fft_dim_full) {
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_zeropad_Bluestein_left_write[sc->axis_id];
+					used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+				}
 				for (uint64_t k = 0; k < sc->registerBoost; k++) {
-					for (uint64_t i = 0; i < sc->min_registers_per_thread; i++) {
-						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1]);
+					for (uint64_t i = 0; i < used_registers_write; i++) {
+						sc->tempLen = sprintf(sc->tempStr, "		combinedID = (%s + %" PRIu64 " * %s) + %" PRIu64 ";\n", sc->gl_LocalInvocationID_x, sc->localSize[0], sc->gl_LocalInvocationID_y, (i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1]);
 						res = VkAppendLine(sc);
 						if (res != VKFFT_SUCCESS) return res;
 
@@ -19751,7 +25524,7 @@ if (%s==%" PRIu64 ") \n\
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
 						}
-						if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= sc->fftDim * sc->localSize[0]) {
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > sc->fftDim * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "		if(combinedID < %" PRIu64 "){\n", sc->fftDim * sc->localSize[0]);
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -19880,7 +25653,7 @@ if (%s==%" PRIu64 ") \n\
 
 						res = appendZeropadEndReadWriteStage(sc);
 						if (res != VKFFT_SUCCESS) return res;
-						if ((1 + i + k * sc->min_registers_per_thread) * sc->localSize[0] * sc->localSize[1] >= sc->fftDim * sc->localSize[0]) {
+						if ((1 + i + k * used_registers_write) * sc->localSize[0] * sc->localSize[1] > sc->fftDim * sc->localSize[0]) {
 							sc->tempLen = sprintf(sc->tempStr, "	}\n");
 							res = VkAppendLine(sc);
 							if (res != VKFFT_SUCCESS) return res;
@@ -19892,7 +25665,10 @@ if (%s==%" PRIu64 ") \n\
 						}
 					}
 				}
-				if (sc->zeropadBluestein[1])  sc->fftDim = sc->fft_dim_full;
+				if (sc->zeropadBluestein[1]) {
+					sc->fftDim = sc->fft_dim_full;
+					used_registers_write = (uint64_t)ceil(sc->fftDim / (double)sc->localSize[1]);
+				}
 			}
 			else {
 
@@ -19908,8 +25684,11 @@ if (%s==%" PRIu64 ") \n\
 static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* floatTypeInputMemory, const char* floatTypeOutputMemory, const char* floatTypeKernelMemory, const char* uintType, uint64_t type) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	//appendLicense(output);
-	sc->oldLocale = setlocale(LC_ALL, NULL);
-	setlocale(LC_ALL, "C");
+	if (!sc->disableSetLocale) {
+		const char* loc_oldLocale = setlocale(LC_ALL, NULL);
+		strcpy(sc->oldLocale, loc_oldLocale);
+		setlocale(LC_ALL, "C");
+	}
 	sc->output = output;
 	sc->tempStr = (char*)malloc(sizeof(char) * sc->maxTempLength);
 	if (!sc->tempStr) return VKFFT_ERROR_MALLOC_FAILED;
@@ -19922,7 +25701,9 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	char outputsStruct[20] = "";
 	char LFending[4] = "";
 	if (!strcmp(floatType, "float")) sprintf(LFending, "f");
+	char uintType_32[30];
 #if(VKFFT_BACKEND==0)
+	sprintf(uintType_32, "unsigned int");
 	if (sc->inputBufferBlockNum == 1)
 		sprintf(inputsStruct, "inputs");
 	else
@@ -19943,19 +25724,42 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	sprintf(sc->gl_LocalInvocationID_x, "gl_LocalInvocationID.x");
 	sprintf(sc->gl_LocalInvocationID_y, "gl_LocalInvocationID.y");
 	sprintf(sc->gl_LocalInvocationID_z, "gl_LocalInvocationID.z");
-	sprintf(sc->gl_GlobalInvocationID_x, "gl_GlobalInvocationID.x");
-	sprintf(sc->gl_GlobalInvocationID_y, "gl_GlobalInvocationID.y");
-	sprintf(sc->gl_GlobalInvocationID_z, "gl_GlobalInvocationID.z");
-	sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.x");
-	sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.y");
-	sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.z");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "gl_GlobalInvocationID.x");
+		sprintf(sc->gl_GlobalInvocationID_y, "gl_GlobalInvocationID.y");
+		sprintf(sc->gl_GlobalInvocationID_z, "gl_GlobalInvocationID.z");
+		sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.x");
+		sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.y");
+		sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.z");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(gl_LocalInvocationID.x + gl_WorkGroupID.y * gl_WorkGroupSize.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(gl_LocalInvocationID.y + gl_WorkGroupID.x * gl_WorkGroupSize.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "gl_GlobalInvocationID.z");
+		sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.y");
+		sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.x");
+		sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.z");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(gl_LocalInvocationID.x + gl_WorkGroupID.z * gl_WorkGroupSize.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "gl_GlobalInvocationID.y");
+		sprintf(sc->gl_GlobalInvocationID_z, "(gl_LocalInvocationID.z + gl_WorkGroupID.x * gl_WorkGroupSize.z)");
+		sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.z");
+		sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.y");
+		sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.x");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "gl_WorkGroupSize.x");
 	sprintf(sc->gl_WorkGroupSize_y, "gl_WorkGroupSize.y");
 	sprintf(sc->gl_WorkGroupSize_z, "gl_WorkGroupSize.z");
+	sprintf(sc->gl_SubgroupInvocationID, "gl_SubgroupInvocationID");
+	sprintf(sc->gl_SubgroupID, "gl_SubgroupID");
 	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
 	char cosDef[20] = "cos";
 	char sinDef[20] = "sin";
 #elif(VKFFT_BACKEND==1)
+	sprintf(uintType_32, "unsigned int");
 	sprintf(inputsStruct, "inputs");
 	sprintf(outputsStruct, "outputs");
 	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
@@ -19970,19 +25774,42 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	sprintf(sc->gl_LocalInvocationID_x, "threadIdx.x");
 	sprintf(sc->gl_LocalInvocationID_y, "threadIdx.y");
 	sprintf(sc->gl_LocalInvocationID_z, "threadIdx.z");
-	sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
-	sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
-	sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
-	sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
-	sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
-	sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.y * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.x * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.z * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.x * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.z");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.x");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "blockDim.x");
 	sprintf(sc->gl_WorkGroupSize_y, "blockDim.y");
 	sprintf(sc->gl_WorkGroupSize_z, "blockDim.z");
+	sprintf(sc->gl_SubgroupInvocationID, "(threadIdx.x %% %" PRIu64 ")", sc->warpSize);
+	sprintf(sc->gl_SubgroupID, "(threadIdx.x / %" PRIu64 ")", sc->warpSize);
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
 	char cosDef[20] = "__cosf";
 	char sinDef[20] = "__sinf";
 #elif(VKFFT_BACKEND==2)
+	sprintf(uintType_32, "unsigned int");
 	sprintf(inputsStruct, "inputs");
 	sprintf(outputsStruct, "outputs");
 	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
@@ -19997,19 +25824,42 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	sprintf(sc->gl_LocalInvocationID_x, "threadIdx.x");
 	sprintf(sc->gl_LocalInvocationID_y, "threadIdx.y");
 	sprintf(sc->gl_LocalInvocationID_z, "threadIdx.z");
-	sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
-	sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
-	sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
-	sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
-	sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
-	sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.y * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.x * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.z * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.x * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.z");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.x");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "blockDim.x");
 	sprintf(sc->gl_WorkGroupSize_y, "blockDim.y");
 	sprintf(sc->gl_WorkGroupSize_z, "blockDim.z");
+	sprintf(sc->gl_SubgroupInvocationID, "(threadIdx.x %% %" PRIu64 ")", sc->warpSize);
+	sprintf(sc->gl_SubgroupID, "(threadIdx.x / %" PRIu64 ")", sc->warpSize);
 	if (!strcmp(floatType, "double")) sprintf(LFending, "l");
 	char cosDef[20] = "__cosf";
 	char sinDef[20] = "__sinf";
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+	sprintf(uintType_32, "unsigned int");
 	sprintf(inputsStruct, "inputs");
 	sprintf(outputsStruct, "outputs");
 	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
@@ -20024,18 +25874,88 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	sprintf(sc->gl_LocalInvocationID_x, "get_local_id(0)");
 	sprintf(sc->gl_LocalInvocationID_y, "get_local_id(1)");
 	sprintf(sc->gl_LocalInvocationID_z, "get_local_id(2)");
-	sprintf(sc->gl_GlobalInvocationID_x, "get_global_id(0)");
-	sprintf(sc->gl_GlobalInvocationID_y, "get_global_id(1)");
-	sprintf(sc->gl_GlobalInvocationID_z, "get_global_id(2)");
-	sprintf(sc->gl_WorkGroupID_x, "get_group_id(0)");
-	sprintf(sc->gl_WorkGroupID_y, "get_group_id(1)");
-	sprintf(sc->gl_WorkGroupID_z, "get_group_id(2)");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "get_global_id(0)");
+		sprintf(sc->gl_GlobalInvocationID_y, "get_global_id(1)");
+		sprintf(sc->gl_GlobalInvocationID_z, "get_global_id(2)");
+		sprintf(sc->gl_WorkGroupID_x, "get_group_id(0)");
+		sprintf(sc->gl_WorkGroupID_y, "get_group_id(1)");
+		sprintf(sc->gl_WorkGroupID_z, "get_group_id(2)");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(get_local_id(0) + get_group_id(1) * get_local_size(0))");
+		sprintf(sc->gl_GlobalInvocationID_y, "(get_local_id(1) + get_group_id(0) * get_local_size(1))");
+		sprintf(sc->gl_GlobalInvocationID_z, "get_global_id(2)");
+		sprintf(sc->gl_WorkGroupID_x, "get_group_id(1)");
+		sprintf(sc->gl_WorkGroupID_y, "get_group_id(0)");
+		sprintf(sc->gl_WorkGroupID_z, "get_group_id(2)");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(get_local_id(0) + get_group_id(2) * get_local_size(0))");
+		sprintf(sc->gl_GlobalInvocationID_y, "get_global_id(1)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(get_local_id(2) + get_group_id(0) * get_local_size(2))");
+		sprintf(sc->gl_WorkGroupID_x, "get_group_id(2)");
+		sprintf(sc->gl_WorkGroupID_y, "get_group_id(1)");
+		sprintf(sc->gl_WorkGroupID_z, "get_group_id(0)");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "get_local_size(0)");
 	sprintf(sc->gl_WorkGroupSize_y, "get_local_size(1)");
 	sprintf(sc->gl_WorkGroupSize_z, "get_local_size(2)");
 	//if (!strcmp(floatType, "double")) sprintf(LFending, "l");
 	char cosDef[20] = "native_cos";
 	char sinDef[20] = "native_sin";
+#elif(VKFFT_BACKEND==5)
+	sprintf(uintType_32, "uint");
+	sprintf(inputsStruct, "inputs");
+	sprintf(outputsStruct, "outputs");
+	if (!strcmp(floatType, "half")) sprintf(vecType, "half2");
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatTypeInputMemory, "half")) sprintf(vecTypeInput, "half2");
+	if (!strcmp(floatTypeInputMemory, "float")) sprintf(vecTypeInput, "float2");
+	if (!strcmp(floatTypeInputMemory, "double")) sprintf(vecTypeInput, "double2");
+	if (!strcmp(floatTypeOutputMemory, "half")) sprintf(vecTypeOutput, "half2");
+	if (!strcmp(floatTypeOutputMemory, "float")) sprintf(vecTypeOutput, "float2");
+	if (!strcmp(floatTypeOutputMemory, "double")) sprintf(vecTypeOutput, "double2");
+	sprintf(sc->gl_LocalInvocationID_x, "thread_position_in_threadgroup.x");
+	sprintf(sc->gl_LocalInvocationID_y, "thread_position_in_threadgroup.y");
+	sprintf(sc->gl_LocalInvocationID_z, "thread_position_in_threadgroup.z");
+	switch (sc->swapComputeWorkGroupID) { // selects which Metal grid axes feed the logical x/y/z ids
+	case 0: // no swap: ids map straight onto the Metal grid attributes
+		sprintf(sc->gl_GlobalInvocationID_x, "thread_position_in_grid.x");
+		sprintf(sc->gl_GlobalInvocationID_y, "thread_position_in_grid.y");
+		sprintf(sc->gl_GlobalInvocationID_z, "thread_position_in_grid.z");
+		sprintf(sc->gl_WorkGroupID_x, "threadgroup_position_in_grid.x");
+		sprintf(sc->gl_WorkGroupID_y, "threadgroup_position_in_grid.y");
+		sprintf(sc->gl_WorkGroupID_z, "threadgroup_position_in_grid.z");
+		break;
+	case 1: // x and y workgroup ids are exchanged; z is left untouched
+		sprintf(sc->gl_GlobalInvocationID_x, "(thread_position_in_threadgroup.x + threadgroup_position_in_grid.y * %" PRIu64 ")", sc->localSize[0]);
+		sprintf(sc->gl_GlobalInvocationID_y, "(thread_position_in_threadgroup.y + threadgroup_position_in_grid.x * %" PRIu64 ")", sc->localSize[1]);
+		sprintf(sc->gl_GlobalInvocationID_z, "thread_position_in_grid.z"); // unswapped axis keeps its global index, as in the other backends
+		sprintf(sc->gl_WorkGroupID_x, "threadgroup_position_in_grid.y");
+		sprintf(sc->gl_WorkGroupID_y, "threadgroup_position_in_grid.x");
+		sprintf(sc->gl_WorkGroupID_z, "threadgroup_position_in_grid.z");
+		break;
+	case 2: // x and z workgroup ids are exchanged; y is left untouched
+		sprintf(sc->gl_GlobalInvocationID_x, "(thread_position_in_threadgroup.x + threadgroup_position_in_grid.z * %" PRIu64 ")", sc->localSize[0]);
+		sprintf(sc->gl_GlobalInvocationID_y, "thread_position_in_grid.y"); // unswapped axis keeps its global index, as in the other backends
+		sprintf(sc->gl_GlobalInvocationID_z, "(thread_position_in_threadgroup.z + threadgroup_position_in_grid.x * %" PRIu64 ")", sc->localSize[2]);
+		sprintf(sc->gl_WorkGroupID_x, "threadgroup_position_in_grid.z");
+		sprintf(sc->gl_WorkGroupID_y, "threadgroup_position_in_grid.y");
+		sprintf(sc->gl_WorkGroupID_z, "threadgroup_position_in_grid.x");
+		break;
+	}
+	sprintf(sc->gl_WorkGroupSize_x, "%" PRIu64 "", sc->localSize[0]);
+	sprintf(sc->gl_WorkGroupSize_y, "%" PRIu64 "", sc->localSize[1]);
+	sprintf(sc->gl_WorkGroupSize_z, "%" PRIu64 "", sc->localSize[2]);
+	//sprintf(sc->gl_SubgroupInvocationID, "gl_SubgroupInvocationID");
+	//sprintf(sc->gl_SubgroupID, "gl_SubgroupID");
+	if (!strcmp(floatType, "double")) sprintf(LFending, "LF");
+	char cosDef[20] = "cos";
+	char sinDef[20] = "sin";
 #endif
 	sprintf(sc->stageInvocationID, "stageInvocationID");
 	sprintf(sc->blockInvocationID, "blockInvocationID");
@@ -20060,6 +25980,9 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(convTypeLeftInput, "conv_float2(");
 		sprintf(convTypeRightInput, ")");
+#elif(VKFFT_BACKEND==5)
+		sprintf(convTypeLeftInput, "conv_float2(");
+		sprintf(convTypeRightInput, ")");
 #endif
 	}
 	if ((!strcmp(floatType, "double")) && (strcmp(floatTypeInputMemory, "double"))) {
@@ -20075,6 +25998,9 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(convTypeLeftInput, "conv_double2(");
 		sprintf(convTypeRightInput, ")");
+#elif(VKFFT_BACKEND==5)
+		sprintf(convTypeLeftInput, "conv_double2(");
+		sprintf(convTypeRightInput, ")");
 #endif
 	}
 
@@ -20089,12 +26015,17 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 		sprintf(convTypeLeftOutput, "vec2(");
 		sprintf(convTypeRightOutput, ")");
 #elif(VKFFT_BACKEND==1)
-		sprintf(convTypeLeftOutput, "(float2)");
+		sprintf(convTypeLeftOutput, "conv_float2("); // opening parenthesis is closed by convTypeRightOutput
+		sprintf(convTypeRightOutput, ")");
 #elif(VKFFT_BACKEND==2)
-		sprintf(convTypeLeftOutput, "(float2)");
+		sprintf(convTypeLeftOutput, "conv_float2("); // opening parenthesis is closed by convTypeRightOutput
+		sprintf(convTypeRightOutput, ")");
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(convTypeLeftOutput, "conv_float2(");
 		sprintf(convTypeRightOutput, ")");
+#elif(VKFFT_BACKEND==5)
+		sprintf(convTypeLeftOutput, "conv_float2(");
+		sprintf(convTypeRightOutput, ")");
 #endif
 	}
 	if ((!strcmp(floatTypeOutputMemory, "double")) && (strcmp(floatType, "double"))) {
@@ -20102,12 +26033,17 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 		sprintf(convTypeLeftOutput, "dvec2(");
 		sprintf(convTypeRightOutput, ")");
 #elif(VKFFT_BACKEND==1)
-		sprintf(convTypeLeftOutput, "(double2)");
+		sprintf(convTypeLeftOutput, "conv_double2(");
+		sprintf(convTypeRightOutput, ")");
 #elif(VKFFT_BACKEND==2)
-		sprintf(convTypeLeftOutput, "(double2)");
+		sprintf(convTypeLeftOutput, "conv_double2(");
+		sprintf(convTypeRightOutput, ")");
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
 		sprintf(convTypeLeftOutput, "conv_double2(");
 		sprintf(convTypeRightOutput, ")");
+#elif(VKFFT_BACKEND==5)
+		sprintf(convTypeLeftOutput, "conv_double2(");
+		sprintf(convTypeRightOutput, ")");
 #endif
 	}
 	//sprintf(sc->tempReg, "temp");
@@ -20119,7 +26055,7 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	if (res != VKFFT_SUCCESS) return res;
 	res = appendConstantsVkFFT(sc, floatType, uintType);
 	if (res != VKFFT_SUCCESS) return res;
-	if ((!sc->LUT) && (!strcmp(floatType, "double"))) {
+	if (((!sc->LUT) || (!sc->LUT_4step)) && (!strcmp(floatType, "double"))) {
 		res = appendSinCos20(sc, floatType, uintType);
 		if (res != VKFFT_SUCCESS) return res;
 	}
@@ -20185,10 +26121,41 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	if (res != VKFFT_SUCCESS) return res;
 	//sc->tempLen = sprintf(sc->tempStr, ", const PushConsts consts) {\n");
 #elif(VKFFT_BACKEND==2)
+	if (!sc->useUint64 && sc->useStrict32BitAddress > 0) {
+		// These wrappers help hipcc to generate faster code for load and store operations where
+		// 64-bit scalar + 32-bit vector registers are used instead of 64-bit vector saving a few
+		// instructions for computing 64-bit vector addresses.
+		sc->tempLen = sprintf(sc->tempStr,
+			"template<typename T>\n"
+			"struct Inputs\n"
+			"{\n"
+			"	const T* buffer;\n"
+			"	inline __device__ Inputs(const T* buffer) : buffer(buffer) {}\n"
+			"	inline __device__ const T& operator[](unsigned int idx) const { return *reinterpret_cast<const T*>(reinterpret_cast<const char*>(buffer) + idx * static_cast<unsigned int>(sizeof(T))); }\n"
+			"};\n"
+			"template<typename T>\n"
+			"struct Outputs\n"
+			"{\n"
+			"	T* buffer;\n"
+			"	inline __device__ Outputs(T* buffer) : buffer(buffer) {}\n"
+			"	inline __device__ T& operator[](unsigned int idx) const { return *reinterpret_cast<T*>(reinterpret_cast<char*>(buffer) + idx * static_cast<unsigned int>(sizeof(T))); }\n"
+			"};\n"
+		);
+	}
+	else {
+		sc->tempLen = sprintf(sc->tempStr,
+			"template<typename T>\n"
+			"using Inputs = const T*;\n"
+			"template<typename T>\n"
+			"using Outputs = T*;\n"
+		);
+	}
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
 	sc->tempLen = sprintf(sc->tempStr, "extern \"C\" __launch_bounds__(%" PRIu64 ") __global__ void VkFFT_main_R2C ", sc->localSize[0] * sc->localSize[1] * sc->localSize[2]);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
-	sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", vecTypeInput, vecTypeOutput);
+	sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", vecTypeInput, vecTypeOutput);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	if (sc->convolutionStep) {
@@ -20231,6 +26198,45 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) return res;
 	//sc->tempLen = sprintf(sc->tempStr, ", const PushConsts consts) {\n");
+#elif(VKFFT_BACKEND==5)
+	sc->tempLen = sprintf(sc->tempStr, "kernel void VkFFT_main_R2C ");
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "(%s3 thread_position_in_grid [[thread_position_in_grid]], ", uintType_32);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "%s3 threadgroup_position_in_grid [[threadgroup_position_in_grid]], ", uintType_32);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "%s3 thread_position_in_threadgroup [[thread_position_in_threadgroup]], ", uintType_32);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", vecTypeInput, vecTypeOutput);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	int args_id = 2;
+	if (sc->convolutionStep) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant %s* kernel_obj[[buffer(%d)]]", vecType, args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		args_id++;
+	}
+	if (sc->LUT) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant %s* twiddleLUT[[buffer(%d)]]", vecType, args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		args_id++;
+	}
+	if (sc->pushConstantsStructSize > 0) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant PushConsts& consts[[buffer(%d)]]", args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		args_id++;
+	}
+	sc->tempLen = sprintf(sc->tempStr, ") {\n");
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	//sc->tempLen = sprintf(sc->tempStr, ", const PushConsts consts) {\n");
 #endif
 	char index_x[2000] = "";
 	char idX[500] = "";
@@ -20463,7 +26469,7 @@ static inline VkFFTResult shaderGenVkFFT_R2C_decomposition(char* output, VkFFTSp
 		if (res != VKFFT_SUCCESS) return res;
 	}
 	else {
-		sc->tempLen = sprintf(sc->tempStr, "		%s angle = (loc_PI*id_x)/%" PRIu64 ";\n", floatType, sc->size[0] / 2);
+		sc->tempLen = sprintf(sc->tempStr, "		%s angle = (%.17e*id_x)/%" PRIu64 ";\n", floatType, 3.1415926535897932384626433832795, sc->size[0] / 2);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) return res;
 		if (!strcmp(floatType, "float")) {
@@ -20577,16 +26583,26 @@ static inline void freeShaderGenVkFFT(VkFFTSpecializationConstantsLayout* sc) {
 		free(sc->regIDs);
 		sc->regIDs = 0;
 	}
-	if (sc->oldLocale)
-	{
-		setlocale(LC_ALL, sc->oldLocale);
+	if (!sc->disableSetLocale) { // locale handling is skipped entirely when the caller disabled it
+		if (strcmp(sc->oldLocale, "")) { // restore only when a locale name was actually saved (non-empty)
+			setlocale(LC_ALL, sc->oldLocale);
+			memset(sc->oldLocale, 0, 100 * sizeof(char)); // clear so a later call does not restore a stale name
+		}
+	}
+	if (sc->numRaderPrimes) {
+		free(sc->raderContainer);
+		sc->raderContainer = 0;
+		sc->currentRaderContainer = 0;
 	}
 }
 static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConstantsLayout* sc, const char* floatType, const char* floatTypeInputMemory, const char* floatTypeOutputMemory, const char* floatTypeKernelMemory, const char* uintType, uint64_t type) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	//appendLicense(output);
-	sc->oldLocale = setlocale(LC_ALL, NULL);
-	setlocale(LC_ALL, "C");
+	if (!sc->disableSetLocale) { // remember the current locale, then generate code under the "C" locale
+		const char* loc_oldLocale = setlocale(LC_ALL, NULL); // query only; may be NULL on failure
+		snprintf(sc->oldLocale, 100, "%s", loc_oldLocale ? loc_oldLocale : ""); // bounded copy; 100 matches the size cleared in freeShaderGenVkFFT - TODO confirm against the struct declaration
+		setlocale(LC_ALL, "C");
+	}
 	sc->output = output;
 	sc->tempStr = (char*)malloc(sizeof(char) * sc->maxTempLength);
 	if (!sc->tempStr) return VKFFT_ERROR_MALLOC_FAILED;
@@ -20595,7 +26611,9 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 	char vecType[30];
 	char vecTypeInput[30];
 	char vecTypeOutput[30];
+	char uintType_32[30];
 #if(VKFFT_BACKEND==0)
+	sprintf(uintType_32, "uint");
 	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
 	if (!strcmp(floatType, "float")) sprintf(vecType, "vec2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "dvec2");
@@ -20608,16 +26626,39 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 	sprintf(sc->gl_LocalInvocationID_x, "gl_LocalInvocationID.x");
 	sprintf(sc->gl_LocalInvocationID_y, "gl_LocalInvocationID.y");
 	sprintf(sc->gl_LocalInvocationID_z, "gl_LocalInvocationID.z");
-	sprintf(sc->gl_GlobalInvocationID_x, "gl_GlobalInvocationID.x");
-	sprintf(sc->gl_GlobalInvocationID_y, "gl_GlobalInvocationID.y");
-	sprintf(sc->gl_GlobalInvocationID_z, "gl_GlobalInvocationID.z");
-	sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.x");
-	sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.y");
-	sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.z");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "gl_GlobalInvocationID.x");
+		sprintf(sc->gl_GlobalInvocationID_y, "gl_GlobalInvocationID.y");
+		sprintf(sc->gl_GlobalInvocationID_z, "gl_GlobalInvocationID.z");
+		sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.x");
+		sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.y");
+		sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.z");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(gl_LocalInvocationID.x + gl_WorkGroupID.y * gl_WorkGroupSize.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(gl_LocalInvocationID.y + gl_WorkGroupID.x * gl_WorkGroupSize.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "gl_GlobalInvocationID.z");
+		sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.y");
+		sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.x");
+		sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.z");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(gl_LocalInvocationID.x + gl_WorkGroupID.z * gl_WorkGroupSize.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "gl_GlobalInvocationID.y");
+		sprintf(sc->gl_GlobalInvocationID_z, "(gl_LocalInvocationID.z + gl_WorkGroupID.x * gl_WorkGroupSize.z)");
+		sprintf(sc->gl_WorkGroupID_x, "gl_WorkGroupID.z");
+		sprintf(sc->gl_WorkGroupID_y, "gl_WorkGroupID.y");
+		sprintf(sc->gl_WorkGroupID_z, "gl_WorkGroupID.x");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "gl_WorkGroupSize.x");
 	sprintf(sc->gl_WorkGroupSize_y, "gl_WorkGroupSize.y");
 	sprintf(sc->gl_WorkGroupSize_z, "gl_WorkGroupSize.z");
+	sprintf(sc->gl_SubgroupInvocationID, "gl_SubgroupInvocationID");
+	sprintf(sc->gl_SubgroupID, "gl_SubgroupID");
 #elif(VKFFT_BACKEND==1)
+	sprintf(uintType_32, "unsigned int");
 	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
@@ -20630,16 +26671,39 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 	sprintf(sc->gl_LocalInvocationID_x, "threadIdx.x");
 	sprintf(sc->gl_LocalInvocationID_y, "threadIdx.y");
 	sprintf(sc->gl_LocalInvocationID_z, "threadIdx.z");
-	sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
-	sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
-	sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
-	sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
-	sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
-	sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.y * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.x * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.z * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.x * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.z");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.x");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "blockDim.x");
 	sprintf(sc->gl_WorkGroupSize_y, "blockDim.y");
 	sprintf(sc->gl_WorkGroupSize_z, "blockDim.z");
+	sprintf(sc->gl_SubgroupInvocationID, "(threadIdx.x %% %" PRIu64 ")", sc->warpSize);
+	sprintf(sc->gl_SubgroupID, "(threadIdx.x / %" PRIu64 ")", sc->warpSize);
 #elif(VKFFT_BACKEND==2)
+	sprintf(uintType_32, "unsigned int");
 	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
@@ -20652,16 +26716,39 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 	sprintf(sc->gl_LocalInvocationID_x, "threadIdx.x");
 	sprintf(sc->gl_LocalInvocationID_y, "threadIdx.y");
 	sprintf(sc->gl_LocalInvocationID_z, "threadIdx.z");
-	sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
-	sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
-	sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
-	sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
-	sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
-	sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.x * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.y * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.x * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.z * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.x");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.z");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(threadIdx.x + blockIdx.z * blockDim.x)");
+		sprintf(sc->gl_GlobalInvocationID_y, "(threadIdx.y + blockIdx.y * blockDim.y)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(threadIdx.z + blockIdx.x * blockDim.z)");
+		sprintf(sc->gl_WorkGroupID_x, "blockIdx.z");
+		sprintf(sc->gl_WorkGroupID_y, "blockIdx.y");
+		sprintf(sc->gl_WorkGroupID_z, "blockIdx.x");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "blockDim.x");
 	sprintf(sc->gl_WorkGroupSize_y, "blockDim.y");
 	sprintf(sc->gl_WorkGroupSize_z, "blockDim.z");
+	sprintf(sc->gl_SubgroupInvocationID, "(threadIdx.x %% %" PRIu64 ")", sc->warpSize);
+	sprintf(sc->gl_SubgroupID, "(threadIdx.x / %" PRIu64 ")", sc->warpSize);
 #elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4))
+	sprintf(uintType_32, "unsigned int");
 	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
 	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
 	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
@@ -20674,15 +26761,80 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 	sprintf(sc->gl_LocalInvocationID_x, "get_local_id(0)");
 	sprintf(sc->gl_LocalInvocationID_y, "get_local_id(1)");
 	sprintf(sc->gl_LocalInvocationID_z, "get_local_id(2)");
-	sprintf(sc->gl_GlobalInvocationID_x, "get_global_id(0)");
-	sprintf(sc->gl_GlobalInvocationID_y, "get_global_id(1)");
-	sprintf(sc->gl_GlobalInvocationID_z, "get_global_id(2)");
-	sprintf(sc->gl_WorkGroupID_x, "get_group_id(0)");
-	sprintf(sc->gl_WorkGroupID_y, "get_group_id(1)");
-	sprintf(sc->gl_WorkGroupID_z, "get_group_id(2)");
+	switch (sc->swapComputeWorkGroupID) {
+	case 0:
+		sprintf(sc->gl_GlobalInvocationID_x, "get_global_id(0)");
+		sprintf(sc->gl_GlobalInvocationID_y, "get_global_id(1)");
+		sprintf(sc->gl_GlobalInvocationID_z, "get_global_id(2)");
+		sprintf(sc->gl_WorkGroupID_x, "get_group_id(0)");
+		sprintf(sc->gl_WorkGroupID_y, "get_group_id(1)");
+		sprintf(sc->gl_WorkGroupID_z, "get_group_id(2)");
+		break;
+	case 1:
+		sprintf(sc->gl_GlobalInvocationID_x, "(get_local_id(0) + get_group_id(1) * get_local_size(0))");
+		sprintf(sc->gl_GlobalInvocationID_y, "(get_local_id(1) + get_group_id(0) * get_local_size(1))");
+		sprintf(sc->gl_GlobalInvocationID_z, "get_global_id(2)");
+		sprintf(sc->gl_WorkGroupID_x, "get_group_id(1)");
+		sprintf(sc->gl_WorkGroupID_y, "get_group_id(0)");
+		sprintf(sc->gl_WorkGroupID_z, "get_group_id(2)");
+		break;
+	case 2:
+		sprintf(sc->gl_GlobalInvocationID_x, "(get_local_id(0) + get_group_id(2) * get_local_size(0))");
+		sprintf(sc->gl_GlobalInvocationID_y, "get_global_id(1)");
+		sprintf(sc->gl_GlobalInvocationID_z, "(get_local_id(2) + get_group_id(0) * get_local_size(2))");
+		sprintf(sc->gl_WorkGroupID_x, "get_group_id(2)");
+		sprintf(sc->gl_WorkGroupID_y, "get_group_id(1)");
+		sprintf(sc->gl_WorkGroupID_z, "get_group_id(0)");
+		break;
+	}
 	sprintf(sc->gl_WorkGroupSize_x, "get_local_size(0)");
 	sprintf(sc->gl_WorkGroupSize_y, "get_local_size(1)");
 	sprintf(sc->gl_WorkGroupSize_z, "get_local_size(2)");
+#elif(VKFFT_BACKEND==5)
+	sprintf(uintType_32, "uint");
+	if (!strcmp(floatType, "half")) sprintf(vecType, "f16vec2");
+	if (!strcmp(floatType, "float")) sprintf(vecType, "float2");
+	if (!strcmp(floatType, "double")) sprintf(vecType, "double2");
+	if (!strcmp(floatTypeInputMemory, "half")) sprintf(vecTypeInput, "half2");
+	if (!strcmp(floatTypeInputMemory, "float")) sprintf(vecTypeInput, "float2");
+	if (!strcmp(floatTypeInputMemory, "double")) sprintf(vecTypeInput, "double2");
+	if (!strcmp(floatTypeOutputMemory, "half")) sprintf(vecTypeOutput, "half2");
+	if (!strcmp(floatTypeOutputMemory, "float")) sprintf(vecTypeOutput, "float2");
+	if (!strcmp(floatTypeOutputMemory, "double")) sprintf(vecTypeOutput, "double2");
+	sprintf(sc->gl_LocalInvocationID_x, "thread_position_in_threadgroup.x");
+	sprintf(sc->gl_LocalInvocationID_y, "thread_position_in_threadgroup.y");
+	sprintf(sc->gl_LocalInvocationID_z, "thread_position_in_threadgroup.z");
+	switch (sc->swapComputeWorkGroupID) { // selects which Metal grid axes feed the logical x/y/z ids
+	case 0: // no swap: ids map straight onto the Metal grid attributes
+		sprintf(sc->gl_GlobalInvocationID_x, "thread_position_in_grid.x");
+		sprintf(sc->gl_GlobalInvocationID_y, "thread_position_in_grid.y");
+		sprintf(sc->gl_GlobalInvocationID_z, "thread_position_in_grid.z");
+		sprintf(sc->gl_WorkGroupID_x, "threadgroup_position_in_grid.x");
+		sprintf(sc->gl_WorkGroupID_y, "threadgroup_position_in_grid.y");
+		sprintf(sc->gl_WorkGroupID_z, "threadgroup_position_in_grid.z");
+		break;
+	case 1: // x and y workgroup ids are exchanged; z is left untouched
+		sprintf(sc->gl_GlobalInvocationID_x, "(thread_position_in_threadgroup.x + threadgroup_position_in_grid.y * %" PRIu64 ")", sc->localSize[0]);
+		sprintf(sc->gl_GlobalInvocationID_y, "(thread_position_in_threadgroup.y + threadgroup_position_in_grid.x * %" PRIu64 ")", sc->localSize[1]);
+		sprintf(sc->gl_GlobalInvocationID_z, "thread_position_in_grid.z"); // unswapped axis keeps its global index, as in the other backends
+		sprintf(sc->gl_WorkGroupID_x, "threadgroup_position_in_grid.y");
+		sprintf(sc->gl_WorkGroupID_y, "threadgroup_position_in_grid.x");
+		sprintf(sc->gl_WorkGroupID_z, "threadgroup_position_in_grid.z");
+		break;
+	case 2: // x and z workgroup ids are exchanged; y is left untouched
+		sprintf(sc->gl_GlobalInvocationID_x, "(thread_position_in_threadgroup.x + threadgroup_position_in_grid.z * %" PRIu64 ")", sc->localSize[0]);
+		sprintf(sc->gl_GlobalInvocationID_y, "thread_position_in_grid.y"); // unswapped axis keeps its global index, as in the other backends
+		sprintf(sc->gl_GlobalInvocationID_z, "(thread_position_in_threadgroup.z + threadgroup_position_in_grid.x * %" PRIu64 ")", sc->localSize[2]);
+		sprintf(sc->gl_WorkGroupID_x, "threadgroup_position_in_grid.z");
+		sprintf(sc->gl_WorkGroupID_y, "threadgroup_position_in_grid.y");
+		sprintf(sc->gl_WorkGroupID_z, "threadgroup_position_in_grid.x");
+		break;
+	}
+	sprintf(sc->gl_WorkGroupSize_x, "%" PRIu64 "", sc->localSize[0]);
+	sprintf(sc->gl_WorkGroupSize_y, "%" PRIu64 "", sc->localSize[1]);
+	sprintf(sc->gl_WorkGroupSize_z, "%" PRIu64 "", sc->localSize[2]);
+	//sprintf(sc->gl_SubgroupInvocationID, "gl_SubgroupInvocationID");
+	//sprintf(sc->gl_SubgroupID, "gl_SubgroupID");
 #endif
 	sprintf(sc->stageInvocationID, "stageInvocationID");
 	sprintf(sc->blockInvocationID, "blockInvocationID");
@@ -20691,6 +26843,8 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 	sprintf(sc->combinedID, "combinedID");
 	sprintf(sc->inoutID, "inoutID");
 	sprintf(sc->sdataID, "sdataID");
+	sprintf(sc->raderIDx, "raderIDx");
+	sprintf(sc->raderIDx2, "raderIDx2");
 	//sprintf(sc->tempReg, "temp");
 	sc->disableThreadsStart = (char*)malloc(sizeof(char) * 500);
 	if (!sc->disableThreadsStart) {
@@ -20724,13 +26878,14 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		freeShaderGenVkFFT(sc);
 		return res;
 	}
-	if ((!sc->LUT) && (!strcmp(floatType, "double"))) {
+	if (((!sc->LUT) || (!sc->LUT_4step)) && (!strcmp(floatType, "double"))) {
 		res = appendSinCos20(sc, floatType, uintType);
 		if (res != VKFFT_SUCCESS) {
 			freeShaderGenVkFFT(sc);
 			return res;
 		}
 	}
+
 	if (strcmp(floatType, floatTypeInputMemory)) {
 		res = appendConversion(sc, floatType, floatTypeInputMemory);
 		if (res != VKFFT_SUCCESS) {
@@ -20779,6 +26934,14 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		}
 		id++;
 	}
+	if (sc->raderUintLUT) {
+		res = appendRaderUintLUTLayoutVkFFT(sc, id);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+		id++;
+	}
 	if (sc->useBluesteinFFT) {
 		res = appendBluesteinLayoutVkFFT(sc, id, floatType);
 		if (res != VKFFT_SUCCESS) {
@@ -20924,6 +27087,14 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 			return res;
 		}
 	}
+	if (sc->raderUintLUT) {
+		sc->tempLen = sprintf(sc->tempStr, ", %s* g_pow", uintType_32);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+	}
 	if (sc->BluesteinConvolutionStep) {
 		sc->tempLen = sprintf(sc->tempStr, ", %s* BluesteinConvolutionKernel", vecType);
 		res = VkAppendLine(sc);
@@ -20959,6 +27130,40 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		freeShaderGenVkFFT(sc);
 		return res;
 	}
+	if (!sc->useUint64 && sc->useStrict32BitAddress > 0) {
+		// These wrappers help hipcc to generate faster code for load and store operations where
+		// 64-bit scalar + 32-bit vector registers are used instead of 64-bit vector saving a few
+		// instructions for computing 64-bit vector addresses.
+		sc->tempLen = sprintf(sc->tempStr,
+			"template<typename T>\n"
+			"struct Inputs\n"
+			"{\n"
+			"	const T* buffer;\n"
+			"	inline __device__ Inputs(const T* buffer) : buffer(buffer) {}\n"
+			"	inline __device__ const T& operator[](unsigned int idx) const { return *reinterpret_cast<const T*>(reinterpret_cast<const char*>(buffer) + idx * static_cast<unsigned int>(sizeof(T))); }\n"
+			"};\n"
+			"template<typename T>\n"
+			"struct Outputs\n"
+			"{\n"
+			"	T* buffer;\n"
+			"	inline __device__ Outputs(T* buffer) : buffer(buffer) {}\n"
+			"	inline __device__ T& operator[](unsigned int idx) const { return *reinterpret_cast<T*>(reinterpret_cast<char*>(buffer) + idx * static_cast<unsigned int>(sizeof(T))); }\n"
+			"};\n"
+		);
+	}
+	else {
+		sc->tempLen = sprintf(sc->tempStr,
+			"template<typename T>\n"
+			"using Inputs = const T*;\n"
+			"template<typename T>\n"
+			"using Outputs = T*;\n"
+		);
+	}
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) {
+		freeShaderGenVkFFT(sc);
+		return res;
+	}
 	sc->tempLen = sprintf(sc->tempStr, "extern \"C\" __launch_bounds__(%" PRIu64 ") __global__ void VkFFT_main ", sc->localSize[0] * sc->localSize[1] * sc->localSize[2]);
 	res = VkAppendLine(sc);
 	if (res != VKFFT_SUCCESS) {
@@ -20968,77 +27173,77 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 	switch (type) {
 	case 5:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, vecTypeOutput);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, vecTypeOutput);
 		break;
 	}
 	case 6:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", vecTypeInput, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", vecTypeInput, floatTypeOutputMemory);
 		break;
 	}
 	case 110:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 111:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 120:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 121:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 130:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 131:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 140:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 141:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 142:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 143:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 144:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	case 145:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", floatTypeInputMemory, floatTypeOutputMemory);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", floatTypeInputMemory, floatTypeOutputMemory);
 		break;
 	}
 	default:
 	{
-		sc->tempLen = sprintf(sc->tempStr, "(%s* inputs, %s* outputs", vecTypeInput, vecTypeOutput);
+		sc->tempLen = sprintf(sc->tempStr, "(const Inputs<%s> inputs, Outputs<%s> outputs", vecTypeInput, vecTypeOutput);
 		break;
 	}
 	}
@@ -21048,7 +27253,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		return res;
 	}
 	if (sc->convolutionStep) {
-		sc->tempLen = sprintf(sc->tempStr, ", %s* kernel_obj", vecType);
+		sc->tempLen = sprintf(sc->tempStr, ", const Inputs<%s> kernel_obj", vecType);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) {
 			freeShaderGenVkFFT(sc);
@@ -21056,7 +27261,15 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		}
 	}
 	if (sc->LUT) {
-		sc->tempLen = sprintf(sc->tempStr, ", %s* twiddleLUT", vecType);
+		sc->tempLen = sprintf(sc->tempStr, ", const Inputs<%s> twiddleLUT", vecType);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+	}
+	if (sc->raderUintLUT) {
+		sc->tempLen = sprintf(sc->tempStr, ", %s* g_pow", uintType_32);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) {
 			freeShaderGenVkFFT(sc);
@@ -21064,7 +27277,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		}
 	}
 	if (sc->BluesteinConvolutionStep) {
-		sc->tempLen = sprintf(sc->tempStr, ", %s* BluesteinConvolutionKernel", vecType);
+		sc->tempLen = sprintf(sc->tempStr, ", const Inputs<%s> BluesteinConvolutionKernel", vecType);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) {
 			freeShaderGenVkFFT(sc);
@@ -21072,7 +27285,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		}
 	}
 	if (sc->BluesteinPreMultiplication || sc->BluesteinPostMultiplication) {
-		sc->tempLen = sprintf(sc->tempStr, ", %s* BluesteinMultiplication", vecType);
+		sc->tempLen = sprintf(sc->tempStr, ", const Inputs<%s> BluesteinMultiplication", vecType);
 		res = VkAppendLine(sc);
 		if (res != VKFFT_SUCCESS) {
 			freeShaderGenVkFFT(sc);
@@ -21180,6 +27393,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		freeShaderGenVkFFT(sc);
 		return res;
 	}
+	int args_id = 2;
 	if (sc->convolutionStep) {
 		sc->tempLen = sprintf(sc->tempStr, ", __global %s* kernel_obj", vecType);
 		res = VkAppendLine(sc);
@@ -21187,6 +27401,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 			freeShaderGenVkFFT(sc);
 			return res;
 		}
+		args_id++;
 	}
 	if (sc->LUT) {
 		sc->tempLen = sprintf(sc->tempStr, ", __global %s* twiddleLUT", vecType);
@@ -21195,6 +27410,16 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 			freeShaderGenVkFFT(sc);
 			return res;
 		}
+		args_id++;
+	}
+	if (sc->raderUintLUT) {
+		sc->tempLen = sprintf(sc->tempStr, ", __global %s* g_pow", uintType_32);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+		args_id++;
 	}
 	if (sc->BluesteinConvolutionStep) {
 		sc->tempLen = sprintf(sc->tempStr, ", __global %s* BluesteinConvolutionKernel", vecType);
@@ -21203,6 +27428,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 			freeShaderGenVkFFT(sc);
 			return res;
 		}
+		args_id++;
 	}
 	if (sc->BluesteinPreMultiplication || sc->BluesteinPostMultiplication) {
 		sc->tempLen = sprintf(sc->tempStr, ", __global %s* BluesteinMultiplication", vecType);
@@ -21211,6 +27437,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 			freeShaderGenVkFFT(sc);
 			return res;
 		}
+		args_id++;
 	}
 	if (sc->pushConstantsStructSize > 0) {
 		sc->tempLen = sprintf(sc->tempStr, ", PushConsts consts");
@@ -21232,6 +27459,168 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		freeShaderGenVkFFT(sc);
 		return res;
 	}
+#elif(VKFFT_BACKEND==5)
+	sc->tempLen = sprintf(sc->tempStr, "kernel void VkFFT_main ");
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "(%s3 thread_position_in_grid [[thread_position_in_grid]], ", uintType_32);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "%s3 threadgroup_position_in_grid [[threadgroup_position_in_grid]], ", uintType_32);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "%s3 thread_position_in_threadgroup [[thread_position_in_threadgroup]], ", uintType_32);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	sc->tempLen = sprintf(sc->tempStr, "threadgroup %s* sdata [[threadgroup(0)]], ", vecType);
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) {
+		freeShaderGenVkFFT(sc);
+		return res;
+	}
+	switch (type) {
+	case 5:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, vecTypeOutput);
+		break;
+	}
+	case 6:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", vecTypeInput, floatTypeOutputMemory);
+		break;
+	}
+	case 110:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 111:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 120:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 121:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 130:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 131:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 140:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 141:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 142:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 143:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 144:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	case 145:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", floatTypeInputMemory, floatTypeOutputMemory);
+		break;
+	}
+	default:
+	{
+		sc->tempLen = sprintf(sc->tempStr, "device %s* inputs[[buffer(0)]], device %s* outputs[[buffer(1)]]", vecTypeInput, vecTypeOutput);
+		break;
+	}
+	}
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) {
+		freeShaderGenVkFFT(sc);
+		return res;
+	}
+	int args_id = 2;
+	if (sc->convolutionStep) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant %s* kernel_obj[[buffer(%d)]]", vecType, args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+		args_id++;
+	}
+	if (sc->LUT) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant %s* twiddleLUT[[buffer(%d)]]", vecType, args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+		args_id++;
+	}
+	if (sc->raderUintLUT) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant %s* g_pow[[buffer(%d)]]", uintType_32, args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+		args_id++;
+	}
+	if (sc->BluesteinConvolutionStep) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant %s* BluesteinConvolutionKernel[[buffer(%d)]]", vecType, args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+		args_id++;
+	}
+	if (sc->BluesteinPreMultiplication || sc->BluesteinPostMultiplication) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant %s* BluesteinMultiplication[[buffer(%d)]]", vecType, args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
+		args_id++;
+	}
+	if (sc->pushConstantsStructSize > 0) {
+		sc->tempLen = sprintf(sc->tempStr, ", constant PushConsts& consts[[buffer(%d)]]", args_id);
+		res = VkAppendLine(sc);
+		if (res != VKFFT_SUCCESS) return res;
+		args_id++;
+	}
+	sc->tempLen = sprintf(sc->tempStr, ") {\n");
+	res = VkAppendLine(sc);
+	if (res != VKFFT_SUCCESS) return res;
+	//sc->tempLen = sprintf(sc->tempStr, ", const PushConsts consts) {\n");
+	res = appendSharedMemoryVkFFT(sc, floatType, uintType, locType);
+	if (res != VKFFT_SUCCESS) {
+		freeShaderGenVkFFT(sc);
+		return res;
+	}
 #endif
 	//if (type==0) sc->tempLen = sprintf(sc->tempStr, "return;\n");
 	res = appendInitialization(sc, floatType, uintType, type);
@@ -21276,18 +27665,21 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		freeShaderGenVkFFT(sc);
 		return res;
 	}
-	res = appendBoostThreadDataReorder(sc, floatType, uintType, locType, 1);
-	if (res != VKFFT_SUCCESS) {
-		freeShaderGenVkFFT(sc);
-		return res;
+	if (!sc->useRader) {
+		res = appendBoostThreadDataReorder(sc, floatType, uintType, locType, 1);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
 	}
+
 	uint64_t stageSize = 1;
 	uint64_t stageSizeSum = 0;
-	double PI_const = 3.1415926535897932384626433832795;
-	double stageAngle = (sc->inverse) ? PI_const : -PI_const;
+	long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+	long double stageAngle = (sc->inverse) ? double_PI : -double_PI;
 	for (uint64_t i = 0; i < sc->numStages; i++) {
 		if ((i == sc->numStages - 1) && (sc->registerBoost > 1)) {
-			res = appendRadixStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], locType);
+			res = appendRadixStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], i, locType);
 			if (res != VKFFT_SUCCESS) {
 				freeShaderGenVkFFT(sc);
 				return res;
@@ -21300,49 +27692,49 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		}
 		else {
 
-			res = appendRadixStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], locType);
+			res = appendRadixStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], i, locType);
 			if (res != VKFFT_SUCCESS) {
 				freeShaderGenVkFFT(sc);
 				return res;
 			}
 			if (i > 0) {
-			switch (sc->stageRadix[i]) {
-			case 2:
-				stageSizeSum += stageSize;
-				break;
-			case 3:
-				stageSizeSum += stageSize * 2;
-				break;
-			case 4:
-				stageSizeSum += stageSize * 2;
-				break;
-			case 5:
-				stageSizeSum += stageSize * 4;
-				break;
+				switch (sc->stageRadix[i]) {
+				case 2:
+					stageSizeSum += stageSize;
+					break;
+				case 3:
+					stageSizeSum += stageSize * 2;
+					break;
+				case 4:
+					stageSizeSum += stageSize * 2;
+					break;
+				case 5:
+					stageSizeSum += stageSize * 4;
+					break;
 				case 6:
 					stageSizeSum += stageSize * 5;
 					break;
-			case 7:
-				stageSizeSum += stageSize * 6;
-				break;
-			case 8:
-				stageSizeSum += stageSize * 3;
-				break;
+				case 7:
+					stageSizeSum += stageSize * 6;
+					break;
+				case 8:
+					stageSizeSum += stageSize * 3;
+					break;
 				case 9:
 					stageSizeSum += stageSize * 8;
 					break;
 				case 10:
 					stageSizeSum += stageSize * 9;
 					break;
-			case 11:
-				stageSizeSum += stageSize * 10;
-				break;
+				case 11:
+					stageSizeSum += stageSize * 10;
+					break;
 				case 12:
 					stageSizeSum += stageSize * 11;
 					break;
-			case 13:
-				stageSizeSum += stageSize * 12;
-				break;
+				case 13:
+					stageSizeSum += stageSize * 12;
+					break;
 				case 14:
 					stageSizeSum += stageSize * 13;
 					break;
@@ -21355,17 +27747,20 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 				case 32:
 					stageSizeSum += stageSize * 5;
 					break;
+				default:
+					stageSizeSum += stageSize * (sc->stageRadix[i]);
+					break;
 				}
 			}
-			if (i == sc->numStages - 1) {
-				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i], locType);
+			if ((i == sc->numStages - 1) || (sc->registerBoost == 1)) {
+				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i], i, locType);
 				if (res != VKFFT_SUCCESS) {
 					freeShaderGenVkFFT(sc);
 					return res;
 				}
 			}
 			else {
-				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i + 1], locType);
+				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i + 1], i, locType);
 				if (res != VKFFT_SUCCESS) {
 					freeShaderGenVkFFT(sc);
 					return res;
@@ -21375,7 +27770,6 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 			stageAngle /= sc->stageRadix[i];
 		}
 	}
-
 	if ((sc->convolutionStep) || (sc->useBluesteinFFT && sc->BluesteinConvolutionStep)) {
 		res = appendCoordinateRegisterStore(sc, locType);
 		if (res != VKFFT_SUCCESS) {
@@ -21400,7 +27794,7 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 		}
 		if (sc->useBluesteinFFT && sc->BluesteinConvolutionStep)
 		{
-			res = appendBluesteinConvolution(sc, floatType, floatTypeKernelMemory, uintType, locType);
+			res = appendBluesteinConvolution(sc, floatType, uintType, locType);
 			if (res != VKFFT_SUCCESS) {
 				freeShaderGenVkFFT(sc);
 				return res;
@@ -21434,52 +27828,52 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 
 		stageSize = 1;
 		stageSizeSum = 0;
-		stageAngle = PI_const;
+		stageAngle = double_PI;
 		sc->inverse = 1;
 		for (uint64_t i = 0; i < sc->numStages; i++) {
-			res = appendRadixStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], locType);
+			res = appendRadixStage(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], i, locType);
 			if (res != VKFFT_SUCCESS) {
 				freeShaderGenVkFFT(sc);
 				return res;
 			}
 			if (i > 0) {
-			switch (sc->stageRadix[i]) {
-			case 2:
-				stageSizeSum += stageSize;
-				break;
-			case 3:
-				stageSizeSum += stageSize * 2;
-				break;
-			case 4:
-				stageSizeSum += stageSize * 2;
-				break;
-			case 5:
-				stageSizeSum += stageSize * 4;
-				break;
+				switch (sc->stageRadix[i]) {
+				case 2:
+					stageSizeSum += stageSize;
+					break;
+				case 3:
+					stageSizeSum += stageSize * 2;
+					break;
+				case 4:
+					stageSizeSum += stageSize * 2;
+					break;
+				case 5:
+					stageSizeSum += stageSize * 4;
+					break;
 				case 6:
 					stageSizeSum += stageSize * 5;
 					break;
-			case 7:
-				stageSizeSum += stageSize * 6;
-				break;
-			case 8:
-				stageSizeSum += stageSize * 3;
-				break;
+				case 7:
+					stageSizeSum += stageSize * 6;
+					break;
+				case 8:
+					stageSizeSum += stageSize * 3;
+					break;
 				case 9:
 					stageSizeSum += stageSize * 8;
 					break;
 				case 10:
 					stageSizeSum += stageSize * 9;
 					break;
-			case 11:
-				stageSizeSum += stageSize * 10;
-				break;
+				case 11:
+					stageSizeSum += stageSize * 10;
+					break;
 				case 12:
 					stageSizeSum += stageSize * 11;
 					break;
-			case 13:
-				stageSizeSum += stageSize * 12;
-				break;
+				case 13:
+					stageSizeSum += stageSize * 12;
+					break;
 				case 14:
 					stageSizeSum += stageSize * 13;
 					break;
@@ -21492,17 +27886,20 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 				case 32:
 					stageSizeSum += stageSize * 5;
 					break;
+				default:
+					stageSizeSum += stageSize * (sc->stageRadix[i]);
+					break;
 				}
 			}
 			if (i == sc->numStages - 1) {
-				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i], locType);
+				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i], i, locType);
 				if (res != VKFFT_SUCCESS) {
 					freeShaderGenVkFFT(sc);
 					return res;
 				}
 			}
 			else {
-				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i + 1], locType);
+				res = appendRadixShuffle(sc, floatType, uintType, stageSize, stageSizeSum, stageAngle, sc->stageRadix[i], sc->stageRadix[i + 1], i, locType);
 				if (res != VKFFT_SUCCESS) {
 					freeShaderGenVkFFT(sc);
 					return res;
@@ -21512,10 +27909,13 @@ static inline VkFFTResult shaderGenVkFFT(char* output, VkFFTSpecializationConsta
 			stageAngle /= sc->stageRadix[i];
 		}
 	}
-	res = appendBoostThreadDataReorder(sc, floatType, uintType, locType, 0);
-	if (res != VKFFT_SUCCESS) {
-		freeShaderGenVkFFT(sc);
-		return res;
+	if (!sc->useRader) {
+		//if (((sc->stageRadix[sc->numStages - 1] < sc->fixMinRaderPrimeMult) || (sc->rader_generator[sc->numStages - 1] == 0))) {
+		res = appendBoostThreadDataReorder(sc, floatType, uintType, locType, 0);
+		if (res != VKFFT_SUCCESS) {
+			freeShaderGenVkFFT(sc);
+			return res;
+		}
 	}
 	res = appendReorder4StepWrite(sc, floatType, uintType, locType);
 	if (res != VKFFT_SUCCESS) {
@@ -21602,9 +28002,13 @@ static inline VkFFTResult allocateFFTBuffer(VkFFTApplication* app, VkBuffer* buf
 	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_BIND_BUFFER_MEMORY;
 	return resFFT;
 }
-static inline VkFFTResult transferDataFromCPU(VkFFTApplication* app, void* arr, VkBuffer* buffer, VkDeviceSize bufferSize) {
-	VkResult res = VK_SUCCESS;
+#endif
+static inline VkFFTResult VkFFT_transferDataFromCPU(VkFFTApplication* app, void* cpu_arr, void* input_buffer, uint64_t transferSize) {
 	VkFFTResult resFFT = VKFFT_SUCCESS;
+#if(VKFFT_BACKEND==0)
+	VkBuffer* buffer = (VkBuffer*)input_buffer;
+	VkDeviceSize bufferSize = transferSize;
+	VkResult res = VK_SUCCESS;
 	VkDeviceSize stagingBufferSize = bufferSize;
 	VkBuffer stagingBuffer = { 0 };
 	VkDeviceMemory stagingBufferMemory = { 0 };
@@ -21613,7 +28017,7 @@ static inline VkFFTResult transferDataFromCPU(VkFFTApplication* app, void* arr,
 	void* data;
 	res = vkMapMemory(app->configuration.device[0], stagingBufferMemory, 0, stagingBufferSize, 0, &data);
 	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_MAP_MEMORY;
-	memcpy(data, arr, stagingBufferSize);
+	memcpy(data, cpu_arr, stagingBufferSize);
 	vkUnmapMemory(app->configuration.device[0], stagingBufferMemory);
 	VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
 	commandBufferAllocateInfo.commandPool = app->configuration.commandPool[0];
@@ -21646,11 +28050,190 @@ static inline VkFFTResult transferDataFromCPU(VkFFTApplication* app, void* arr,
 	vkDestroyBuffer(app->configuration.device[0], stagingBuffer, 0);
 	vkFreeMemory(app->configuration.device[0], stagingBufferMemory, 0);
 	return resFFT;
+#elif(VKFFT_BACKEND==1)
+	cudaError_t res = cudaSuccess;
+	void* buffer = ((void**)input_buffer)[0];
+	res = cudaMemcpy(buffer, cpu_arr, transferSize, cudaMemcpyHostToDevice);
+	if (res != cudaSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==2)
+	hipError_t res = hipSuccess;
+	void* buffer = ((void**)input_buffer)[0];
+	res = hipMemcpy(buffer, cpu_arr, transferSize, hipMemcpyHostToDevice);
+	if (res != hipSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==3)
+	cl_int res = CL_SUCCESS;
+	cl_mem* buffer = (cl_mem*)input_buffer;
+	cl_command_queue commandQueue = clCreateCommandQueue(app->configuration.context[0], app->configuration.device[0], 0, &res);
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE;
+	res = clEnqueueWriteBuffer(commandQueue, buffer[0], CL_TRUE, 0, transferSize, cpu_arr, 0, NULL, NULL);
+	if (res != CL_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = clReleaseCommandQueue(commandQueue);
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
+#elif(VKFFT_BACKEND==4)
+	ze_result_t res = ZE_RESULT_SUCCESS;
+	void* buffer = ((void**)input_buffer)[0];
+	ze_command_queue_desc_t commandQueueCopyDesc = {
+			ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
+			0,
+			app->configuration.commandQueueID,
+			0, // index
+			0, // flags
+			ZE_COMMAND_QUEUE_MODE_DEFAULT,
+			ZE_COMMAND_QUEUE_PRIORITY_NORMAL
+	};
+	ze_command_list_handle_t copyCommandList;
+	res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	}
+	res = zeCommandListAppendMemoryCopy(copyCommandList, buffer, cpu_arr, transferSize, 0, 0, 0);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+	}
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* stagingBuffer = app->configuration.device->newBuffer(cpu_arr, transferSize, MTL::ResourceStorageModeShared);
+	MTL::CommandBuffer* copyCommandBuffer = app->configuration.queue->commandBuffer();
+	if (copyCommandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::BlitCommandEncoder* blitCommandEncoder = copyCommandBuffer->blitCommandEncoder();
+	if (blitCommandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::Buffer* buffer = ((MTL::Buffer**)input_buffer)[0];
+	blitCommandEncoder->copyFromBuffer((MTL::Buffer*)stagingBuffer, 0, (MTL::Buffer*)buffer, 0, transferSize);
+	blitCommandEncoder->endEncoding();
+	copyCommandBuffer->commit();
+	copyCommandBuffer->waitUntilCompleted();
+	blitCommandEncoder->release();
+	copyCommandBuffer->release();
+	stagingBuffer->release();
+#endif
+	return resFFT;
 }
+static inline VkFFTResult VkFFT_transferDataToCPU(VkFFTApplication* app, void* cpu_arr, void* output_buffer, uint64_t transferSize) { // copies transferSize bytes from the device buffer behind output_buffer into cpu_arr; the code path is selected at compile time by VKFFT_BACKEND
+	VkFFTResult resFFT = VKFFT_SUCCESS;
+#if(VKFFT_BACKEND==0)
+	VkBuffer* buffer = (VkBuffer*)output_buffer;
+	VkDeviceSize bufferSize = transferSize;
+	VkResult res = VK_SUCCESS;
+	uint64_t stagingBufferSize = bufferSize;
+	VkBuffer stagingBuffer = { 0 };
+	VkDeviceMemory stagingBufferMemory = { 0 };
+	resFFT = allocateFFTBuffer(app, &stagingBuffer, &stagingBufferMemory, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBufferSize); // host-visible staging buffer used as the copy destination
+	if (resFFT != VKFFT_SUCCESS) return resFFT;
+	VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
+	commandBufferAllocateInfo.commandPool = app->configuration.commandPool[0];
+	commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+	commandBufferAllocateInfo.commandBufferCount = 1;
+	VkCommandBuffer commandBuffer = { 0 };
+	res = vkAllocateCommandBuffers(app->configuration.device[0], &commandBufferAllocateInfo, &commandBuffer);
+	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
+	VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
+	commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+	res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
+	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
+	VkBufferCopy copyRegion = { 0 };
+	copyRegion.srcOffset = 0;
+	copyRegion.dstOffset = 0;
+	copyRegion.size = stagingBufferSize;
+	vkCmdCopyBuffer(commandBuffer, buffer[0], stagingBuffer, 1, &copyRegion); // device buffer -> staging buffer
+	res = vkEndCommandBuffer(commandBuffer);
+	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+	VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
+	submitInfo.commandBufferCount = 1;
+	submitInfo.pCommandBuffers = &commandBuffer;
+	res = vkQueueSubmit(app->configuration.queue[0], 1, &submitInfo, app->configuration.fence[0]);
+	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+	res = vkWaitForFences(app->configuration.device[0], 1, app->configuration.fence, VK_TRUE, 100000000000); // wait for the submitted copy before mapping the staging memory
+	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
+	res = vkResetFences(app->configuration.device[0], 1, app->configuration.fence);
+	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
+	vkFreeCommandBuffers(app->configuration.device[0], app->configuration.commandPool[0], 1, &commandBuffer);
+	void* data;
+	res = vkMapMemory(app->configuration.device[0], stagingBufferMemory, 0, stagingBufferSize, 0, &data);
+	if (res != VK_SUCCESS) return VKFFT_ERROR_FAILED_TO_MAP_MEMORY; // test the vkMapMemory result itself (res), otherwise a failed map leaves data uninitialized for the memcpy below
+	memcpy(cpu_arr, data, stagingBufferSize);
+	vkUnmapMemory(app->configuration.device[0], stagingBufferMemory);
+	vkDestroyBuffer(app->configuration.device[0], stagingBuffer, 0);
+	vkFreeMemory(app->configuration.device[0], stagingBufferMemory, 0);
+#elif(VKFFT_BACKEND==1)
+	cudaError_t res = cudaSuccess;
+	void* buffer = ((void**)output_buffer)[0]; // output_buffer is read as a pointer to the device pointer
+	res = cudaMemcpy(cpu_arr, buffer, transferSize, cudaMemcpyDeviceToHost);
+	if (res != cudaSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==2)
+	hipError_t res = hipSuccess;
+	void* buffer = ((void**)output_buffer)[0]; // output_buffer is read as a pointer to the device pointer
+	res = hipMemcpy(cpu_arr, buffer, transferSize, hipMemcpyDeviceToHost);
+	if (res != hipSuccess) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+#elif(VKFFT_BACKEND==3)
+	cl_int res = CL_SUCCESS;
+	cl_mem* buffer = (cl_mem*)output_buffer;
+	cl_command_queue commandQueue = clCreateCommandQueue(app->configuration.context[0], app->configuration.device[0], 0, &res); // temporary queue created just for this read
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE;
+	res = clEnqueueReadBuffer(commandQueue, buffer[0], CL_TRUE, 0, transferSize, cpu_arr, 0, NULL, NULL); // CL_TRUE requests a blocking read
+	if (res != CL_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = clReleaseCommandQueue(commandQueue);
+	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
+#elif(VKFFT_BACKEND==4)
+	ze_result_t res = ZE_RESULT_SUCCESS;
+	void* buffer = ((void**)output_buffer)[0]; // output_buffer is read as a pointer to the device pointer
+	ze_command_queue_desc_t commandQueueCopyDesc = {
+			ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
+			0,
+			app->configuration.commandQueueID,
+			0, // index
+			0, // flags
+			ZE_COMMAND_QUEUE_MODE_DEFAULT,
+			ZE_COMMAND_QUEUE_PRIORITY_NORMAL
+	};
+	ze_command_list_handle_t copyCommandList;
+	res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	}
+	res = zeCommandListAppendMemoryCopy(copyCommandList, cpu_arr, buffer, transferSize, 0, 0, 0);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_COPY;
+	}
+	res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
+	if (res != ZE_RESULT_SUCCESS) {
+		return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+	}
+#elif(VKFFT_BACKEND==5)
+	MTL::Buffer* stagingBuffer = app->configuration.device->newBuffer(transferSize, MTL::ResourceStorageModeShared); // shared-storage staging buffer, read back through contents() below
+	MTL::CommandBuffer* copyCommandBuffer = app->configuration.queue->commandBuffer();
+	if (copyCommandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::BlitCommandEncoder* blitCommandEncoder = copyCommandBuffer->blitCommandEncoder();
+	if (blitCommandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+	MTL::Buffer* buffer = ((MTL::Buffer**)output_buffer)[0];
+	blitCommandEncoder->copyFromBuffer((MTL::Buffer*)buffer, 0, (MTL::Buffer*)stagingBuffer, 0, transferSize); // device buffer -> staging buffer
+	blitCommandEncoder->endEncoding();
+	copyCommandBuffer->commit();
+	copyCommandBuffer->waitUntilCompleted();
+	blitCommandEncoder->release();
+	copyCommandBuffer->release();
+	memcpy(cpu_arr, stagingBuffer->contents(), transferSize);
+	stagingBuffer->release();
 #endif
+	return resFFT;
+}
 static inline void deleteAxis(VkFFTApplication* app, VkFFTAxis* axis) {
 #if(VKFFT_BACKEND==0)
-	if ((app->configuration.useLUT) && (!axis->referenceLUT)) {
+	if ((app->configuration.useLUT == 1) && (!axis->referenceLUT)) {
 		if (axis->bufferLUT != 0) {
 			vkDestroyBuffer(app->configuration.device[0], axis->bufferLUT, 0);
 			axis->bufferLUT = 0;
@@ -21679,51 +28262,71 @@ static inline void deleteAxis(VkFFTApplication* app, VkFFTAxis* axis) {
 #elif(VKFFT_BACKEND==1)
 	CUresult res = CUDA_SUCCESS;
 	cudaError_t res_t = cudaSuccess;
-	if ((app->configuration.useLUT) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
+	if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
 		res_t = cudaFree(axis->bufferLUT);
-		axis->bufferLUT = 0;
+		if (res_t == cudaSuccess) axis->bufferLUT = 0;
 	}
 	if (axis->VkFFTModule != 0) {
 		res = cuModuleUnload(axis->VkFFTModule);
-		axis->VkFFTModule = 0;
+		if (res == CUDA_SUCCESS) axis->VkFFTModule = 0;
 	}
 #elif(VKFFT_BACKEND==2)
 	hipError_t res = hipSuccess;
-	if ((app->configuration.useLUT) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
+	if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
 		res = hipFree(axis->bufferLUT);
-		axis->bufferLUT = 0;
+		if (res == hipSuccess) axis->bufferLUT = 0;
 	}
 	if (axis->VkFFTModule != 0) {
 		res = hipModuleUnload(axis->VkFFTModule);
-		axis->VkFFTModule = 0;
+		if (res == hipSuccess) axis->VkFFTModule = 0;
 	}
 #elif(VKFFT_BACKEND==3)
 	cl_int res = 0;
-	if ((app->configuration.useLUT) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
+	if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
 		res = clReleaseMemObject(axis->bufferLUT);
-		axis->bufferLUT = 0;
+		if (res == 0) axis->bufferLUT = 0;
 	}
 	if (axis->program != 0) {
 		res = clReleaseProgram(axis->program);
-		axis->program = 0;
+		if (res == 0) axis->program = 0;
 	}
 	if (axis->kernel != 0) {
 		res = clReleaseKernel(axis->kernel);
-		axis->kernel = 0;
+		if (res == 0) axis->kernel = 0;
 	}
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
-	if ((app->configuration.useLUT) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
+	if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) {
 		res = zeMemFree(app->configuration.context[0], axis->bufferLUT);
-		axis->bufferLUT = 0;
+		if (res == ZE_RESULT_SUCCESS) axis->bufferLUT = 0;
 	}
 	if (axis->VkFFTModule != 0) {
 		res = zeModuleDestroy(axis->VkFFTModule);
-		axis->VkFFTModule = 0;
+		if (res == ZE_RESULT_SUCCESS)axis->VkFFTModule = 0;
 	}
 	if (axis->VkFFTKernel != 0) {
 		res = zeKernelDestroy(axis->VkFFTKernel);
-		axis->VkFFTKernel = 0;
+		if (res == ZE_RESULT_SUCCESS)axis->VkFFTKernel = 0;
+	}
+#elif(VKFFT_BACKEND==5) // Metal backend: release the per-axis objects and zero each handle
+	if (axis->pushConstants.dataUintBuffer) {
+		axis->pushConstants.dataUintBuffer->release();
+		axis->pushConstants.dataUintBuffer = 0;
+	}
+	if ((app->configuration.useLUT == 1) && (!axis->referenceLUT) && (axis->bufferLUT != 0)) { // same guard as the other backends: useLUT == 1, referenceLUT unset, buffer present
+		((MTL::Buffer*)axis->bufferLUT)->release();
+		//free(axis->bufferLUT);
+		axis->bufferLUT = 0;
+	}
+	if (axis->pipeline != 0) {
+		axis->pipeline->release();
+		//free(axis->pipeline);
+		axis->pipeline = 0;
+	}
+	if (axis->library != 0) { // library is released after the pipeline
+		axis->library->release();
+		//free(axis->library);
+		axis->library = 0;
 	}
 #endif
 	if (app->configuration.saveApplicationToString) {
@@ -21740,13 +28343,12 @@ static inline void deleteVkFFT(VkFFTApplication* app) {
 		app->configuration.isCompilerInitialized = 0;
 	}
 #elif(VKFFT_BACKEND==1)
-	CUresult res = CUDA_SUCCESS;
-	cudaError_t res_t = cudaSuccess;
 	if (app->configuration.num_streams > 1) {
+		cudaError_t res_t = cudaSuccess;
 		for (uint64_t i = 0; i < app->configuration.num_streams; i++) {
 			if (app->configuration.stream_event[i] != 0) {
 				res_t = cudaEventDestroy(app->configuration.stream_event[i]);
-				app->configuration.stream_event[i] = 0;
+				if (res_t == cudaSuccess) app->configuration.stream_event[i] = 0;
 			}
 		}
 		if (app->configuration.stream_event != 0) {
@@ -21755,12 +28357,12 @@ static inline void deleteVkFFT(VkFFTApplication* app) {
 		}
 	}
 #elif(VKFFT_BACKEND==2)
-	hipError_t res_t = hipSuccess;
 	if (app->configuration.num_streams > 1) {
+		hipError_t res_t = hipSuccess;
 		for (uint64_t i = 0; i < app->configuration.num_streams; i++) {
 			if (app->configuration.stream_event[i] != 0) {
 				res_t = hipEventDestroy(app->configuration.stream_event[i]);
-				app->configuration.stream_event[i] = 0;
+				if (res_t == hipSuccess) app->configuration.stream_event[i] = 0;
 			}
 		}
 		if (app->configuration.stream_event != 0) {
@@ -21769,6 +28371,12 @@ static inline void deleteVkFFT(VkFFTApplication* app) {
 		}
 	}
 #endif
+	if (app->numRaderFFTPrimes) { // runs for every backend (placed after the backend #endif)
+		for (uint64_t i = 0; i < app->numRaderFFTPrimes; i++) {
+			free(app->raderFFTkernel[i]); // host-side free(); no null check is needed because free(0) is a no-op
+			app->raderFFTkernel[i] = 0; // zero the slot so a repeated delete does not free it twice
+		}
+	}
 	if (!app->configuration.userTempBuffer) {
 		if (app->configuration.allocateTempBuffer) {
 			app->configuration.allocateTempBuffer = 0;
@@ -21782,26 +28390,32 @@ static inline void deleteVkFFT(VkFFTApplication* app) {
 				app->configuration.tempBufferDeviceMemory = 0;
 			}
 #elif(VKFFT_BACKEND==1)
+			cudaError_t res_t = cudaSuccess;
 			if (app->configuration.tempBuffer[0] != 0) {
 				res_t = cudaFree(app->configuration.tempBuffer[0]);
-				app->configuration.tempBuffer[0] = 0;
+				if (res_t == cudaSuccess) app->configuration.tempBuffer[0] = 0;
 			}
 #elif(VKFFT_BACKEND==2)
+			hipError_t res_t = hipSuccess;
 			if (app->configuration.tempBuffer[0] != 0) {
 				res_t = hipFree(app->configuration.tempBuffer[0]);
-				app->configuration.tempBuffer[0] = 0;
+				if (res_t == hipSuccess) app->configuration.tempBuffer[0] = 0;
 			}
 #elif(VKFFT_BACKEND==3)
 			cl_int res = 0;
 			if (app->configuration.tempBuffer[0] != 0) {
 				res = clReleaseMemObject(app->configuration.tempBuffer[0]);
-				app->configuration.tempBuffer[0] = 0;
+				if (res == 0) app->configuration.tempBuffer[0] = 0;
 			}
 #elif(VKFFT_BACKEND==4)
 			ze_result_t res = ZE_RESULT_SUCCESS;
 			if (app->configuration.tempBuffer[0] != 0) {
 				res = zeMemFree(app->configuration.context[0], app->configuration.tempBuffer[0]);
-				app->configuration.tempBuffer[0] = 0;
+				if (res == ZE_RESULT_SUCCESS) app->configuration.tempBuffer[0] = 0;
+			}
+#elif(VKFFT_BACKEND==5) // Metal backend
+			if (app->configuration.tempBuffer[0] != 0) {
+				((MTL::Buffer*)app->configuration.tempBuffer[0])->release(); // NOTE(review): unlike the CUDA/HIP/OpenCL/Level Zero branches above, tempBuffer[0] is not reset to 0 here
 			}
 #endif
 			if (app->configuration.tempBuffer != 0) {
@@ -21815,6 +28429,40 @@ static inline void deleteVkFFT(VkFFTApplication* app) {
 		}
 	}
 	for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
+		if (app->configuration.useRaderUintLUT) { // release the Rader uint LUT buffers of dimension i
+			for (uint64_t j = 0; j < 4; j++) { // four LUT slots are checked per dimension
+				if (app->bufferRaderUintLUT[i][j]) { // only slots that hold a buffer are released
+#if(VKFFT_BACKEND==0) // Vulkan: destroy the buffer, then free its device memory
+					vkDestroyBuffer(app->configuration.device[0], app->bufferRaderUintLUT[i][j], 0);
+					app->bufferRaderUintLUT[i][j] = 0;
+					vkFreeMemory(app->configuration.device[0], app->bufferRaderUintLUTDeviceMemory[i][j], 0);
+					app->bufferRaderUintLUTDeviceMemory[i][j] = 0;
+#elif(VKFFT_BACKEND==1) // CUDA
+					cudaError_t res_t = cudaSuccess;
+					res_t = cudaFree(app->bufferRaderUintLUT[i][j]);
+					if (res_t == cudaSuccess) app->bufferRaderUintLUT[i][j] = 0; // handle is kept if the free failed
+#elif(VKFFT_BACKEND==2) // HIP
+					hipError_t res_t = hipSuccess;
+					res_t = hipFree(app->bufferRaderUintLUT[i][j]);
+					if (res_t == hipSuccess) app->bufferRaderUintLUT[i][j] = 0; // handle is kept if the free failed
+#elif(VKFFT_BACKEND==3) // OpenCL
+					cl_int res = 0;
+					res = clReleaseMemObject(app->bufferRaderUintLUT[i][j]);
+					if (res == 0) app->bufferRaderUintLUT[i][j] = 0; // handle is kept if the release failed
+#elif(VKFFT_BACKEND==4) // Level Zero
+					ze_result_t res = ZE_RESULT_SUCCESS;
+					res = zeMemFree(app->configuration.context[0], app->bufferRaderUintLUT[i][j]);
+					if (res == ZE_RESULT_SUCCESS) app->bufferRaderUintLUT[i][j] = 0; // handle is kept if the free failed
+#elif(VKFFT_BACKEND==5) // Metal
+					if (app->bufferRaderUintLUT[i][j] != 0) { // repeats the enclosing non-zero check
+						((MTL::Buffer*)app->bufferRaderUintLUT[i][j])->release();
+						//free(app->bufferRaderUintLUT[i][j]);
+						app->bufferRaderUintLUT[i][j] = 0;
+					}
+#endif
+				}
+			}
+		}
 		if (app->useBluesteinFFT[i]) {
 #if(VKFFT_BACKEND==0)
 			if (app->bufferBluestein[i] != 0) {
@@ -21842,57 +28490,75 @@ static inline void deleteVkFFT(VkFFTApplication* app) {
 				app->bufferBluesteinIFFTDeviceMemory[i] = 0;
 			}
 #elif(VKFFT_BACKEND==1)
+			cudaError_t res_t = cudaSuccess;
 			if (app->bufferBluestein[i] != 0) {
 				res_t = cudaFree(app->bufferBluestein[i]);
-				app->bufferBluestein[i] = 0;
+				if (res_t == cudaSuccess) app->bufferBluestein[i] = 0;
 			}
 			if (app->bufferBluesteinFFT[i] != 0) {
 				res_t = cudaFree(app->bufferBluesteinFFT[i]);
-				app->bufferBluesteinFFT[i] = 0;
+				if (res_t == cudaSuccess) app->bufferBluesteinFFT[i] = 0;
 			}
 			if (app->bufferBluesteinIFFT[i] != 0) {
 				res_t = cudaFree(app->bufferBluesteinIFFT[i]);
-				app->bufferBluesteinIFFT[i] = 0;
+				if (res_t == cudaSuccess) app->bufferBluesteinIFFT[i] = 0;
 			}
 #elif(VKFFT_BACKEND==2)
+			hipError_t res_t = hipSuccess;
 			if (app->bufferBluestein[i] != 0) {
 				res_t = hipFree(app->bufferBluestein[i]);
-				app->bufferBluestein[i] = 0;
+				if (res_t == hipSuccess) app->bufferBluestein[i] = 0;
 			}
 			if (app->bufferBluesteinFFT[i] != 0) {
 				res_t = hipFree(app->bufferBluesteinFFT[i]);
-				app->bufferBluesteinFFT[i] = 0;
+				if (res_t == hipSuccess) app->bufferBluesteinFFT[i] = 0;
 			}
 			if (app->bufferBluesteinIFFT[i] != 0) {
 				res_t = hipFree(app->bufferBluesteinIFFT[i]);
-				app->bufferBluesteinIFFT[i] = 0;
+				if (res_t == hipSuccess) app->bufferBluesteinIFFT[i] = 0;
 			}
 #elif(VKFFT_BACKEND==3)
 			cl_int res = 0;
 			if (app->bufferBluestein[i] != 0) {
 				res = clReleaseMemObject(app->bufferBluestein[i]);
-				app->bufferBluestein[i] = 0;
+				if (res == 0) app->bufferBluestein[i] = 0;
 			}
 			if (app->bufferBluesteinFFT[i] != 0) {
 				res = clReleaseMemObject(app->bufferBluesteinFFT[i]);
-				app->bufferBluesteinFFT[i] = 0;
+				if (res == 0) app->bufferBluesteinFFT[i] = 0;
 			}
 			if (app->bufferBluesteinIFFT[i] != 0) {
 				res = clReleaseMemObject(app->bufferBluesteinIFFT[i]);
-				app->bufferBluesteinIFFT[i] = 0;
+				if (res == 0) app->bufferBluesteinIFFT[i] = 0;
 			}
 #elif(VKFFT_BACKEND==4)
 			ze_result_t res = ZE_RESULT_SUCCESS;
 			if (app->bufferBluestein[i] != 0) {
 				res = zeMemFree(app->configuration.context[0], app->bufferBluestein[i]);
-				app->bufferBluestein[i] = 0;
+				if (res == ZE_RESULT_SUCCESS) app->bufferBluestein[i] = 0;
 			}
 			if (app->bufferBluesteinFFT[i] != 0) {
 				res = zeMemFree(app->configuration.context[0], app->bufferBluesteinFFT[i]);
-				app->bufferBluesteinFFT[i] = 0;
+				if (res == ZE_RESULT_SUCCESS) app->bufferBluesteinFFT[i] = 0;
 			}
 			if (app->bufferBluesteinIFFT[i] != 0) {
 				res = zeMemFree(app->configuration.context[0], app->bufferBluesteinIFFT[i]);
+				if (res == ZE_RESULT_SUCCESS) app->bufferBluesteinIFFT[i] = 0;
+			}
+#elif(VKFFT_BACKEND==5)
+			if (app->bufferBluestein[i] != 0) {
+				((MTL::Buffer*)app->bufferBluestein[i])->release();
+				//free(app->bufferBluestein[i]);
+				app->bufferBluestein[i] = 0;
+			}
+			if (app->bufferBluesteinFFT[i] != 0) {
+				((MTL::Buffer*)app->bufferBluesteinFFT[i])->release();
+				//free(app->bufferBluesteinFFT[i]);
+				app->bufferBluesteinFFT[i] = 0;
+			}
+			if (app->bufferBluesteinIFFT[i] != 0) {
+				((MTL::Buffer*)app->bufferBluesteinIFFT[i])->release();
+				//free(app->bufferBluesteinIFFT[i]);
 				app->bufferBluesteinIFFT[i] = 0;
 			}
 #endif
@@ -21948,14 +28614,14 @@ static inline void deleteVkFFT(VkFFTApplication* app) {
 		if (app->configuration.primeSizes != 0) {
 			free(app->configuration.primeSizes);
 			app->configuration.primeSizes = 0;
-}
+		}
 		if (app->configuration.paddedSizes != 0) {
 			free(app->configuration.paddedSizes);
 			app->configuration.paddedSizes = 0;
 		}
 	}
 }
-static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64_t extraSharedMemoryForPow2, uint64_t max_rhs, uint64_t* loc_multipliers, uint64_t* registers_per_thread_per_radix, uint64_t* registers_per_thread, uint64_t* min_registers_per_thread, uint64_t* isGoodSequence) {
+static inline VkFFTResult VkFFTGetRegistersPerThread(VkFFTApplication* app, uint64_t fft_length, uint64_t extraSharedMemoryForPow2, uint64_t max_rhs, uint64_t useRader, uint64_t* loc_multipliers, uint64_t* registers_per_thread_per_radix, uint64_t* registers_per_thread, uint64_t* min_registers_per_thread, uint64_t* isGoodSequence) {
 	for (uint64_t i = 0; i < 33; i++) {
 		registers_per_thread_per_radix[i] = 0;
 	}
@@ -22164,90 +28830,30 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 				if (loc_multipliers[7] > 0) {
 					if (loc_multipliers[11] > 0) {
 						if (loc_multipliers[13] > 0) {
-							switch (loc_multipliers[2]) {
-							case 1:
-								registers_per_thread_per_radix[2] = 22;
-								registers_per_thread_per_radix[3] = 21;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 21;
-								registers_per_thread_per_radix[11] = 22;
-								registers_per_thread_per_radix[13] = 26;
-								break;
-							case 2:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 14;
-								registers_per_thread_per_radix[11] = 11;
-								registers_per_thread_per_radix[13] = 13;
-								break;
-							default:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 14;
-								registers_per_thread_per_radix[11] = 11;
-								registers_per_thread_per_radix[13] = 13;
-								break;
-							}
+							registers_per_thread_per_radix[2] = 12;
+							registers_per_thread_per_radix[3] = 12;
+							registers_per_thread_per_radix[5] = 0;
+							registers_per_thread_per_radix[7] = 14;
+							registers_per_thread_per_radix[11] = 11;
+							registers_per_thread_per_radix[13] = 13;
 						}
 						else {
-							switch (loc_multipliers[2]) {
-							case 1:
-								registers_per_thread_per_radix[2] = 22;
-								registers_per_thread_per_radix[3] = 21;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 21;
-								registers_per_thread_per_radix[11] = 22;
-								registers_per_thread_per_radix[13] = 0;
-								break;
-							case 2:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 14;
-								registers_per_thread_per_radix[11] = 11;
-								registers_per_thread_per_radix[13] = 0;
-								break;
-							default:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 14;
-								registers_per_thread_per_radix[11] = 11;
-								registers_per_thread_per_radix[13] = 0;
-								break;
-							}
+							registers_per_thread_per_radix[2] = 12;
+							registers_per_thread_per_radix[3] = 12;
+							registers_per_thread_per_radix[5] = 0;
+							registers_per_thread_per_radix[7] = 14;
+							registers_per_thread_per_radix[11] = 11;
+							registers_per_thread_per_radix[13] = 0;
 						}
 					}
 					else {
 						if (loc_multipliers[13] > 0) {
-							switch (loc_multipliers[2]) {
-							case 1:
-								registers_per_thread_per_radix[2] = 26;
-								registers_per_thread_per_radix[3] = 21;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 21;
-								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 26;
-								break;
-							case 2:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 14;
-								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 13;
-								break;
-							default:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 14;
-								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 13;
-								break;
-							}
+							registers_per_thread_per_radix[2] = 12;
+							registers_per_thread_per_radix[3] = 12;
+							registers_per_thread_per_radix[5] = 0;
+							registers_per_thread_per_radix[7] = 14;
+							registers_per_thread_per_radix[11] = 0;
+							registers_per_thread_per_radix[13] = 13;
 						}
 						else {
 							switch (loc_multipliers[2]) {
@@ -22377,38 +28983,38 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 								registers_per_thread_per_radix[13] = 0;
 							}
 							else {
-							switch (loc_multipliers[2]) {
-							case 1:
-								registers_per_thread_per_radix[2] = 6;
-								registers_per_thread_per_radix[3] = 6;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 0;
-								break;
-							case 2:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 0;
-								break;
-							default:
-								registers_per_thread_per_radix[2] = 12;
-								registers_per_thread_per_radix[3] = 12;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 0;
-								break;
+								switch (loc_multipliers[2]) {
+								case 1:
+									registers_per_thread_per_radix[2] = 6;
+									registers_per_thread_per_radix[3] = 6;
+									registers_per_thread_per_radix[5] = 0;
+									registers_per_thread_per_radix[7] = 0;
+									registers_per_thread_per_radix[11] = 0;
+									registers_per_thread_per_radix[13] = 0;
+									break;
+								case 2:
+									registers_per_thread_per_radix[2] = 12;
+									registers_per_thread_per_radix[3] = 12;
+									registers_per_thread_per_radix[5] = 0;
+									registers_per_thread_per_radix[7] = 0;
+									registers_per_thread_per_radix[11] = 0;
+									registers_per_thread_per_radix[13] = 0;
+									break;
+								default:
+									registers_per_thread_per_radix[2] = 12;
+									registers_per_thread_per_radix[3] = 12;
+									registers_per_thread_per_radix[5] = 0;
+									registers_per_thread_per_radix[7] = 0;
+									registers_per_thread_per_radix[11] = 0;
+									registers_per_thread_per_radix[13] = 0;
+									break;
+								}
 							}
 						}
 					}
 				}
 			}
 		}
-		}
 		else {
 			if (loc_multipliers[5] > 0) {
 				if (loc_multipliers[7] > 0) {
@@ -22821,20 +29427,20 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 						if (loc_multipliers[13] > 0) {
 							switch (loc_multipliers[2]) {
 							case 1:
-								registers_per_thread_per_radix[2] = 22;
+								registers_per_thread_per_radix[2] = 12;
 								registers_per_thread_per_radix[3] = 0;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 22;
-								registers_per_thread_per_radix[13] = 26;
+								registers_per_thread_per_radix[11] = 11;
+								registers_per_thread_per_radix[13] = 13;
 								break;
 							case 2:
-								registers_per_thread_per_radix[2] = 22;
+								registers_per_thread_per_radix[2] = 12;
 								registers_per_thread_per_radix[3] = 0;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 22;
-								registers_per_thread_per_radix[13] = 26;
+								registers_per_thread_per_radix[11] = 11;
+								registers_per_thread_per_radix[13] = 13;
 								break;
 							default:
 								registers_per_thread_per_radix[2] = 8;
@@ -22849,23 +29455,7 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 						else {
 							switch (loc_multipliers[2]) {
 							case 1:
-								registers_per_thread_per_radix[2] = 22;
-								registers_per_thread_per_radix[3] = 0;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 22;
-								registers_per_thread_per_radix[13] = 0;
-								break;
-							case 2:
-								registers_per_thread_per_radix[2] = 22;
-								registers_per_thread_per_radix[3] = 0;
-								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 22;
-								registers_per_thread_per_radix[13] = 0;
-								break;
-							case 3:
-								registers_per_thread_per_radix[2] = 8;
+								registers_per_thread_per_radix[2] = 10;
 								registers_per_thread_per_radix[3] = 0;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
@@ -22887,20 +29477,20 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 						if (loc_multipliers[13] > 0) {
 							switch (loc_multipliers[2]) {
 							case 1:
-								registers_per_thread_per_radix[2] = 26;
+								registers_per_thread_per_radix[2] = 12;
 								registers_per_thread_per_radix[3] = 0;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
 								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 26;
+								registers_per_thread_per_radix[13] = 13;
 								break;
 							case 2:
-								registers_per_thread_per_radix[2] = 26;
+								registers_per_thread_per_radix[2] = 12;
 								registers_per_thread_per_radix[3] = 0;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
 								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 26;
+								registers_per_thread_per_radix[13] = 13;
 								break;
 							default:
 								registers_per_thread_per_radix[2] = 8;
@@ -22920,7 +29510,7 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 							uint64_t maxRadixMinStages = 1;
 							uint64_t fixMaxCheckRadix2 = 3;
 #if(VKFFT_BACKEND==1)
-							fixMaxCheckRadix2 = ((fft_length >= 2048) && (extraSharedMemoryForPow2)) ? 5 : 3;
+								fixMaxCheckRadix2 = (((fft_length >= 1024) || (fft_length == 256)) && (extraSharedMemoryForPow2) && (!useRader)) ? 5 : 3;
 #endif
 							for (uint64_t i = 1; i <= fixMaxCheckRadix2; i++) {
 								uint64_t numStages = (uint64_t)ceil(log2(fft_length) / ((double)i));
@@ -22942,7 +29532,7 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 							uint64_t final_loc_multipliers_pow2 = 1;
 							uint64_t num_stages_min = (uint64_t)log2(fft_length);
 							for (uint64_t i = 2; i <= max_loc_multipliers_pow2; i++) {
-								uint64_t num_stages = ceil(((uint64_t)log2(fft_length)) / (double)i);
+								uint64_t num_stages = (uint64_t)ceil(((uint64_t)log2(fft_length)) / (double)i);
 								if (num_stages < num_stages_min) {
 									final_loc_multipliers_pow2 = i;
 									num_stages_min = num_stages;
@@ -22971,7 +29561,7 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 							registers_per_thread_per_radix[2] = 0;
 							registers_per_thread_per_radix[3] = 15;
 							registers_per_thread_per_radix[5] = 15;
-							registers_per_thread_per_radix[7] = 21;
+							registers_per_thread_per_radix[7] = 14;
 							registers_per_thread_per_radix[11] = 11;
 							registers_per_thread_per_radix[13] = 13;
 						}
@@ -22979,7 +29569,7 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 							registers_per_thread_per_radix[2] = 0;
 							registers_per_thread_per_radix[3] = 15;
 							registers_per_thread_per_radix[5] = 15;
-							registers_per_thread_per_radix[7] = 21;
+							registers_per_thread_per_radix[7] = 14;
 							registers_per_thread_per_radix[11] = 11;
 							registers_per_thread_per_radix[13] = 0;
 						}
@@ -22989,7 +29579,7 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 							registers_per_thread_per_radix[2] = 0;
 							registers_per_thread_per_radix[3] = 15;
 							registers_per_thread_per_radix[5] = 15;
-							registers_per_thread_per_radix[7] = 21;
+							registers_per_thread_per_radix[7] = 14;
 							registers_per_thread_per_radix[11] = 0;
 							registers_per_thread_per_radix[13] = 13;
 						}
@@ -22997,7 +29587,7 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 							registers_per_thread_per_radix[2] = 0;
 							registers_per_thread_per_radix[3] = 15;
 							registers_per_thread_per_radix[5] = 15;
-							registers_per_thread_per_radix[7] = 21;
+							registers_per_thread_per_radix[7] = 14;
 							registers_per_thread_per_radix[11] = 0;
 							registers_per_thread_per_radix[13] = 0;
 						}
@@ -23049,17 +29639,17 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 						if (loc_multipliers[11] > 0) {
 							if (loc_multipliers[13] > 0) {
 								registers_per_thread_per_radix[2] = 0;
-								registers_per_thread_per_radix[3] = 21;
+								registers_per_thread_per_radix[3] = 12;
 								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 21;
+								registers_per_thread_per_radix[7] = 14;
 								registers_per_thread_per_radix[11] = 11;
 								registers_per_thread_per_radix[13] = 13;
 							}
 							else {
 								registers_per_thread_per_radix[2] = 0;
-								registers_per_thread_per_radix[3] = 21;
+								registers_per_thread_per_radix[3] = 12;
 								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 21;
+								registers_per_thread_per_radix[7] = 14;
 								registers_per_thread_per_radix[11] = 11;
 								registers_per_thread_per_radix[13] = 0;
 							}
@@ -23067,17 +29657,17 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 						else {
 							if (loc_multipliers[13] > 0) {
 								registers_per_thread_per_radix[2] = 0;
-								registers_per_thread_per_radix[3] = 21;
+								registers_per_thread_per_radix[3] = 12;
 								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 21;
+								registers_per_thread_per_radix[7] = 14;
 								registers_per_thread_per_radix[11] = 0;
 								registers_per_thread_per_radix[13] = 13;
 							}
 							else {
 								registers_per_thread_per_radix[2] = 0;
-								registers_per_thread_per_radix[3] = 21;
+								registers_per_thread_per_radix[3] = 6;
 								registers_per_thread_per_radix[5] = 0;
-								registers_per_thread_per_radix[7] = 21;
+								registers_per_thread_per_radix[7] = 7;
 								registers_per_thread_per_radix[11] = 0;
 								registers_per_thread_per_radix[13] = 0;
 							}
@@ -23127,29 +29717,29 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 						if (loc_multipliers[11] > 0) {
 							if (loc_multipliers[13] > 0) {
 								registers_per_thread_per_radix[2] = 0;
-								registers_per_thread_per_radix[3] = 33;
+								registers_per_thread_per_radix[3] = 12;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 33;
-								registers_per_thread_per_radix[13] = 39;
+								registers_per_thread_per_radix[11] = 11;
+								registers_per_thread_per_radix[13] = 13;
 							}
 							else {
 								registers_per_thread_per_radix[2] = 0;
-								registers_per_thread_per_radix[3] = 33;
+								registers_per_thread_per_radix[3] = 9;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
-								registers_per_thread_per_radix[11] = 33;
+								registers_per_thread_per_radix[11] = 11;
 								registers_per_thread_per_radix[13] = 0;
 							}
 						}
 						else {
 							if (loc_multipliers[13] > 0) {
 								registers_per_thread_per_radix[2] = 0;
-								registers_per_thread_per_radix[3] = 39;
+								registers_per_thread_per_radix[3] = 12;
 								registers_per_thread_per_radix[5] = 0;
 								registers_per_thread_per_radix[7] = 0;
 								registers_per_thread_per_radix[11] = 0;
-								registers_per_thread_per_radix[13] = 39;
+								registers_per_thread_per_radix[13] = 13;
 							}
 							else {
 								registers_per_thread_per_radix[2] = 0;
@@ -23350,7 +29940,10 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 							registers_per_thread_per_radix[13] = 13;
 						}
 						else {
-							return VKFFT_ERROR_UNSUPPORTED_RADIX;
+							min_registers_per_thread[0] = 2;
+							registers_per_thread[0] = 2;
+							//Rader-only sequence
+							//return VKFFT_ERROR_UNSUPPORTED_RADIX;
 						}
 					}
 				}
@@ -23381,8 +29974,560 @@ static inline VkFFTResult VkFFTGetRegistersPerThread(uint64_t fft_length, uint64
 	else isGoodSequence[0] = 1;
 	return VKFFT_SUCCESS;
 }
+static inline VkFFTResult VkFFTGetRegistersPerThreadOptimizeShared(uint64_t fft_length, uint64_t* registers_per_thread_per_radix, uint64_t* registers_per_thread, uint64_t* min_registers_per_thread) {
+	//try to split sequence in supported radix to optimize sm usage: looks for the smallest stage count i for which fft_length factors into i radices <= 16, fills registers_per_thread_per_radix[0..32] from those stages and writes the largest/smallest non-zero entry to registers_per_thread[0]/min_registers_per_thread[0]; always returns VKFFT_SUCCESS
+	uint64_t numStages = 20;
+	uint64_t fft_length_copy;
+	uint64_t stages[20];
+	uint64_t k = 0;
+	for (uint64_t i = 0; i < 33; i++) {
+		registers_per_thread_per_radix[i] = 0;
+	}
+	registers_per_thread[0] = 0;
+	min_registers_per_thread[0] = -1; // wraps to the maximum uint64_t value so the final scan can only lower it
+	// i is the number of stages being attempted; the search stops at the first i that factors fft_length completely
+	for (uint64_t i = 1; i < numStages; i++) {
+		fft_length_copy = fft_length;
+		uint64_t min_comb_radix = (uint64_t)floor(pow(fft_length_copy, 1.0 / i));
+		if (min_comb_radix <= 16) {
+			for (uint64_t j = 0; j < 20; j++) {
+				stages[j] = 0;
+			}
+			k = 0;
+			for (uint64_t j = min_comb_radix; j <= 16; j++) {
+				if (k < i) {
+					if ((fft_length_copy % j) == 0) {
+						fft_length_copy /= j;
+						stages[k] = j;
+						k++;
+						if (k == i) break; // all i stages assigned: stop here instead of evaluating pow(x, 1.0 / 0), whose infinite result must not be converted to uint64_t
+						min_comb_radix = (uint64_t)floor(pow(fft_length_copy, 1.0 / (i - k)));
+						j = min_comb_radix - 1; // the loop's j++ restarts the scan at the new minimum radix
+					}
+				}
+			}
+			if ((fft_length_copy == 1) && (k == i)) break;
+		}
+	}
+	for (uint64_t i = 0; i < k; i++) {
+		for (uint64_t j = 2; j <= stages[i]; j++) {
+			if ((stages[i] % j) == 0) { // every divisor j of a stage radix gets at least that stage's radix as its register count
+				if (registers_per_thread_per_radix[j] < stages[i])
+					registers_per_thread_per_radix[j] = stages[i];
+			}
+		}
+	}
+	for (uint64_t i = 0; i < 33; i++) {
+		if ((registers_per_thread_per_radix[i] != 0) && (registers_per_thread_per_radix[i] > registers_per_thread[0])) registers_per_thread[0] = registers_per_thread_per_radix[i];
+	}
+	// scale every used entry by floor or ceil of (max / entry), picking the multiple with the smaller relative distance to the max (ties go to ceil)
+	for (uint64_t i = 0; i < 33; i++) {
+		if (registers_per_thread_per_radix[i] != 0) {
+			double ratio = (registers_per_thread[0] / (double)registers_per_thread_per_radix[i]);
+			uint64_t ratio_ceil = (uint64_t)ceil(ratio);
+			uint64_t ratio_floor = (uint64_t)floor(ratio);
+			double ratio2 = ((registers_per_thread_per_radix[i] * ratio_ceil) / (double)registers_per_thread[0]);
+			double ratio3 = (registers_per_thread[0] / (double)(registers_per_thread_per_radix[i] * ratio_floor));
+			if (ratio2 > ratio3) registers_per_thread_per_radix[i] *= ratio_floor;
+			else {
+				registers_per_thread_per_radix[i] *= ratio_ceil;
+			}
+		}
+	}
+	registers_per_thread[0] = 0; // recompute max and min over the scaled entries
+	for (uint64_t i = 0; i < 33; i++) {
+		if ((registers_per_thread_per_radix[i] != 0) && (registers_per_thread_per_radix[i] < min_registers_per_thread[0])) min_registers_per_thread[0] = registers_per_thread_per_radix[i];
+		if ((registers_per_thread_per_radix[i] != 0) && (registers_per_thread_per_radix[i] > registers_per_thread[0])) registers_per_thread[0] = registers_per_thread_per_radix[i];
+	}
+	return VKFFT_SUCCESS;
+}
+static inline VkFFTResult VkFFTConstructRaderTree(VkFFTApplication* app, VkFFTRaderContainer** raderContainer_input, uint64_t* tempSequence, uint64_t* numRaderPrimes, uint64_t fft_radix_part) { // Allocates raderContainer_input[0] and fills one entry per distinct factor >= fixMinRaderPrimeMult found in tempSequence[0] (type 0 = FFT, type 1 = multiplication), dividing those factors out of tempSequence[0]; numRaderPrimes[0] is incremented, not reset. Returns VKFFT_ERROR_MALLOC_FAILED when calloc returns 0, otherwise the result of the nested calls.
+	VkFFTResult res = VKFFT_SUCCESS;
+	uint64_t locTempSequence = tempSequence[0];
+	uint64_t tempSequence_copy = tempSequence[0];
+	uint64_t limit = ((tempSequence[0] + 1) > app->configuration.fixMaxRaderPrimeFFT) ? app->configuration.fixMaxRaderPrimeFFT : (tempSequence[0] + 1); // exclusive upper bound of the first scan: min(tempSequence[0] + 1, fixMaxRaderPrimeFFT)
+	for (uint64_t i = app->configuration.fixMinRaderPrimeMult; i < limit; i++) { // counting pass on a local copy: one container slot per distinct factor
+		if (locTempSequence % i == 0) {
+			numRaderPrimes[0]++;
+			while (locTempSequence % i == 0) locTempSequence /= i;
+		}
+	}
+	for (uint64_t i = app->configuration.fixMinRaderPrimeMult; i < app->configuration.fixMaxRaderPrimeMult; i++) { // counts factors still left after the first scan, up to fixMaxRaderPrimeMult
+		if (locTempSequence % i == 0) {
+			numRaderPrimes[0]++;
+			while (locTempSequence % i == 0) locTempSequence /= i;
+		}
+	}
+	// zero-initialised allocation: an entry with prime == 0 is treated as a free slot by the loops below
+	raderContainer_input[0] = (VkFFTRaderContainer*)calloc(sizeof(VkFFTRaderContainer), numRaderPrimes[0]);
+	if (raderContainer_input[0] == 0) return VKFFT_ERROR_MALLOC_FAILED;
+	VkFFTRaderContainer* raderContainer = raderContainer_input[0];
+	uint64_t tempSequence_temp = 1; // product of factors rejected for the FFT variant; multiplied back into tempSequence[0] after this scan
+	limit = ((tempSequence[0] + 1) > app->configuration.fixMaxRaderPrimeFFT) ? app->configuration.fixMaxRaderPrimeFFT : (tempSequence[0] + 1);
+	for (uint64_t i = app->configuration.fixMinRaderPrimeMult; i < limit; i++) {
+		if (tempSequence[0] % i == 0) {
+			if (i < app->configuration.fixMinRaderPrimeFFT) { // below the FFT-variant threshold: set the factor aside
+				tempSequence_temp *= i;
+				tempSequence[0] /= i;
+				i--; // cancels the loop's i++ so the same i is tested again (repeated factors)
+				continue;
+			}
+			//Sophie Germain safe prime check
+			uint64_t tempSequence2 = i - 1;
+			for (uint64_t j = 2; j < app->configuration.fixMinRaderPrimeMult; j++) { // strip every factor below fixMinRaderPrimeMult from i - 1
+				if (tempSequence2 % j == 0) {
+					tempSequence2 /= j;
+					j--;
+				}
+			}
+			if (tempSequence2 != 1) { // i - 1 keeps a factor >= fixMinRaderPrimeMult: rejected for the FFT variant, set aside
+				tempSequence_temp *= i;
+				tempSequence[0] /= i;
+				i--;
+				continue;
+			}
+			tempSequence[0] /= i;
+			for (uint64_t j = 0; j < numRaderPrimes[0]; j++) { // find the entry for i or claim the first free slot
+				if (raderContainer[j].prime == i)
+				{
+					raderContainer[j].multiplier++;
+					j = numRaderPrimes[0]; // terminates the search loop
+				}
+				else if (raderContainer[j].prime == 0) {
+					raderContainer[j].type = 0;
+					raderContainer[j].prime = i;
+					raderContainer[j].multiplier = 1;
+					j = numRaderPrimes[0];
+				}
+			}
+			i--;
+		}
+	}
+	tempSequence[0] *= tempSequence_temp; // restore the factors that were set aside so the next scan can register them
+	for (uint64_t i = app->configuration.fixMinRaderPrimeMult; i < app->configuration.fixMaxRaderPrimeMult; i++) { // remaining factors become type 1 entries
+		if (tempSequence[0] % i == 0) {
+			tempSequence[0] /= i;
+			for (uint64_t j = 0; j < numRaderPrimes[0]; j++) {
+				if (raderContainer[j].prime == i)
+				{
+					raderContainer[j].multiplier++;
+					j = numRaderPrimes[0];
+				}
+				else if (raderContainer[j].prime == 0) {
+					raderContainer[j].type = 1;
+					raderContainer[j].prime = i;
+					raderContainer[j].multiplier = 1;
+					j = numRaderPrimes[0];
+				}
+			}
+			i--;
+		}
+	}
+	//main loop for all primes
+	for (uint64_t i = 0; i < numRaderPrimes[0]; i++) {
+		//generator loop
+		for (uint64_t r = 2; r < raderContainer[i].prime; r++) { // take the first r whose powers r^2..r^(prime-2) never hit 1 modulo prime and whose (prime-1)-th power is 1
+			uint64_t test = r;
+			for (uint64_t iter = 0; iter < raderContainer[i].prime - 2; iter++) {
+				if (test == 1) { // a power of r reached 1 too early: force test to 0 and leave the loop so r is rejected
+					test = 0;
+					iter = raderContainer[i].prime;
+				}
+				test = ((test * r) % raderContainer[i].prime);
+			}
+			if (test == 1) {
+				raderContainer[i].generator = r;
+				r = raderContainer[i].prime; // terminates the generator search
+			}
+		}
+		// per-entry setup: fixed register counts for type 1, FFT decomposition of (prime - 1) for type 0
+		//subsplit and information initialization
+		if (raderContainer[i].type) {//Multiplication
+			raderContainer[i].registers_per_thread = 2;
+			raderContainer[i].min_registers_per_thread = 2;
+		}
+		else {//FFT
+			locTempSequence = raderContainer[i].prime - 1;
+			raderContainer[i].containerFFTDim = raderContainer[i].prime - 1;
+			raderContainer[i].containerFFTNum = fft_radix_part * tempSequence_copy / raderContainer[i].prime;
+			uint64_t stageID = 0;
+			for (uint64_t j = 2; j < app->configuration.fixMinRaderPrimeMult; j++) { // record the multiplicity of each small factor of prime - 1
+				if (locTempSequence % j == 0) {
+					locTempSequence /= j;
+					raderContainer[i].loc_multipliers[j]++;
+					//raderContainer[i].stageRadix[stageID] = j;
+					//raderContainer[i].numThreadLaunches[stageID] = fft_radix_part * (tempSequence_copy / raderContainer[i].prime) * ((raderContainer[i].prime-1) / j);
+					//stageID++;
+					j--;
+				}
+			}
+			//uint64_t isGoodSequence;
+			//if (raderContainer[i].containerFFTNum<8)
+			res = VkFFTGetRegistersPerThreadOptimizeShared(raderContainer[i].prime - 1, raderContainer[i].registers_per_thread_per_radix, &raderContainer[i].registers_per_thread, &raderContainer[i].min_registers_per_thread);
+			//else
+				//res = VkFFTGetRegistersPerThread(raderContainer[i].prime - 1, 0, 0, 1, raderContainer[i].loc_multipliers, raderContainer[i].registers_per_thread_per_radix, &raderContainer[i].registers_per_thread, &raderContainer[i].min_registers_per_thread, &isGoodSequence);
+			if (res != VKFFT_SUCCESS) return res;
+			if (locTempSequence != 1) { // prime - 1 still has a larger factor: build a nested tree for it. NOTE(review): type 0 entries are only created above when prime - 1 reduces to 1 over this same radix range, so this branch looks unreachable from this function - confirm
+				res = VkFFTConstructRaderTree(app, &raderContainer[i].container, &locTempSequence, &raderContainer[i].numSubPrimes, fft_radix_part * tempSequence_copy / raderContainer[i].prime);
+				if (res != VKFFT_SUCCESS) return res;
+				for (uint64_t j = 0; j < raderContainer[i].numSubPrimes; j++) {
+					for (uint64_t t = 0; t < raderContainer[i].container[j].multiplier; t++) { // one stage per occurrence of each sub-prime
+						raderContainer[i].stageRadix[stageID] = raderContainer[i].container[j].prime;
+						stageID++;
+					}
+				}
+			}
+			raderContainer[i].numStages = stageID;
+		}
+	}
+	return res;
+}
+static inline VkFFTResult VkFFTOptimizeRaderFFTRegisters(VkFFTRaderContainer* raderContainer, uint64_t numRaderPrimes, uint64_t fftDim, uint64_t* min_registers_per_thread, uint64_t* registers_per_thread, uint64_t* registers_per_thread_per_radix) { // Adjusts the register counts of the main sequence (the three pointer arguments) and of every type 0 container against each other, then recurses into containers that have sub-primes; returns the first non-success result of the recursion, otherwise VKFFT_SUCCESS.
+	VkFFTResult res = VKFFT_SUCCESS;
+	for (int64_t i = 0; i < (int64_t)numRaderPrimes; i++) {
+		if (raderContainer[i].type == 0) {
+			if (raderContainer[i].min_registers_per_thread / min_registers_per_thread[0] >= 2) { // container minimum is at least twice the main minimum: scale the main minimum by the integer ratio
+				min_registers_per_thread[0] *= (raderContainer[i].min_registers_per_thread / min_registers_per_thread[0]);
+				for (uint64_t j = 0; j < 33; j++) {
+					if ((registers_per_thread_per_radix[j] > 0) && (registers_per_thread_per_radix[j] < min_registers_per_thread[0])) registers_per_thread_per_radix[j] *= (uint64_t)ceil(min_registers_per_thread[0] / (double)registers_per_thread_per_radix[j]);
+				}
+				for (uint64_t j = 0; j < 33; j++) {
+					if (registers_per_thread_per_radix[j] > registers_per_thread[0]) registers_per_thread[0] = registers_per_thread_per_radix[j];
+				}
+			}
+			else if (min_registers_per_thread[0] / raderContainer[i].min_registers_per_thread >= 2) { // mirrored case: the main minimum is at least twice the container minimum
+				raderContainer[i].min_registers_per_thread *= (min_registers_per_thread[0] / raderContainer[i].min_registers_per_thread);
+				for (uint64_t j = 0; j < 33; j++) {
+					if ((raderContainer[i].registers_per_thread_per_radix[j] > 0) && (raderContainer[i].registers_per_thread_per_radix[j] < raderContainer[i].min_registers_per_thread)) raderContainer[i].registers_per_thread_per_radix[j] *= (uint64_t)ceil(raderContainer[i].min_registers_per_thread / (double)raderContainer[i].registers_per_thread_per_radix[j]);
+				}
+				for (uint64_t j = 0; j < 33; j++) {
+					if (raderContainer[i].registers_per_thread_per_radix[j] > raderContainer[i].registers_per_thread) raderContainer[i].registers_per_thread = raderContainer[i].registers_per_thread_per_radix[j];
+				}
+			}
+			// if the container minimum is still below the main minimum, grow each used entry in steps of its radix j until it reaches the main minimum
+			if (raderContainer[i].min_registers_per_thread < min_registers_per_thread[0]) {
+				for (uint64_t j = 0; j < 33; j++) {
+					if (raderContainer[i].registers_per_thread_per_radix[j] > 0) {
+						while (raderContainer[i].registers_per_thread_per_radix[j] < min_registers_per_thread[0])
+							raderContainer[i].registers_per_thread_per_radix[j] += j;
+						if (raderContainer[i].registers_per_thread_per_radix[j] > raderContainer[i].registers_per_thread)
+							raderContainer[i].registers_per_thread = raderContainer[i].registers_per_thread_per_radix[j];
+					}
+				}
+			}
+			// grow entries while containerFFTNum * scaling exceeds ceil(fftDim / min_registers_per_thread[0])
+			for (int64_t j = 2; j < 33; j++) {
+				if (raderContainer[i].registers_per_thread_per_radix[j] != 0) {
+					double scaling = (raderContainer[i].containerFFTDim > raderContainer[i].registers_per_thread_per_radix[j]) ? ceil(raderContainer[i].containerFFTDim / (double)raderContainer[i].registers_per_thread_per_radix[j]) : 1.0 / floor(raderContainer[i].registers_per_thread_per_radix[j] / (double)raderContainer[i].containerFFTDim); // ceil(containerFFTDim / regs) when containerFFTDim > regs, otherwise 1 / floor(regs / containerFFTDim)
+					while (((uint64_t)ceil(fftDim / (double)min_registers_per_thread[0])) < (raderContainer[i].containerFFTNum * scaling)) {
+						raderContainer[i].registers_per_thread_per_radix[j] += j;
+						scaling = (raderContainer[i].containerFFTDim > raderContainer[i].registers_per_thread_per_radix[j]) ? ceil(raderContainer[i].containerFFTDim / (double)raderContainer[i].registers_per_thread_per_radix[j]) : 1.0 / floor(raderContainer[i].registers_per_thread_per_radix[j] / (double)raderContainer[i].containerFFTDim);
+					}
+					if (raderContainer[i].registers_per_thread_per_radix[j] > raderContainer[i].registers_per_thread) raderContainer[i].registers_per_thread = raderContainer[i].registers_per_thread_per_radix[j];
+				}
+			}
+			if (raderContainer[i].registers_per_thread > registers_per_thread[0]) registers_per_thread[0] = raderContainer[i].registers_per_thread; // propagate the container maximum to the main maximum
+		}
+	}
+	//try to increase registers usage closer to registers_per_thread across all primes
+	for (int64_t i = 0; i < (int64_t)numRaderPrimes; i++) {
+		if (raderContainer[i].type == 0) {
+			for (int64_t j = 2; j < 33; j++) {
+				if (raderContainer[i].registers_per_thread_per_radix[j] > 0) {
+					while ((raderContainer[i].registers_per_thread_per_radix[j] + j) <= registers_per_thread[0] + 1) {// fix
+						raderContainer[i].registers_per_thread_per_radix[j] += j;
+					}
+				}
+			}
+			raderContainer[i].registers_per_thread = 0; // recompute the container's max and min from the adjusted entries
+			raderContainer[i].min_registers_per_thread = -1; // wraps to the maximum uint64_t value so the scan below can only lower it
+			for (int64_t j = 2; j < 33; j++) {
+				if (raderContainer[i].registers_per_thread_per_radix[j] > 0) {
+					if (raderContainer[i].registers_per_thread_per_radix[j] < raderContainer[i].min_registers_per_thread) {
+						raderContainer[i].min_registers_per_thread = raderContainer[i].registers_per_thread_per_radix[j];
+					}
+					if (raderContainer[i].registers_per_thread_per_radix[j] > raderContainer[i].registers_per_thread) {
+						raderContainer[i].registers_per_thread = raderContainer[i].registers_per_thread_per_radix[j];
+					}
+				}
+			}
+		}
+	}
+	//subprimes optimization
+	for (int64_t i = 0; i < (int64_t)numRaderPrimes; i++) {
+		if (raderContainer[i].numSubPrimes) {
+			res = VkFFTOptimizeRaderFFTRegisters(raderContainer[i].container, raderContainer[i].numSubPrimes, fftDim, min_registers_per_thread, registers_per_thread, registers_per_thread_per_radix); // nested containers are balanced against the same main-sequence values
+			if (res != VKFFT_SUCCESS) return res;
+		}
+	}
+	return res;
+}
+static inline VkFFTResult VkFFTOptimizeRadixKernels(uint64_t* registers_per_thread_per_radix, uint64_t* loc_multipliers, uint64_t registerBoost, uint64_t* maxNonPow2Radix, uint64_t* reqLocRegs, VkFFTRaderContainer* raderContainer, uint64_t numRaderPrimes) { // Regroups the factor counts in loc_multipliers into counts of larger radices (32, 16, 15, 14, 12, 10, 9, 8, 6, 4, tried in that order) where registers_per_thread_per_radix allows it, applies the registerBoost 2/4 adjustments, and raises maxNonPow2Radix[0] and reqLocRegs[0] from the non-power-of-two radices in use. Rader containers are processed first, recursively.
+	VkFFTResult res = VKFFT_SUCCESS;
+	if (numRaderPrimes) {
+		for (uint64_t i = 0; i < numRaderPrimes; i++) {
+			res = VkFFTOptimizeRadixKernels(raderContainer[i].registers_per_thread_per_radix, raderContainer[i].loc_multipliers, 1, maxNonPow2Radix, reqLocRegs, raderContainer[i].container, raderContainer[i].numSubPrimes); // containers always use registerBoost 1 and share maxNonPow2Radix/reqLocRegs with the caller
+			if (res != VKFFT_SUCCESS) return res;
+		}
+	}
+	//optimize used radix kernels
+	if (((registers_per_thread_per_radix[32] > 0) || ((registers_per_thread_per_radix[2] % 32) == 0)) && ((registers_per_thread_per_radix[32]) % 32 == 0) && (loc_multipliers[2] >= 5)) { // five radix-2 factors make one radix-32 stage
+		loc_multipliers[32] = loc_multipliers[2] / 5;
+		loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[32] * 5;
+		if ((registers_per_thread_per_radix[2] % 32) == 0) registers_per_thread_per_radix[32] = registers_per_thread_per_radix[2];
+	}
+	if (((registers_per_thread_per_radix[16] > 0) || ((registers_per_thread_per_radix[2] % 16) == 0)) && ((registers_per_thread_per_radix[16]) % 16 == 0) && (loc_multipliers[2] >= 4)) { // four radix-2 factors make one radix-16 stage
+		loc_multipliers[16] = loc_multipliers[2] / 4;
+		loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[16] * 4;
+		if ((registers_per_thread_per_radix[2] % 16) == 0) registers_per_thread_per_radix[16] = registers_per_thread_per_radix[2];//if we got 16 regs, why not use r16 kernel
+	}
+	if ((registers_per_thread_per_radix[15] > 0) && ((registers_per_thread_per_radix[15]) % 15 == 0) && (loc_multipliers[3] >= 1) && (loc_multipliers[5] >= 1)) { // radix 15 = 3 * 5: as many pairs as the scarcer factor allows
+		loc_multipliers[15] = (loc_multipliers[3] > loc_multipliers[5]) ? loc_multipliers[5] : loc_multipliers[3];
+		loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[15];
+		loc_multipliers[5] = loc_multipliers[5] - loc_multipliers[15];
+	}
+	if ((registers_per_thread_per_radix[14] > 0) && ((registers_per_thread_per_radix[14]) % 14 == 0) && (loc_multipliers[2] >= 1) && (loc_multipliers[7] >= 1)) { // radix 14 = 2 * 7
+		loc_multipliers[14] = (loc_multipliers[2] > loc_multipliers[7]) ? loc_multipliers[7] : loc_multipliers[2];
+		loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[14];
+		loc_multipliers[7] = loc_multipliers[7] - loc_multipliers[14];
+	}
+	if ((registers_per_thread_per_radix[12] > 0) && ((registers_per_thread_per_radix[12]) % 12 == 0) && (loc_multipliers[2] >= 2) && (loc_multipliers[3] >= 1)) { // radix 12 = 2 * 2 * 3: limited by min(count of 3, count of 2 / 2)
+		loc_multipliers[12] = (loc_multipliers[2] > 2 * loc_multipliers[3]) ? loc_multipliers[3] : loc_multipliers[2] / 2;
+		loc_multipliers[2] = loc_multipliers[2] - 2 * loc_multipliers[12];
+		loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[12];
+	}
+	if ((registers_per_thread_per_radix[10] > 0) && ((registers_per_thread_per_radix[10]) % 10 == 0) && (loc_multipliers[2] >= 1) && (loc_multipliers[5] >= 1)) { // radix 10 = 2 * 5
+		loc_multipliers[10] = (loc_multipliers[2] > loc_multipliers[5]) ? loc_multipliers[5] : loc_multipliers[2];
+		loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[10];
+		loc_multipliers[5] = loc_multipliers[5] - loc_multipliers[10];
+	}
+	if ((registers_per_thread_per_radix[9] > 0) && ((registers_per_thread_per_radix[9]) % 9 == 0) && (loc_multipliers[3] >= 2)) { // radix 9 = 3 * 3
+		loc_multipliers[9] = loc_multipliers[3] / 2;
+		loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[9] * 2;
+	}
+	if (((registers_per_thread_per_radix[8] > 0) || ((registers_per_thread_per_radix[2] % 8) == 0)) && ((registers_per_thread_per_radix[8]) % 8 == 0) && (loc_multipliers[2] >= 3)) { // three radix-2 factors make one radix-8 stage
+		loc_multipliers[8] = loc_multipliers[2] / 3;
+		loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[8] * 3;
+		if ((registers_per_thread_per_radix[2] % 8) == 0) registers_per_thread_per_radix[8] = registers_per_thread_per_radix[2];
+	}
+	if ((registers_per_thread_per_radix[6] > 0) && ((registers_per_thread_per_radix[6]) % 6 == 0) && (loc_multipliers[2] >= 1) && (loc_multipliers[3] >= 1)) { // radix 6 = 2 * 3
+		loc_multipliers[6] = (loc_multipliers[2] > loc_multipliers[3]) ? loc_multipliers[3] : loc_multipliers[2];
+		loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[6];
+		loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[6];
+	}
+	if (((registers_per_thread_per_radix[4] > 0) || ((registers_per_thread_per_radix[2] % 4) == 0)) && ((registers_per_thread_per_radix[4]) % 4 == 0) && (loc_multipliers[2] >= 2)) { // two radix-2 factors make one radix-4 stage
+		loc_multipliers[4] = loc_multipliers[2] / 2;
+		loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[4] * 2;
+		if ((registers_per_thread_per_radix[2] % 4) == 0) registers_per_thread_per_radix[4] = registers_per_thread_per_radix[2];
+	}
+	if ((registerBoost == 2) && (loc_multipliers[2] == 0)) { // registerBoost 2 with no radix-2 stage left: split one larger power-of-two stage so that radix 2 is present
+		if (loc_multipliers[4] > 0) {
+			loc_multipliers[4]--;
+			loc_multipliers[2] = 2;
+		}
+		else if (loc_multipliers[8] > 0) {
+			loc_multipliers[8]--;
+			loc_multipliers[4]++;
+			loc_multipliers[2]++;
+		}
+		else if (loc_multipliers[16] > 0) {
+			loc_multipliers[16]--;
+			loc_multipliers[8]++;
+			loc_multipliers[2]++;
+		}
+		else if (loc_multipliers[32] > 0) {
+			loc_multipliers[32]--;
+			loc_multipliers[16]++;
+			loc_multipliers[2]++;
+		}
+	}
+	if ((registerBoost == 4) && (loc_multipliers[4] == 0)) { // registerBoost 4 with no radix-4 stage: rearrange power-of-two stages so that radix 4 is present
+		if (loc_multipliers[8] > 0) {
+			loc_multipliers[8]--;
+			loc_multipliers[4]++;
+			loc_multipliers[2]++;
+		}
+		else if (loc_multipliers[16] > 0) {
+			if (loc_multipliers[2] == 0) {
+				loc_multipliers[16]--;
+				loc_multipliers[4] = 2;
+			}
+			else {
+				loc_multipliers[16]--;
+				loc_multipliers[4]++;
+				loc_multipliers[2]--;
+				loc_multipliers[8]++;
+			}
+		}
+		else if (loc_multipliers[32] > 0) {
+			if (loc_multipliers[2] == 0) {
+				loc_multipliers[32]--;
+				loc_multipliers[8]++;
+				loc_multipliers[4]++;
+			}
+			else {
+				loc_multipliers[32]--;
+				loc_multipliers[16]++;
+				loc_multipliers[4]++;
+				loc_multipliers[2]--;
+			}
+		}
+	}
+	for (uint64_t i = 2; i < 33; i++) { // raise maxNonPow2Radix[0] and reqLocRegs[0] from every radix that is in use
+		uint64_t usedLocRegs = 0;
+		if (loc_multipliers[i] > 0) {
+			switch (i) { // composite radices 6, 9, 10, 12, 14 and 15 report a smaller value than the radix itself
+			case 6:
+				usedLocRegs = 3;
+				break;
+			case 9:
+				usedLocRegs = 3;
+				break;
+			case 10:
+				usedLocRegs = 5;
+				break;
+			case 12:
+				usedLocRegs = 3;
+				break;
+			case 14:
+				usedLocRegs = 7;
+				break;
+			case 15:
+				usedLocRegs = 5;
+				break;
+			default:
+				usedLocRegs = i;
+				break;
+			}
+		}
+		if ((loc_multipliers[i] > 0) && ((i & (i - 1)) != 0) && (i > maxNonPow2Radix[0])) { // (i & (i - 1)) != 0 is true exactly when i is not a power of two
+			maxNonPow2Radix[0] = i;
+		}
+		if ((usedLocRegs > reqLocRegs[0]) && ((i & (i - 1)) != 0)) {
+			reqLocRegs[0] = usedLocRegs;
+		}
+	}
+	return res;
+}
+static inline VkFFTResult VkFFTGetRaderFFTStages(VkFFTRaderContainer* raderContainer, uint64_t numRaderPrimes, uint64_t* stageid, uint64_t* stageRadix, uint64_t* stage_rader_generator) { // Appends one stage per remaining multiplier of every Rader entry to stageRadix/stage_rader_generator, advancing stageid[0]; then, for each type 0 entry, builds that entry's own stage list (sub-prime stages via recursion, followed by the radices counted in loc_multipliers from 32 down to 2). The multiplier and loc_multipliers counters are decremented to zero in the process.
+	VkFFTResult res = VKFFT_SUCCESS;
+	for (int64_t i = 0; i < (int64_t)numRaderPrimes; i++) {
+		if (raderContainer[i].multiplier > 0) {
+			stageRadix[stageid[0]] = raderContainer[i].prime;
+			stage_rader_generator[stageid[0]] = raderContainer[i].generator;
+			raderContainer[i].multiplier--;
+			i--; // cancels the loop's i++ so the same entry is revisited until its multiplier is 0
+			stageid[0]++;
+			//axes[k].specializationConstants.numStages++;
+			//find primitive root
+		}
+	}
+	for (int64_t i = 0; i < (int64_t)numRaderPrimes; i++) {
+		if (raderContainer[i].type == 0) {
+			if (raderContainer[i].numSubPrimes > 0) {
+				res = VkFFTGetRaderFFTStages(raderContainer[i].container, raderContainer[i].numSubPrimes, &raderContainer[i].numStages, raderContainer[i].stageRadix, raderContainer[i].stage_rader_generator); // sub-prime stages are written into this entry's own stage arrays
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			for (uint64_t j = 32; j > 1; j--) { // emit the ordinary radix stages from the largest radix to the smallest
+				if (raderContainer[i].loc_multipliers[j] > 0) {
+					raderContainer[i].stageRadix[raderContainer[i].numStages] = j;
+					raderContainer[i].loc_multipliers[j]--;
+					j++; // cancels the loop's j-- so radix j is emitted once per counted occurrence
+					raderContainer[i].numStages++;
+				}
+			}
+			/*//make that convolution step uses min_regs radix - max working threads
+			uint64_t stage_id_swap = axes[k].specializationConstants.raderContainer[i].numStages - 1;
+			uint64_t temp_radix = axes[k].specializationConstants.raderContainer[i].stageRadix[axes[k].specializationConstants.raderContainer[i].numStages - 1];
+			uint64_t temp_regs = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[axes[k].specializationConstants.raderContainer[i].numStages - 1]];
+
+			for (uint64_t j = 0; j < axes[k].specializationConstants.raderContainer[i].numStages-1; j++) {
+				if (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[j]] < axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap]])
+					stage_id_swap = j;
+			}
+			axes[k].specializationConstants.raderContainer[i].stageRadix[axes[k].specializationConstants.raderContainer[i].numStages - 1] = axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap];
+			axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[axes[k].specializationConstants.raderContainer[i].numStages - 1]] = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap]];
+			axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap] = temp_radix;
+			axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap]] = temp_regs;
+
+			//make that first step uses second to min_regs radix
+			stage_id_swap = 0;
+			temp_radix = axes[k].specializationConstants.raderContainer[i].stageRadix[0];
+			temp_regs = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[0]];
+
+			for (uint64_t j = 1; j < axes[k].specializationConstants.raderContainer[i].numStages - 1; j++) {
+				if (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[j]] < axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap]])
+					stage_id_swap = j;
+			}
+			axes[k].specializationConstants.raderContainer[i].stageRadix[0] = axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap];
+			axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[0]] = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap]];
+			axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap] = temp_radix;
+			axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[axes[k].specializationConstants.raderContainer[i].stageRadix[stage_id_swap]] = temp_regs;
+			*/
+		}
+	}
+	return res;
+}
+static inline VkFFTResult VkFFTMinMaxRegisterCheck(uint64_t numStages, uint64_t* stageRadix, uint64_t* min_registers_per_thread, uint64_t* registers_per_thread, uint64_t* registers_per_thread_per_radix, VkFFTRaderContainer* raderContainer, uint64_t numRaderPrimes, uint64_t* stage_rader_generator) { // Walks the numStages stages of a sequence and widens [min_registers_per_thread[0], registers_per_thread[0]] to cover the non-zero register count of every ordinary radix stage, descending into the matching type 0 Rader container for stages whose stage_rader_generator entry is non-zero. Returns the first non-success result of the recursion, otherwise VKFFT_SUCCESS.
+	VkFFTResult res = VKFFT_SUCCESS;
+	for (int64_t j = 0; j < (int64_t)numStages; j++) {
+		if (stage_rader_generator[j] == 0) { // ordinary radix stage: use this level's register table directly
+			if (registers_per_thread_per_radix[stageRadix[j]] > 0) {
+				if (registers_per_thread_per_radix[stageRadix[j]] < min_registers_per_thread[0]) {
+					min_registers_per_thread[0] = registers_per_thread_per_radix[stageRadix[j]];
+				}
+				if (registers_per_thread_per_radix[stageRadix[j]] > registers_per_thread[0]) {
+					registers_per_thread[0] = registers_per_thread_per_radix[stageRadix[j]];
+				}
+			}
+		}
+		else { // Rader stage: find the container whose prime equals this stage's radix
+			for (int64_t i = 0; i < (int64_t)numRaderPrimes; i++) {
+				if (raderContainer[i].prime == stageRadix[j]) {
+					if (raderContainer[i].type == 0) {
+						for (int64_t j2 = 0; j2 < (int64_t)raderContainer[i].numStages; j2++) {
+							if (raderContainer[i].stage_rader_generator[j2] == 0) { // index the container's own stage list with j2; the outer index j belongs to the caller's stage list and would let a nested Rader prime be used as an index into the register table
+								if (raderContainer[i].registers_per_thread_per_radix[raderContainer[i].stageRadix[j2]] > 0) {
+									if (raderContainer[i].registers_per_thread_per_radix[raderContainer[i].stageRadix[j2]] < min_registers_per_thread[0]) {
+										min_registers_per_thread[0] = raderContainer[i].registers_per_thread_per_radix[raderContainer[i].stageRadix[j2]];
+									}
+									if (raderContainer[i].registers_per_thread_per_radix[raderContainer[i].stageRadix[j2]] > registers_per_thread[0]) {
+										registers_per_thread[0] = raderContainer[i].registers_per_thread_per_radix[raderContainer[i].stageRadix[j2]];
+									}
+								}
+							}
+							else { // nested Rader stage: re-run the check on the container's stage list against its sub-containers
+								res = VkFFTMinMaxRegisterCheck(raderContainer[i].numStages, raderContainer[i].stageRadix, min_registers_per_thread, registers_per_thread, raderContainer[i].registers_per_thread_per_radix, raderContainer[i].container, raderContainer[i].numSubPrimes, raderContainer[i].stage_rader_generator);
+								if (res != VKFFT_SUCCESS) return res;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	return res;
+}
+static inline VkFFTResult VkFFTGetRaderFFTThreadsNum(VkFFTRaderContainer* raderContainer, uint64_t numRaderPrimes, uint64_t* numThreads) { // Raises numThreads[0] to the largest containerFFTNum * ceil(containerFFTDim / registers_per_thread_per_radix[stageRadix[j]]) found over the ordinary (stage_rader_generator[j] == 0) stages of every type 0 container, nested sub-prime containers included; numThreads[0] is never lowered.
+	VkFFTResult res = VKFFT_SUCCESS;
 
-static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPlan, uint64_t axis_id, uint64_t supportAxis) {
+	for (int64_t i = 0; i < (int64_t)numRaderPrimes; i++) {
+		if (raderContainer[i].type == 0) {
+			if (raderContainer[i].numSubPrimes > 0) { // nested containers are visited before this entry's own stages
+				res = VkFFTGetRaderFFTThreadsNum(raderContainer[i].container, raderContainer[i].numSubPrimes, numThreads);
+				if (res != VKFFT_SUCCESS) return res;
+			}
+			for (int64_t j = 0; j < (int64_t)raderContainer[i].numStages; j++) {
+				if (raderContainer[i].stage_rader_generator[j] == 0) { // only ordinary radix stages contribute; Rader stages are skipped here
+					if (raderContainer[i].containerFFTNum * (uint64_t)ceil(raderContainer[i].containerFFTDim / (double)raderContainer[i].registers_per_thread_per_radix[raderContainer[i].stageRadix[j]]) > numThreads[0]) numThreads[0] = raderContainer[i].containerFFTNum * (uint64_t)ceil(raderContainer[i].containerFFTDim / (double)raderContainer[i].registers_per_thread_per_radix[raderContainer[i].stageRadix[j]]);
+				}
+			}
+		}
+	}
+	return res;
+}
+
+static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPlan, uint64_t axis_id) {
 	VkFFTResult res = VKFFT_SUCCESS;
 	VkFFTAxis* axes = FFTPlan->axes[axis_id];
 
@@ -23394,9 +30539,11 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 			complexSize = (2 * sizeof(float));
 		else
 			complexSize = (2 * sizeof(float));
+
 	uint64_t usedSharedMemory = ((app->configuration.size[axis_id] & (app->configuration.size[axis_id] - 1)) == 0) ? app->configuration.sharedMemorySizePow2 : app->configuration.sharedMemorySize;
 	uint64_t maxSequenceLengthSharedMemory = usedSharedMemory / complexSize;
 	uint64_t maxSingleSizeNonStrided = maxSequenceLengthSharedMemory;
+
 	uint64_t nonStridedAxisId = (app->configuration.considerAllAxesStrided) ? -1 : 0;
 	uint64_t max_rhs = 1;
 	for (uint64_t i = 0; i < 3; i++) {
@@ -23430,18 +30577,117 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 		if (app->configuration.performBandwidthBoost > 0)
 			axes->specializationConstants.performBandwidthBoost = app->configuration.performBandwidthBoost;
 	}
+	//initial Stockham + Rader check
 	uint64_t multipliers[33];
 	for (uint64_t i = 0; i < 33; i++) {
 		multipliers[i] = 0;
 	}
+
 	uint64_t tempSequence = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id];
-	for (uint64_t i = 2; i < 14; i++) {
+	for (uint64_t i = 2; i < app->configuration.fixMinRaderPrimeMult; i++) {
 		if (tempSequence % i == 0) {
 			tempSequence /= i;
 			multipliers[i]++;
 			i--;
 		}
 	}
+	// verify that we haven't checked for 3 steps being not enougth for Rader before
+	uint64_t forceRaderTwoUpload = 0; // for sequences like 17*1023 it is better to switch to two uploads for better occupancy. We will switch if one of the Rader primes requests more than 512 threads.
+	if (!app->useBluesteinFFT[axis_id]) {
+		uint64_t useRaderMult = 0;
+		uint64_t rader_primes[20];
+		uint64_t rader_multipliers[20];
+		for (uint64_t i = 0; i < 20; i++) {
+			rader_multipliers[i] = 0;
+			rader_primes[i] = 0;
+		}
+		uint64_t tempSequence_temp = 1;
+		uint64_t maxSequenceLengthSharedMemoryStrided_temp = (app->configuration.coalescedMemory > complexSize) ? usedSharedMemory / (app->configuration.coalescedMemory) : usedSharedMemory / complexSize;
+		uint64_t limit_max_rader_prime = ((axis_id == nonStridedAxisId) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] <= maxSequenceLengthSharedMemory)) ? maxSequenceLengthSharedMemory : maxSequenceLengthSharedMemoryStrided_temp;
+		if (limit_max_rader_prime > app->configuration.fixMaxRaderPrimeFFT) limit_max_rader_prime = app->configuration.fixMaxRaderPrimeFFT;
+		for (uint64_t i = app->configuration.fixMinRaderPrimeMult; i < limit_max_rader_prime; i++) {
+			if (tempSequence % i == 0) {
+				if (i < app->configuration.fixMinRaderPrimeFFT) {
+					tempSequence_temp *= i;
+					tempSequence /= i;
+					i--;
+					continue;
+				}
+				//Sophie Germain safe prime check
+				uint64_t tempSequence2 = i - 1;
+				for (uint64_t j = 2; j < app->configuration.fixMinRaderPrimeMult; j++) {
+					if (tempSequence2 % j == 0) {
+						tempSequence2 /= j;
+						j--;
+					}
+				}
+				if (tempSequence2 != 1) {
+					maxSequenceLengthSharedMemory = (usedSharedMemory - (i - 1) * complexSize) / complexSize;
+					maxSequenceLengthSharedMemoryStrided_temp = (app->configuration.coalescedMemory > complexSize) ? (usedSharedMemory - (i - 1) * complexSize) / (app->configuration.coalescedMemory) : (usedSharedMemory - (i - 1) * complexSize) / complexSize;
+					limit_max_rader_prime = ((axis_id == nonStridedAxisId) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] <= maxSequenceLengthSharedMemory)) ? maxSequenceLengthSharedMemory : maxSequenceLengthSharedMemoryStrided_temp;
+					tempSequence_temp *= i;
+					tempSequence /= i;
+					i--;
+					continue;
+				}
+				tempSequence /= i;
+				if (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / i > 512) forceRaderTwoUpload = 1;
+				for (uint64_t j = 0; j < 20; j++) {
+					if (rader_primes[j] == i)
+					{
+						rader_multipliers[j]++;
+						j = 20;
+					}
+					else if (rader_primes[j] == 0) {
+						rader_primes[j] = i;
+						rader_multipliers[j]++;
+						j = 20;
+					}
+
+				}
+				i--;
+			}
+		}
+		tempSequence *= tempSequence_temp;
+		uint64_t maxRaderPrimeFromThreadNumCoalesced = (app->configuration.maxThreadsNum / (app->configuration.coalescedMemory / complexSize)) * 2 - 1;
+		if (maxRaderPrimeFromThreadNumCoalesced < app->configuration.fixMaxRaderPrimeMult) app->configuration.fixMaxRaderPrimeMult = maxRaderPrimeFromThreadNumCoalesced;
+
+		for (uint64_t i = app->configuration.fixMinRaderPrimeMult; i < app->configuration.fixMaxRaderPrimeMult; i++) {
+			if (tempSequence % i == 0) {
+				tempSequence /= i;
+				for (uint64_t j = 0; j < 20; j++) {
+					if (rader_primes[j] == i)
+					{
+						rader_multipliers[j]++;
+						j = 20;
+					}
+					else if (rader_primes[j] == 0) {
+						rader_primes[j] = i;
+						rader_multipliers[j]++;
+						j = 20;
+					}
+
+				}
+				useRaderMult = i;
+				i--;
+			}
+		}
+		if (tempSequence != 1) {
+			useRaderMult = 0;
+		}
+		if (useRaderMult) {
+			if (tempSequence == 1) usedSharedMemory -= (useRaderMult - 1) * complexSize; //reserve memory for Rader 
+		//check once again
+			if ((axis_id == 0) && (app->configuration.performR2C) && (app->configuration.size[axis_id] > maxSingleSizeNonStrided)) {
+				FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] = app->configuration.size[axis_id] / 2; // now in actualFFTSize - modified dimension size for R2C/DCT
+				FFTPlan->actualPerformR2CPerAxis[axis_id] = 0;
+				FFTPlan->multiUploadR2C = 1;
+			}
+		}
+		maxSequenceLengthSharedMemory = usedSharedMemory / complexSize;
+		maxSingleSizeNonStrided = maxSequenceLengthSharedMemory;
+	}
+	//initial Bluestein check
 	if (tempSequence != 1) {
 		app->useBluesteinFFT[axis_id] = 1;
 		if (axis_id != nonStridedAxisId) {
@@ -23515,7 +30761,7 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 					uint64_t registers_per_thread = 0;
 					uint64_t min_registers_per_thread = -1;
 					uint64_t isGoodSequence = 0;
-					res = VkFFTGetRegistersPerThread(tempSequence, 0, max_rhs / tempSequence, multipliers, registers_per_thread_per_radix, &registers_per_thread, &min_registers_per_thread, &isGoodSequence);
+					res = VkFFTGetRegistersPerThread(app, tempSequence, 0, max_rhs / tempSequence, axes->specializationConstants.useRader, multipliers, registers_per_thread_per_radix, &registers_per_thread, &min_registers_per_thread, &isGoodSequence);
 					if (res != VKFFT_SUCCESS) return res;
 					if (isGoodSequence) FFTSizeSelected = 1;
 					else tempSequence++;
@@ -23592,7 +30838,7 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 						uint64_t registers_per_thread = 0;
 						uint64_t min_registers_per_thread = -1;
 						uint64_t isGoodSequence = 0;
-						res = VkFFTGetRegistersPerThread(tempSequence, 0, max_rhs / tempSequence, multipliers, registers_per_thread_per_radix, &registers_per_thread, &min_registers_per_thread, &isGoodSequence);
+						res = VkFFTGetRegistersPerThread(app, tempSequence, 0, max_rhs / tempSequence, axes->specializationConstants.useRader, multipliers, registers_per_thread_per_radix, &registers_per_thread, &min_registers_per_thread, &isGoodSequence);
 						if (res != VKFFT_SUCCESS) return res;
 						if (isGoodSequence) FFTSizeSelected = 1;
 						else tempSequence++;
@@ -23612,25 +30858,6 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 	}
 	uint64_t isPowOf2 = (pow(2, (uint64_t)log2(FFTPlan->actualFFTSizePerAxis[axis_id][axis_id])) == FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]) ? 1 : 0;
 	uint64_t locNumBatches = (app->configuration.numberBatches > app->actualNumBatches) ? app->configuration.numberBatches : app->actualNumBatches;
-	if (app->configuration.tempBufferSize[0] == 0) {
-		if ((app->configuration.performR2C) && (axis_id == 0)) {
-			if (FFTPlan->multiUploadR2C)
-				app->configuration.tempBufferSize[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] + 1) * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
-		}
-		else {
-			app->configuration.tempBufferSize[0] = FFTPlan->actualFFTSizePerAxis[axis_id][0] * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
-		}
-	}
-	if (app->useBluesteinFFT[axis_id]) {
-		if ((app->configuration.performR2C) && (axis_id == 0)) {
-			if (FFTPlan->multiUploadR2C) {
-				if ((FFTPlan->actualFFTSizePerAxis[axis_id][0] + 1) * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize > app->configuration.tempBufferSize[0]) app->configuration.tempBufferSize[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] + 1) * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
-			}
-		}
-		else {
-			if (FFTPlan->actualFFTSizePerAxis[axis_id][0] * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize > app->configuration.tempBufferSize[0]) app->configuration.tempBufferSize[0] = FFTPlan->actualFFTSizePerAxis[axis_id][0] * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
-		}
-	}
 	//return VKFFT_ERROR_UNSUPPORTED_RADIX;
 	uint64_t registerBoost = 1;
 	for (uint64_t i = 1; i <= app->configuration.registerBoost; i++) {
@@ -23698,13 +30925,14 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 		if (numPassesHalfBandwidth < numPasses) numPasses = numPassesHalfBandwidth;
 		else maxSingleSizeStridedHalfBandwidth = maxSingleSizeStrided;
 	}
-	if (((uint64_t)log2(FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]) >= app->configuration.swapTo3Stage4Step) && (app->configuration.swapTo3Stage4Step >= 17)) numPasses = 3;//Force set to 3 stage 4 step algorithm
+	if ((FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] >= app->configuration.swapTo3Stage4Step) && (app->configuration.swapTo3Stage4Step >= 131072)) numPasses = 3;//Force set to 3 stage 4 step algorithm
+	if (forceRaderTwoUpload && (numPasses == 1)) numPasses = 2;//Force set Rader cases that use more than 512 threads per one of Rader primes
 	uint64_t* locAxisSplit = FFTPlan->axisSplit[axis_id];
 	if (numPasses == 1) {
 		locAxisSplit[0] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id];
 	}
 	if (numPasses == 2) {
-		if (isPowOf2) {
+		if (isPowOf2 && (!((app->configuration.vendorID == 0x10DE) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] > 262144)))) {
 			if ((axis_id == nonStridedAxisId) && ((!app->configuration.reorderFourStep) || (app->useBluesteinFFT[axis_id]))) {
 				uint64_t maxPow8SharedMemory = (uint64_t)pow(8, ((uint64_t)log2(maxSequenceLengthSharedMemory)) / 3);
 				//unit stride
@@ -23732,6 +30960,7 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 			}
 			else {
 				uint64_t maxPow8Strided = (uint64_t)pow(8, ((uint64_t)log2(maxSingleSizeStrided)) / 3);
+				if (maxPow8Strided > 512) maxPow8Strided = 512;
 				//all FFTs are considered as non-unit stride
 				if (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / maxPow8Strided <= maxSingleSizeStrided) {
 					locAxisSplit[0] = maxPow8Strided;
@@ -23799,7 +31028,7 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 		}
 	}
 	if (numPasses == 3) {
-		if (isPowOf2) {
+		if (isPowOf2 && (!((app->configuration.vendorID == 0x10DE) && (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] > 262144)))) {
 			uint64_t maxPow8Strided = (uint64_t)pow(8, ((uint64_t)log2(maxSingleSizeStrided)) / 3);
 			if ((axis_id == nonStridedAxisId) && ((!app->configuration.reorderFourStep) || (app->useBluesteinFFT[axis_id]))) {
 				//unit stride
@@ -23862,23 +31091,29 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 					}
 				}
 			}
-			if (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[0] / maxPow8Strided <= maxSingleSizeStrided) {
-				locAxisSplit[1] = maxPow8Strided;
-				locAxisSplit[2] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[1] / locAxisSplit[0];
+			if (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[0] < maxPow8Strided) {
+				locAxisSplit[1] = (uint64_t)pow(2, (uint64_t)(log2(FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[0]) / 2));
+				locAxisSplit[2] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[0] / locAxisSplit[1];
 			}
 			else {
-				if (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[0] / maxSingleSizeStrided <= maxSingleSizeStrided) {
-					locAxisSplit[1] = maxSingleSizeStrided;
+				if (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[0] / maxPow8Strided <= maxSingleSizeStrided) {
+					locAxisSplit[1] = maxPow8Strided;
 					locAxisSplit[2] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[1] / locAxisSplit[0];
 				}
 				else {
-					locAxisSplit[1] = maxSingleSizeStridedHalfBandwidth;
-					locAxisSplit[2] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[1] / locAxisSplit[0];
+					if (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[0] / maxSingleSizeStrided <= maxSingleSizeStrided) {
+						locAxisSplit[1] = maxSingleSizeStrided;
+						locAxisSplit[2] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[1] / locAxisSplit[0];
+					}
+					else {
+						locAxisSplit[1] = maxSingleSizeStridedHalfBandwidth;
+						locAxisSplit[2] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] / locAxisSplit[1] / locAxisSplit[0];
+					}
+				}
+				if (locAxisSplit[2] < 64) {
+					locAxisSplit[1] = (locAxisSplit[2] == 0) ? locAxisSplit[1] / (64) : locAxisSplit[1] / (64 / locAxisSplit[2]);
+					locAxisSplit[2] = 64;
 				}
-			}
-			if (locAxisSplit[2] < 64) {
-				locAxisSplit[1] = (locAxisSplit[2] == 0) ? locAxisSplit[1] / (64) : locAxisSplit[1] / (64 / locAxisSplit[2]);
-				locAxisSplit[2] = 64;
 			}
 			if (locAxisSplit[2] > locAxisSplit[1]) {
 				uint64_t swap = locAxisSplit[1];
@@ -23943,6 +31178,25 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 		//printf("sequence length exceeds boundaries\n");
 		return VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH_R2C;
 	}
+	if (app->configuration.tempBufferSize[0] == 0) {
+		if ((app->configuration.performR2C) && (axis_id == 0)) {
+			if (FFTPlan->multiUploadR2C)
+				app->configuration.tempBufferSize[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] + 1) * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
+		}
+		else {
+			app->configuration.tempBufferSize[0] = FFTPlan->actualFFTSizePerAxis[axis_id][0] * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
+		}
+	}
+	if (app->useBluesteinFFT[axis_id]) {
+		if ((app->configuration.performR2C) && (axis_id == 0)) {
+			if (FFTPlan->multiUploadR2C) {
+				if ((FFTPlan->actualFFTSizePerAxis[axis_id][0] + 1) * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize > app->configuration.tempBufferSize[0]) app->configuration.tempBufferSize[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] + 1) * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
+			}
+		}
+		else {
+			if (FFTPlan->actualFFTSizePerAxis[axis_id][0] * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize > app->configuration.tempBufferSize[0]) app->configuration.tempBufferSize[0] = FFTPlan->actualFFTSizePerAxis[axis_id][0] * FFTPlan->actualFFTSizePerAxis[axis_id][1] * FFTPlan->actualFFTSizePerAxis[axis_id][2] * app->configuration.coordinateFeatures * locNumBatches * app->configuration.numberKernels * complexSize;
+		}
+	}
 	if (((app->configuration.reorderFourStep) && (!app->useBluesteinFFT[axis_id]))) {
 		for (uint64_t i = 0; i < numPasses; i++) {
 			if ((locAxisSplit[0] % 2 != 0) && (locAxisSplit[i] % 2 == 0)) {
@@ -23970,35 +31224,159 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 	for (uint64_t k = 0; k < numPasses; k++) {
 		tempSequence = locAxisSplit[k];
 		uint64_t loc_multipliers[33]; //split the smaller sequence
+		//split the smaller sequence
+		//uint64_t rader_multipliers[20]; //split the smaller sequence
+		//uint64_t* rader_generator = axes[k].specializationConstants.rader_generator_sorted; //split the smaller sequence
+		//uint64_t* rader_primes = axes[k].specializationConstants.rader_primes;
+
 		for (uint64_t i = 0; i < 33; i++) {
 			loc_multipliers[i] = 0;
 		}
-		for (uint64_t i = 2; i < 33; i++) {
+
+		for (uint64_t i = 2; i < app->configuration.fixMinRaderPrimeMult; i++) {
 			if (tempSequence % i == 0) {
 				tempSequence /= i;
 				loc_multipliers[i]++;
 				i--;
 			}
 		}
+		axes[k].specializationConstants.useRader = 0;
+		axes[k].specializationConstants.useRaderMult = 0;
+		axes[k].specializationConstants.useRaderFFT = 0;
+		if (tempSequence != 1) {
+			res = VkFFTConstructRaderTree(app, &axes[k].specializationConstants.raderContainer, &tempSequence, &axes[k].specializationConstants.numRaderPrimes, locAxisSplit[k] / tempSequence);
+			if (res != VKFFT_SUCCESS) return res;
+		}
+
+		for (int64_t i = 0; i < (int64_t)axes[k].specializationConstants.numRaderPrimes; i++) {
+			if (axes[k].specializationConstants.raderContainer[i].type == 0) {
+				if (axes[k].specializationConstants.useRaderFFT < axes[k].specializationConstants.raderContainer[i].prime) axes[k].specializationConstants.useRaderFFT = axes[k].specializationConstants.raderContainer[i].prime;
+			}
+			else {
+				if (axes[k].specializationConstants.useRaderMult < axes[k].specializationConstants.raderContainer[i].prime) axes[k].specializationConstants.useRaderMult = axes[k].specializationConstants.raderContainer[i].prime;
+			}
+		}
+		if (axes[k].specializationConstants.useRaderMult) {
+			app->configuration.useLUT = 1; // workaround, Mult Rader is better with LUT
+		}
+
+		axes[k].specializationConstants.useRader = axes[k].specializationConstants.numRaderPrimes;
+
+		if ((axes[k].specializationConstants.useRader) && (app->configuration.useRaderUintLUT)) {
+			app->configuration.useLUT = 1; // useRaderUintLUT forces LUT
+		}
+
 		uint64_t registers_per_thread_per_radix[33];
 		uint64_t registers_per_thread = 0;
 		uint64_t min_registers_per_thread = -1;
 		uint64_t isGoodSequence = 0;
 		uint64_t extraSharedMemoryForPow2 = ((app->configuration.sharedMemorySizePow2 < app->configuration.sharedMemorySize) || ((locAxisSplit[k] < maxSingleSizeNonStrided) && ((axis_id == nonStridedAxisId))) || ((locAxisSplit[k] < maxSingleSizeStrided) && ((axis_id != nonStridedAxisId)))) ? 1 : 0;
 
-		res = VkFFTGetRegistersPerThread(locAxisSplit[k], extraSharedMemoryForPow2, max_rhs / locAxisSplit[k], loc_multipliers, registers_per_thread_per_radix, &registers_per_thread, &min_registers_per_thread, &isGoodSequence);
+		res = VkFFTGetRegistersPerThread(app, locAxisSplit[k], extraSharedMemoryForPow2, max_rhs / locAxisSplit[k], axes[k].specializationConstants.numRaderPrimes, loc_multipliers, registers_per_thread_per_radix, &registers_per_thread, &min_registers_per_thread, &isGoodSequence);
 		if (res != VKFFT_SUCCESS) return res;
+		//first optimizer pass
+		if (axes[k].specializationConstants.numRaderPrimes) {
+			res = VkFFTOptimizeRaderFFTRegisters(axes[k].specializationConstants.raderContainer, axes[k].specializationConstants.numRaderPrimes, locAxisSplit[k], &min_registers_per_thread, &registers_per_thread, registers_per_thread_per_radix);
+			if (res != VKFFT_SUCCESS) return res;
+			/*for (int64_t i = 0; i < axes[k].specializationConstants.numRaderPrimes; i++) {
+				if (axes[k].specializationConstants.raderContainer[i].type == 0) {
+					if (axes[k].specializationConstants.raderContainer[i].min_registers_per_thread / min_registers_per_thread >= 2) {
+						min_registers_per_thread *= (axes[k].specializationConstants.raderContainer[i].min_registers_per_thread / min_registers_per_thread);
+						for (uint64_t j = 0; j < 33; j++) {
+							if ((registers_per_thread_per_radix[j] > 0) && (registers_per_thread_per_radix[j] < min_registers_per_thread)) registers_per_thread_per_radix[j] *= (uint64_t)ceil(min_registers_per_thread / (double)registers_per_thread_per_radix[j]);
+						}
+						for (uint64_t j = 0; j < 33; j++) {
+							if (registers_per_thread_per_radix[j] > registers_per_thread) registers_per_thread = registers_per_thread_per_radix[j];
+						}
+					}
+					else if (min_registers_per_thread / axes[k].specializationConstants.raderContainer[i].min_registers_per_thread >= 2) {
+						axes[k].specializationConstants.raderContainer[i].min_registers_per_thread *= (min_registers_per_thread / axes[k].specializationConstants.raderContainer[i].min_registers_per_thread);
+						for (uint64_t j = 0; j < 33; j++) {
+							if ((axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] > 0) && (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] < axes[k].specializationConstants.raderContainer[i].min_registers_per_thread)) axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] *= (uint64_t)ceil(axes[k].specializationConstants.raderContainer[i].min_registers_per_thread / (double)axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j]);
+						}
+						for (uint64_t j = 0; j < 33; j++) {
+							if (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] > axes[k].specializationConstants.raderContainer[i].registers_per_thread) axes[k].specializationConstants.raderContainer[i].registers_per_thread = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j];
+						}
+					}
+					if (axes[k].specializationConstants.raderContainer[i].registers_per_thread > registers_per_thread) registers_per_thread = axes[k].specializationConstants.raderContainer[i].registers_per_thread;
+					if (axes[k].specializationConstants.raderContainer[i].min_registers_per_thread < min_registers_per_thread) min_registers_per_thread = axes[k].specializationConstants.raderContainer[i].min_registers_per_thread;
+				}
+			}*/
+		}
 
 		if ((registerBoost == 4) && (registers_per_thread % 4 != 0)) {
 			registers_per_thread *= 2;
 			for (uint64_t i = 2; i < 33; i++) {
-				registers_per_thread_per_radix[i] *= 2;
+				registers_per_thread_per_radix[i] *= 2;
+			}
+			min_registers_per_thread *= 2;
+		}
+		uint64_t maxBatchCoalesced = ((axis_id == 0) && (((k == 0) && ((!app->configuration.reorderFourStep) || (app->useBluesteinFFT[axis_id]))) || (numPasses == 1))) ? 1 : app->configuration.coalescedMemory / complexSize;
+		uint64_t estimate_rader_threadnum = 0;
+		uint64_t scale_registers_rader = 0;
+		uint64_t rader_min_registers = min_registers_per_thread;
+
+		if (axes[k].specializationConstants.useRaderMult) {
+			for (int64_t i = 0; i < (int64_t)axes[k].specializationConstants.numRaderPrimes; i++) {
+				if (axes[k].specializationConstants.raderContainer[i].type == 1) {
+					uint64_t temp_rader = (uint64_t)ceil((locAxisSplit[k] / (double)((rader_min_registers / 2 + scale_registers_rader) * 2)) / (double)((axes[k].specializationConstants.raderContainer[i].prime + 1) / 2));
+					uint64_t active_rader = (uint64_t)ceil((locAxisSplit[k] / axes[k].specializationConstants.raderContainer[i].prime) / (double)temp_rader);
+					if (active_rader > 1) {
+						if ((((double)active_rader - (locAxisSplit[k] / axes[k].specializationConstants.raderContainer[i].prime) / (double)temp_rader) >= 0.5) && ((((uint64_t)ceil((locAxisSplit[k] / axes[k].specializationConstants.raderContainer[i].prime) / (double)(active_rader - 1)) * ((axes[k].specializationConstants.raderContainer[i].prime + 1) / 2)) * maxBatchCoalesced) <= app->configuration.maxThreadsNum)) active_rader--;
+					}
+
+					uint64_t local_estimate_rader_threadnum = (uint64_t)ceil((locAxisSplit[k] / axes[k].specializationConstants.raderContainer[i].prime) / (double)active_rader) * ((axes[k].specializationConstants.raderContainer[i].prime + 1) / 2) * maxBatchCoalesced;
+					if ((maxBatchCoalesced * locAxisSplit[k] / ((rader_min_registers / 2 + scale_registers_rader) * 2 * registerBoost)) > local_estimate_rader_threadnum) local_estimate_rader_threadnum = (maxBatchCoalesced * locAxisSplit[k] / ((rader_min_registers / 2 + scale_registers_rader) * 2 * registerBoost));
+					if ((local_estimate_rader_threadnum > app->configuration.maxThreadsNum) || ((((locAxisSplit[k] / min_registers_per_thread) > 256) || (local_estimate_rader_threadnum > 256)) && (((rader_min_registers / 2 + scale_registers_rader) * 2) <= 4))) {
+						scale_registers_rader++;
+						i = -1;
+					}
+					else {
+						estimate_rader_threadnum = (estimate_rader_threadnum < local_estimate_rader_threadnum) ? local_estimate_rader_threadnum : estimate_rader_threadnum;
+					}
+				}
+			}
+			rader_min_registers = (rader_min_registers / 2 + scale_registers_rader) * 2;//min number of registers for Rader (can be more than min_registers_per_thread, but min_registers_per_thread should be at least 4 for Nvidiaif you have >256 threads)
+			if (registers_per_thread < rader_min_registers) registers_per_thread = rader_min_registers;
+			for (uint64_t i = 2; i < 33; i++) {
+				if (registers_per_thread_per_radix[i] != 0) {
+					if (registers_per_thread / registers_per_thread_per_radix[i] >= 2) {
+						registers_per_thread_per_radix[i] *= (registers_per_thread / registers_per_thread_per_radix[i]);
+					}
+				}
+			}
+
+			for (int64_t i = 0; i < (int64_t)axes[k].specializationConstants.numRaderPrimes; i++) {
+				if (axes[k].specializationConstants.raderContainer[i].type == 0) {
+					for (uint64_t j = 2; j < 33; j++) {
+						if (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] != 0) {
+							if (registers_per_thread / axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] >= 2) {
+								axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] *= (registers_per_thread / axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j]);
+							}
+						}
+					}
+				}
+			}
+			uint64_t new_min_registers = -1;
+			for (uint64_t i = 2; i < 33; i++) {
+				if ((registers_per_thread_per_radix[i] > 0) && (registers_per_thread_per_radix[i] < new_min_registers)) new_min_registers = registers_per_thread_per_radix[i];
+				if (registers_per_thread_per_radix[i] > registers_per_thread) {
+					registers_per_thread = registers_per_thread_per_radix[i];
+				}
 			}
-			min_registers_per_thread *= 2;
+			for (int64_t i = 0; i < (int64_t)axes[k].specializationConstants.numRaderPrimes; i++) {
+				if (axes[k].specializationConstants.raderContainer[i].type == 0) {
+					for (uint64_t j = 2; j < 33; j++) {
+						if ((axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] > 0) && (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] < new_min_registers)) new_min_registers = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j];
+						if (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] > registers_per_thread) {
+							registers_per_thread = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j];
+						}
+					}
+				}
+			}
+			min_registers_per_thread = (new_min_registers == -1) ? registers_per_thread : new_min_registers;
 		}
-
-		uint64_t maxBatchCoalesced = ((axis_id == 0) && (((k == 0) && ((!app->configuration.reorderFourStep) || (app->useBluesteinFFT[axis_id]))) || (numPasses == 1))) ? 1 : app->configuration.coalescedMemory / complexSize;
-		if (maxBatchCoalesced * locAxisSplit[k] / (min_registers_per_thread * registerBoost) > app->configuration.maxThreadsNum)
+		if ((maxBatchCoalesced * locAxisSplit[k] / (min_registers_per_thread * registerBoost) > app->configuration.maxThreadsNum) || (axes[k].specializationConstants.useRader && (estimate_rader_threadnum > app->configuration.maxThreadsNum)))
 		{
 			uint64_t scaleRegistersNum = 1;
 			if ((axis_id == 0) && (k == 0) && (maxBatchCoalesced > 1)) {
@@ -24007,26 +31385,47 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 			}
 			if ((maxBatchCoalesced * locAxisSplit[k] / (min_registers_per_thread * registerBoost * scaleRegistersNum)) > app->configuration.maxThreadsNum) {
 				for (uint64_t i = 2; i < locAxisSplit[k]; i++) {
-					if ((locAxisSplit[k] / (min_registers_per_thread * registerBoost * scaleRegistersNum) % i == 0) && ((maxBatchCoalesced * locAxisSplit[k] / (min_registers_per_thread * registerBoost * i)) <= app->configuration.maxThreadsNum)) {
+					if (((maxBatchCoalesced * locAxisSplit[k] / (min_registers_per_thread * registerBoost * i)) <= app->configuration.maxThreadsNum)) {
 						scaleRegistersNum = i;
 						i = locAxisSplit[k];
 					}
 				}
 			}
 			min_registers_per_thread *= scaleRegistersNum;
-			uint64_t temp_scaleRegistersNum = scaleRegistersNum;
-			while ((locAxisSplit[k] / (registers_per_thread * registerBoost)) % temp_scaleRegistersNum != 0) temp_scaleRegistersNum++;
-			registers_per_thread *= temp_scaleRegistersNum;
+			registers_per_thread *= scaleRegistersNum;
 			for (uint64_t i = 2; i < 33; i++) {
 				if (registers_per_thread_per_radix[i] != 0) {
-					temp_scaleRegistersNum = scaleRegistersNum;
-					while ((locAxisSplit[k] / (registers_per_thread_per_radix[i] * registerBoost)) % temp_scaleRegistersNum != 0) temp_scaleRegistersNum++;
-					registers_per_thread_per_radix[i] *= temp_scaleRegistersNum;
+					registers_per_thread_per_radix[i] *= scaleRegistersNum;
 				}
 			}
-
+			uint64_t new_min_registers = -1;
+			for (uint64_t i = 2; i < 33; i++) {
+				if ((registers_per_thread_per_radix[i] > 0) && (registers_per_thread_per_radix[i] < new_min_registers)) new_min_registers = registers_per_thread_per_radix[i];
+			}
+			for (int64_t i = 0; i < (int64_t)axes[k].specializationConstants.numRaderPrimes; i++) {
+				if (axes[k].specializationConstants.raderContainer[i].type == 0) {
+					for (uint64_t j = 2; j < 33; j++) {
+						if ((axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] > 0) && (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] < new_min_registers)) new_min_registers = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j];
+					}
+				}
+			}
+			if ((maxBatchCoalesced * locAxisSplit[k] / (new_min_registers * registerBoost)) > app->configuration.maxThreadsNum) {
+				// if we get here, there can be trouble with small primes, as we can have one thread do at max one fftDim. This is only an issue for small primes in sequences close to shared memory limit sizes for extremely big shared memory sizes (>136KB)
+				for (int64_t i = 0; i < (int64_t)axes[k].specializationConstants.numRaderPrimes; i++) {
+					if (axes[k].specializationConstants.raderContainer[i].type == 0) {
+						for (uint64_t j = 2; j < 33; j++) {
+							if (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] != 0) {
+								axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] *= scaleRegistersNum;
+							}
+						}
+					}
+				}
+			}
+			else {
+				min_registers_per_thread = new_min_registers;
+			}
 			if (min_registers_per_thread > registers_per_thread) {
-				uint64_t temp = min_registers_per_thread;
+				temp = min_registers_per_thread;
 				min_registers_per_thread = registers_per_thread;
 				registers_per_thread = temp;
 			}
@@ -24038,7 +31437,19 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 					min_registers_per_thread = registers_per_thread_per_radix[i];
 				}
 			}
-			if ((loc_multipliers[3] >= 2) && (((registers_per_thread / min_registers_per_thread) % 3) == 0)) {
+			for (int64_t i = 0; i < (int64_t)axes[k].specializationConstants.numRaderPrimes; i++) {
+				if (axes[k].specializationConstants.raderContainer[i].type == 0) {
+					for (uint64_t j = 2; j < 33; j++) {
+						if (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] > registers_per_thread) {
+							registers_per_thread = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j];
+						}
+						if ((axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] > 0) && (axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j] < min_registers_per_thread)) {
+							min_registers_per_thread = axes[k].specializationConstants.raderContainer[i].registers_per_thread_per_radix[j];
+						}
+					}
+				}
+			}
+			if ((loc_multipliers[3] >= 2) && (((registers_per_thread / min_registers_per_thread) % 3) == 0) && (axes[k].specializationConstants.numRaderPrimes == 0)) {
 				registers_per_thread /= 3;
 				for (uint64_t i = 2; i < 33; i++) {
 					if (registers_per_thread_per_radix[i] % 9 == 0) {
@@ -24055,146 +31466,18 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 				}
 			}
 		}
-		//optimize used radix kernels
-		if ((registers_per_thread_per_radix[32] > 0) && (registers_per_thread_per_radix[32] % 32 == 0)) {
-			loc_multipliers[32] = loc_multipliers[2] / 5;
-			loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[32] * 5;
-		}
-		if ((registers_per_thread_per_radix[16] > 0) && (registers_per_thread_per_radix[16] % 16 == 0)) {
-			loc_multipliers[16] = loc_multipliers[2] / 4;
-			loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[16] * 4;
-		}
-		if ((registers_per_thread_per_radix[15] > 0) && (registers_per_thread_per_radix[15] % 15 == 0)) {
-			loc_multipliers[15] = (loc_multipliers[3] > loc_multipliers[5]) ? loc_multipliers[5] : loc_multipliers[3];
-			loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[15];
-			loc_multipliers[5] = loc_multipliers[5] - loc_multipliers[15];
-		}
-		if ((registers_per_thread_per_radix[14] > 0) && (registers_per_thread_per_radix[14] % 14 == 0)) {
-			loc_multipliers[14] = (loc_multipliers[2] > loc_multipliers[7]) ? loc_multipliers[7] : loc_multipliers[2];
-			loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[14];
-			loc_multipliers[7] = loc_multipliers[7] - loc_multipliers[14];
-		}
-		if ((registers_per_thread_per_radix[12] > 0) && (registers_per_thread_per_radix[12] % 6 == 0)) {
-			loc_multipliers[12] = (loc_multipliers[2] > 2 * loc_multipliers[3]) ? loc_multipliers[3] : loc_multipliers[2] / 2;
-			loc_multipliers[2] = loc_multipliers[2] - 2 * loc_multipliers[12];
-			loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[12];
-		}
-		if ((registers_per_thread_per_radix[10] > 0) && (registers_per_thread_per_radix[10] % 10 == 0)) {
-			loc_multipliers[10] = (loc_multipliers[2] > loc_multipliers[5]) ? loc_multipliers[5] : loc_multipliers[2];
-			loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[10];
-			loc_multipliers[5] = loc_multipliers[5] - loc_multipliers[10];
-		}
-		if ((registers_per_thread_per_radix[9] > 0) && (registers_per_thread_per_radix[9] % 9 == 0)) {
-			loc_multipliers[9] = loc_multipliers[3] / 2;
-			loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[9] * 2;
-		}
-		if ((registers_per_thread_per_radix[8] > 0) && (registers_per_thread_per_radix[8] % 8 == 0)) {
-			loc_multipliers[8] = loc_multipliers[2] / 3;
-			loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[8] * 3;
-		}
-		if ((registers_per_thread_per_radix[6] > 0) && (registers_per_thread_per_radix[6] % 6 == 0)) {
-			loc_multipliers[6] = (loc_multipliers[2] > loc_multipliers[3]) ? loc_multipliers[3] : loc_multipliers[2];
-			loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[6];
-			loc_multipliers[3] = loc_multipliers[3] - loc_multipliers[6];
-		}
-		if ((registers_per_thread_per_radix[4] > 0) && (registers_per_thread_per_radix[4] % 4 == 0)) {
-			loc_multipliers[4] = loc_multipliers[2] / 2;
-			loc_multipliers[2] = loc_multipliers[2] - loc_multipliers[4] * 2;
-		}
-		if ((registerBoost == 2) && (loc_multipliers[2] == 0)) {
-			if (loc_multipliers[4] > 0) {
-				loc_multipliers[4]--;
-				loc_multipliers[2] = 2;
-			}
-			else if (loc_multipliers[8] > 0) {
-				loc_multipliers[8]--;
-				loc_multipliers[4]++;
-				loc_multipliers[2]++;
-			}
-			else if (loc_multipliers[16] > 0) {
-				loc_multipliers[16]--;
-				loc_multipliers[8]++;
-				loc_multipliers[2]++;
-			}
-			else if (loc_multipliers[32] > 0) {
-				loc_multipliers[32]--;
-				loc_multipliers[16]++;
-				loc_multipliers[2]++;
-			}
-		}
-		if ((registerBoost == 4) && (loc_multipliers[4] == 0)) {
-			if (loc_multipliers[8] > 0) {
-				loc_multipliers[8]--;
-				loc_multipliers[4]++;
-				loc_multipliers[2]++;
-			}
-			else if (loc_multipliers[16] > 0) {
-				if (loc_multipliers[2] == 0) {
-					loc_multipliers[16]--;
-					loc_multipliers[4] = 2;
-				}
-				else {
-					loc_multipliers[16]--;
-					loc_multipliers[4]++;
-					loc_multipliers[2]--;
-					loc_multipliers[8]++;
-				}
-			}
-			else if (loc_multipliers[32] > 0) {
-				if (loc_multipliers[2] == 0) {
-					loc_multipliers[32]--;
-					loc_multipliers[8]++;
-					loc_multipliers[4]++;
-				}
-				else {
-					loc_multipliers[32]--;
-					loc_multipliers[16]++;
-					loc_multipliers[4]++;
-					loc_multipliers[2]--;
-				}
-			}
+		//second optimizer pass
+		if (axes[k].specializationConstants.numRaderPrimes) {
+			res = VkFFTOptimizeRaderFFTRegisters(axes[k].specializationConstants.raderContainer, axes[k].specializationConstants.numRaderPrimes, locAxisSplit[k], &min_registers_per_thread, &registers_per_thread, registers_per_thread_per_radix);
+			if (res != VKFFT_SUCCESS) return res;
 		}
 
 		axes[k].specializationConstants.maxNonPow2Radix = 1;
 		axes[k].specializationConstants.usedLocRegs = 1;
-		for (uint64_t i = 2; i < 33; i++) {
-			uint64_t usedLocRegs = 0;
-			if (loc_multipliers[i] > 0) {
-				switch (i) {
-				case 6:
-					usedLocRegs = 3;
-					break;
-				case 9:
-					usedLocRegs = 3;
-					break;
-				case 10:
-					usedLocRegs = 5;
-					break;
-				case 12:
-					usedLocRegs = 3;
-					break;
-				case 14:
-					usedLocRegs = 7;
-					break;
-				case 15:
-					usedLocRegs = 5;
-					break;
-				default:
-					usedLocRegs = i;
-					break;
-				}
-			}
-			if ((loc_multipliers[i] > 0) && ((i & (i - 1)) != 0)) {
-				axes[k].specializationConstants.maxNonPow2Radix = i;
-			}
-			if ((usedLocRegs > axes[k].specializationConstants.usedLocRegs) && ((i & (i - 1)) != 0)) {
-				axes[k].specializationConstants.usedLocRegs = usedLocRegs;
-			}
-		}
-		uint64_t j = 0;
-		axes[k].specializationConstants.registerBoost = registerBoost;
-		axes[k].specializationConstants.registers_per_thread = registers_per_thread;
-		axes[k].specializationConstants.min_registers_per_thread = min_registers_per_thread;
+
+		res = VkFFTOptimizeRadixKernels(registers_per_thread_per_radix, loc_multipliers, registerBoost, &axes[k].specializationConstants.maxNonPow2Radix, &axes[k].specializationConstants.usedLocRegs, axes[k].specializationConstants.raderContainer, axes[k].specializationConstants.numRaderPrimes);
+		if (res != VKFFT_SUCCESS) return res;
+
 		for (uint64_t i = 2; i < 33; i++) {
 			axes[k].specializationConstants.registers_per_thread_per_radix[i] = registers_per_thread_per_radix[i];
 		}
@@ -24217,26 +31500,69 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 				}
 			}
 		}
+
+		res = VkFFTGetRaderFFTStages(axes[k].specializationConstants.raderContainer, axes[k].specializationConstants.numRaderPrimes, &axes[k].specializationConstants.numStages, axes[k].specializationConstants.stageRadix, axes[k].specializationConstants.rader_generator);
+		if (res != VKFFT_SUCCESS) return res;
+
 		for (uint64_t i = 32; i > 1; i--) {
 			if (loc_multipliers[i] > 0) {
-				axes[k].specializationConstants.stageRadix[j] = i;
+				axes[k].specializationConstants.stageRadix[axes[k].specializationConstants.numStages] = i;
 				loc_multipliers[i]--;
 				i++;
-				j++;
 				axes[k].specializationConstants.numStages++;
 			}
 		}
+
+		//add more registers for Rader FFT if needed
+		if (axes[k].specializationConstants.useRaderMult) {
+			axes[k].specializationConstants.rader_min_registers = rader_min_registers;
+			for (uint64_t i = 0; i < axes[k].specializationConstants.numRaderPrimes; i++) {
+				if (axes[k].specializationConstants.raderContainer[i].type == 1) {
+					uint64_t temp_rader = (uint64_t)ceil((locAxisSplit[k] / (double)axes[k].specializationConstants.rader_min_registers) / (double)((axes[k].specializationConstants.raderContainer[i].prime + 1) / 2));
+					uint64_t active_rader = (uint64_t)ceil((locAxisSplit[k] / axes[k].specializationConstants.raderContainer[i].prime) / (double)temp_rader);
+					if (active_rader > 1) {
+						if ((((double)active_rader - (locAxisSplit[k] / axes[k].specializationConstants.raderContainer[i].prime) / (double)temp_rader) >= 0.5) && ((((uint64_t)ceil((locAxisSplit[k] / axes[k].specializationConstants.raderContainer[i].prime) / (double)(active_rader - 1)) * ((axes[k].specializationConstants.raderContainer[i].prime + 1) / 2)) * maxBatchCoalesced) <= app->configuration.maxThreadsNum)) active_rader--;
+					}
+					axes[k].specializationConstants.raderRegisters = (active_rader * 2 > axes[k].specializationConstants.raderRegisters) ? active_rader * 2 : axes[k].specializationConstants.raderRegisters;
+					if (active_rader * 2 > registers_per_thread) registers_per_thread = active_rader * 2;
+				}
+			}
+			if (axes[k].specializationConstants.raderRegisters < axes[k].specializationConstants.rader_min_registers)	axes[k].specializationConstants.raderRegisters = axes[k].specializationConstants.rader_min_registers;
+		}
+
+		//final check up on all registers, increase if bigger
+		registers_per_thread = 0;
+		min_registers_per_thread = -1;
+		if (axes[k].specializationConstants.useRaderMult) {
+			registers_per_thread = axes[k].specializationConstants.raderRegisters;
+			min_registers_per_thread = axes[k].specializationConstants.rader_min_registers;
+		}
+		res = VkFFTMinMaxRegisterCheck(axes[k].specializationConstants.numStages, axes[k].specializationConstants.stageRadix, &min_registers_per_thread, &registers_per_thread, axes[k].specializationConstants.registers_per_thread_per_radix, axes[k].specializationConstants.raderContainer, axes[k].specializationConstants.numRaderPrimes, axes[k].specializationConstants.rader_generator);;
+		if (res != VKFFT_SUCCESS) return res;
+		axes[k].specializationConstants.minRaderFFTThreadNum = 0;
+		res = VkFFTGetRaderFFTThreadsNum(axes[k].specializationConstants.raderContainer, axes[k].specializationConstants.numRaderPrimes, &axes[k].specializationConstants.minRaderFFTThreadNum);
+		if (res != VKFFT_SUCCESS) return res;
+		axes[k].specializationConstants.registerBoost = registerBoost;
+		axes[k].specializationConstants.registers_per_thread = registers_per_thread;
+		axes[k].specializationConstants.min_registers_per_thread = min_registers_per_thread;
+
 		if (switchRegisterBoost > 0) {
 			axes[k].specializationConstants.stageRadix[axes[k].specializationConstants.numStages] = switchRegisterBoost;
 			axes[k].specializationConstants.numStages++;
 		}
 		else {
+			//try to read directly to registers
 			if (min_registers_per_thread != registers_per_thread) {
 				for (uint64_t i = 0; i < axes[k].specializationConstants.numStages; i++) {
 					if (axes[k].specializationConstants.registers_per_thread_per_radix[axes[k].specializationConstants.stageRadix[i]] == min_registers_per_thread) {
-						j = axes[k].specializationConstants.stageRadix[i];
+						uint64_t stageid = axes[k].specializationConstants.stageRadix[i];
 						axes[k].specializationConstants.stageRadix[i] = axes[k].specializationConstants.stageRadix[0];
-						axes[k].specializationConstants.stageRadix[0] = j;
+						axes[k].specializationConstants.stageRadix[0] = stageid;
+						if (axes[k].specializationConstants.useRader) {
+							stageid = axes[k].specializationConstants.rader_generator[i];
+							axes[k].specializationConstants.rader_generator[i] = axes[k].specializationConstants.rader_generator[0];
+							axes[k].specializationConstants.rader_generator[0] = stageid;
+						}
 						i = axes[k].specializationConstants.numStages;
 					}
 				}
@@ -24245,66 +31571,12 @@ static inline VkFFTResult VkFFTScheduler(VkFFTApplication* app, VkFFTPlan* FFTPl
 	}
 	return VKFFT_SUCCESS;
 }
-static inline VkFFTResult VkFFTGeneratePhaseVectors(VkFFTApplication* app, VkFFTPlan* FFTPlan, uint64_t axis_id, uint64_t supportAxis) {
+static inline VkFFTResult VkFFTGeneratePhaseVectors(VkFFTApplication* app, VkFFTPlan* FFTPlan, uint64_t axis_id) {
 	//generate two arrays used for Blueestein convolution and post-convolution multiplication
-	double double_PI = 3.1415926535897932384626433832795;
 	VkFFTResult resFFT = VKFFT_SUCCESS;
-	VkFFTApplication kernelPreparationApplication = {};
-	VkFFTConfiguration kernelPreparationConfiguration = {};
-
-	kernelPreparationConfiguration.FFTdim = 1;
-	kernelPreparationConfiguration.size[0] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id];
-	kernelPreparationConfiguration.size[1] = 1;
-	kernelPreparationConfiguration.size[2] = 1;
-	kernelPreparationConfiguration.doublePrecision = app->configuration.doublePrecision;
-	kernelPreparationConfiguration.useLUT = 1;
-	kernelPreparationConfiguration.registerBoost = 1;
-	kernelPreparationConfiguration.disableReorderFourStep = 1;
-	kernelPreparationConfiguration.saveApplicationToString = app->configuration.saveApplicationToString;
-	kernelPreparationConfiguration.loadApplicationFromString = app->configuration.loadApplicationFromString;
-	if (kernelPreparationConfiguration.loadApplicationFromString) {
-#if((VKFFT_BACKEND==0)||(VKFFT_BACKEND==2)||(VKFFT_BACKEND==4))
-		kernelPreparationConfiguration.loadApplicationString = (void*)((uint32_t*)app->configuration.loadApplicationString + app->currentApplicationStringPos);
-#else
-		kernelPreparationConfiguration.loadApplicationString = (void*)((char*)app->configuration.loadApplicationString + app->currentApplicationStringPos);
-#endif
-	}
-	kernelPreparationConfiguration.performBandwidthBoost = (app->configuration.performBandwidthBoost > 0) ? app->configuration.performBandwidthBoost : 1;
-	if (axis_id == 0) kernelPreparationConfiguration.performBandwidthBoost = 0;
-	if (axis_id > 0) kernelPreparationConfiguration.considerAllAxesStrided = 1;
-	if (app->configuration.tempBuffer) {
-		kernelPreparationConfiguration.userTempBuffer = 1;
-		kernelPreparationConfiguration.tempBuffer = app->configuration.tempBuffer;
-		kernelPreparationConfiguration.tempBufferSize = app->configuration.tempBufferSize;
-		kernelPreparationConfiguration.tempBufferNum = app->configuration.tempBufferNum;
-	}
-	kernelPreparationConfiguration.device = app->configuration.device;
-#if(VKFFT_BACKEND==0)
-	kernelPreparationConfiguration.queue = app->configuration.queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
-	kernelPreparationConfiguration.fence = app->configuration.fence;
-	kernelPreparationConfiguration.commandPool = app->configuration.commandPool;
-	kernelPreparationConfiguration.physicalDevice = app->configuration.physicalDevice;
-	kernelPreparationConfiguration.isCompilerInitialized = 1;//compiler can be initialized before VkFFT plan creation. if not, VkFFT will create and destroy one after initialization
-	kernelPreparationConfiguration.tempBufferDeviceMemory = app->configuration.tempBufferDeviceMemory;
-#elif(VKFFT_BACKEND==3)
-	kernelPreparationConfiguration.context = app->configuration.context;
-#elif(VKFFT_BACKEND==4)
-	kernelPreparationConfiguration.context = app->configuration.context;
-	kernelPreparationConfiguration.commandQueue = app->configuration.commandQueue;
-	kernelPreparationConfiguration.commandQueueID = app->configuration.commandQueueID;
-#endif			
-
-	uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * kernelPreparationConfiguration.size[0] * kernelPreparationConfiguration.size[1] * kernelPreparationConfiguration.size[2];
-	if (kernelPreparationConfiguration.doublePrecision) bufferSize *= sizeof(double) / sizeof(float);
+	uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * FFTPlan->actualFFTSizePerAxis[axis_id][axis_id];
+	if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) bufferSize *= sizeof(double) / sizeof(float);
 	app->bufferBluesteinSize[axis_id] = bufferSize;
-	kernelPreparationConfiguration.inputBufferSize = &app->bufferBluesteinSize[axis_id];
-	kernelPreparationConfiguration.bufferSize = &app->bufferBluesteinSize[axis_id];
-	kernelPreparationConfiguration.isInputFormatted = 1;
-	resFFT = initializeVkFFT(&kernelPreparationApplication, kernelPreparationConfiguration);
-	if (resFFT != VKFFT_SUCCESS) return resFFT;
-	if (kernelPreparationConfiguration.loadApplicationFromString) {
-		app->currentApplicationStringPos += kernelPreparationApplication.currentApplicationStringPos;
-	}
 #if(VKFFT_BACKEND==0)
 	VkResult res = VK_SUCCESS;
 	resFFT = allocateFFTBuffer(app, &app->bufferBluestein[axis_id], &app->bufferBluesteinDeviceMemory[axis_id], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, bufferSize);
@@ -24371,102 +31643,463 @@ static inline VkFFTResult VkFFTGeneratePhaseVectors(VkFFTApplication* app, VkFFT
 		res = zeMemAllocDevice(app->configuration.context[0], &device_desc, bufferSize, sizeof(float), app->configuration.device[0], &app->bufferBluesteinIFFT[axis_id]);
 		if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 	}
-#endif
-	void* phaseVectors = malloc(bufferSize);
-	if (!phaseVectors) {
-		deleteVkFFT(&kernelPreparationApplication);
-		deleteVkFFT(app);
-		return VKFFT_ERROR_MALLOC_FAILED;
+#elif(VKFFT_BACKEND==5)
+	app->bufferBluestein[axis_id] = app->configuration.device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+
+	if (!app->configuration.makeInversePlanOnly) {
+		app->bufferBluesteinFFT[axis_id] = app->configuration.device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+	}
+	if (!app->configuration.makeForwardPlanOnly) {
+		app->bufferBluesteinIFFT[axis_id] = app->configuration.device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
 	}
-	uint64_t phaseVectorsNonZeroSize = (((app->configuration.performDCT == 4) && (app->configuration.size[axis_id] % 2 == 0)) || ((FFTPlan->multiUploadR2C) && (axis_id == 0))) ? app->configuration.size[axis_id] / 2 : app->configuration.size[axis_id];
-	if (app->configuration.performDCT == 1) phaseVectorsNonZeroSize = 2 * app->configuration.size[axis_id] - 2;
-	if ((FFTPlan->numAxisUploads[axis_id] > 1) && (!app->configuration.makeForwardPlanOnly)) {
-		if (kernelPreparationConfiguration.doublePrecision) {
-			double* phaseVectors_cast = (double*)phaseVectors;
+#endif
+#ifdef VkFFT_use_FP128_Bluestein_RaderFFT
+	if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+		double* phaseVectors_fp64 = (double*)malloc(bufferSize);
+		if (!phaseVectors_fp64) {
+			return VKFFT_ERROR_MALLOC_FAILED;
+		}
+		long double* phaseVectors_fp128 = (long double*)malloc(2 * bufferSize);
+		if (!phaseVectors_fp128) {
+			free(phaseVectors_fp64);
+			return VKFFT_ERROR_MALLOC_FAILED;
+		}
+		long double* phaseVectors_fp128_out = (long double*)malloc(2 * bufferSize);
+		if (!phaseVectors_fp128_out) { // check the buffer just allocated; re-testing phaseVectors_fp128 here would let a failed allocation reach fftwl_plan_dft_1d as NULL
+			free(phaseVectors_fp64);
+			free(phaseVectors_fp128);
+			return VKFFT_ERROR_MALLOC_FAILED;
+		}
+		uint64_t phaseVectorsNonZeroSize = (((app->configuration.performDCT == 4) && (app->configuration.size[axis_id] % 2 == 0)) || ((FFTPlan->multiUploadR2C) && (axis_id == 0))) ? app->configuration.size[axis_id] / 2 : app->configuration.size[axis_id];
+		if (app->configuration.performDCT == 1) phaseVectorsNonZeroSize = 2 * app->configuration.size[axis_id] - 2;
+		long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+		for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+			uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
+			long double angle = double_PI * rm / phaseVectorsNonZeroSize;
+			phaseVectors_fp128[2 * i] = (i < phaseVectorsNonZeroSize) ? cos(angle) : 0;
+			phaseVectors_fp128[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? -sin(angle) : 0;
+		}
+		for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
+			phaseVectors_fp128[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_fp128[2 * i];
+			phaseVectors_fp128[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_fp128[2 * i + 1];
+		}
+		if ((FFTPlan->numAxisUploads[axis_id] > 1) && (!app->configuration.makeForwardPlanOnly)) {
+			fftwl_plan p;
+			p = fftwl_plan_dft_1d((int)(FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]), (fftwl_complex*)phaseVectors_fp128, (fftwl_complex*)phaseVectors_fp128_out, -1, FFTW_ESTIMATE);
+			fftwl_execute(p);
+			fftwl_destroy_plan(p);
 			for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
-				uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
-				double angle = double_PI * rm / phaseVectorsNonZeroSize;
-				phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (double)cos(angle) : 0;
-				phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (double)-sin(angle) : 0;
+				uint64_t out = 0;
+				if (FFTPlan->numAxisUploads[axis_id] == 1) {
+					out = i;
+				}
+				else if (FFTPlan->numAxisUploads[axis_id] == 2) {
+					out = i / FFTPlan->axisSplit[axis_id][1] + (i % FFTPlan->axisSplit[axis_id][1]) * FFTPlan->axisSplit[axis_id][0];
+				}
+				else {
+					out = (i / FFTPlan->axisSplit[axis_id][2]) / FFTPlan->axisSplit[axis_id][1] + ((i / FFTPlan->axisSplit[axis_id][2]) % FFTPlan->axisSplit[axis_id][1]) * FFTPlan->axisSplit[axis_id][0] + (i % FFTPlan->axisSplit[axis_id][2]) * FFTPlan->axisSplit[axis_id][1] * FFTPlan->axisSplit[axis_id][0];
+				}
+				phaseVectors_fp64[2 * out] = (double)phaseVectors_fp128_out[2 * i];
+				phaseVectors_fp64[2 * out + 1] = (double)phaseVectors_fp128_out[2 * i + 1];
 			}
-			for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
-				phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
-				phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
+			resFFT = VkFFT_transferDataFromCPU(app, phaseVectors_fp64, &app->bufferBluesteinIFFT[axis_id], bufferSize);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors_fp64);
+				free(phaseVectors_fp128);
+				free(phaseVectors_fp128_out);
+				return resFFT;
 			}
 		}
-		else {
-			float* phaseVectors_cast = (float*)phaseVectors;
+		for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+			phaseVectors_fp128[2 * i + 1] = -phaseVectors_fp128[2 * i + 1];
+		}
+		for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+			phaseVectors_fp64[2 * i] = (double)phaseVectors_fp128[2 * i];
+			phaseVectors_fp64[2 * i + 1] = (double)phaseVectors_fp128[2 * i + 1];
+		}
+		resFFT = VkFFT_transferDataFromCPU(app, phaseVectors_fp64, &app->bufferBluestein[axis_id], bufferSize);
+		if (resFFT != VKFFT_SUCCESS) {
+			free(phaseVectors_fp64);
+			free(phaseVectors_fp128);
+			free(phaseVectors_fp128_out);
+			return resFFT;
+		}
+		if (!app->configuration.makeInversePlanOnly) {
+			fftwl_plan p;
+			p = fftwl_plan_dft_1d((int)(FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]), (fftwl_complex*)phaseVectors_fp128, (fftwl_complex*)phaseVectors_fp128_out, -1, FFTW_ESTIMATE);
+			fftwl_execute(p);
+			fftwl_destroy_plan(p);
+			for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+				uint64_t out = 0;
+				if (FFTPlan->numAxisUploads[axis_id] == 1) {
+					out = i;
+				}
+				else if (FFTPlan->numAxisUploads[axis_id] == 2) {
+					out = i / FFTPlan->axisSplit[axis_id][1] + (i % FFTPlan->axisSplit[axis_id][1]) * FFTPlan->axisSplit[axis_id][0];
+				}
+				else {
+					out = (i / FFTPlan->axisSplit[axis_id][2]) / FFTPlan->axisSplit[axis_id][1] + ((i / FFTPlan->axisSplit[axis_id][2]) % FFTPlan->axisSplit[axis_id][1]) * FFTPlan->axisSplit[axis_id][0] + (i % FFTPlan->axisSplit[axis_id][2]) * FFTPlan->axisSplit[axis_id][1] * FFTPlan->axisSplit[axis_id][0];
+				}
+				phaseVectors_fp64[2 * out] = (double)phaseVectors_fp128_out[2 * i];
+				phaseVectors_fp64[2 * out + 1] = (double)phaseVectors_fp128_out[2 * i + 1];
+			}
+			resFFT = VkFFT_transferDataFromCPU(app, phaseVectors_fp64, &app->bufferBluesteinFFT[axis_id], bufferSize);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors_fp64);
+				free(phaseVectors_fp128);
+				free(phaseVectors_fp128_out);
+				return resFFT;
+			}
+		}
+		if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
+			fftwl_plan p;
+			p = fftwl_plan_dft_1d((int)(FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]), (fftwl_complex*)phaseVectors_fp128, (fftwl_complex*)phaseVectors_fp128_out, 1, FFTW_ESTIMATE);
+			fftwl_execute(p);
+			fftwl_destroy_plan(p);
+
 			for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
-				uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
-				double angle = double_PI * rm / phaseVectorsNonZeroSize;
-				phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (float)cos(angle) : 0;
-				phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (float)-sin(angle) : 0;
+				phaseVectors_fp64[2 * i] = (double)phaseVectors_fp128_out[2 * i];
+				phaseVectors_fp64[2 * i + 1] = (double)phaseVectors_fp128_out[2 * i + 1];
 			}
-			for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
-				phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
-				phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
+			resFFT = VkFFT_transferDataFromCPU(app, phaseVectors_fp64, &app->bufferBluesteinIFFT[axis_id], bufferSize);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors_fp64);
+				free(phaseVectors_fp128);
+				free(phaseVectors_fp128_out);
+				return resFFT;
 			}
 		}
+		free(phaseVectors_fp64);
+		free(phaseVectors_fp128);
+		free(phaseVectors_fp128_out);
+	}
+	else {
+#endif
+		VkFFTApplication kernelPreparationApplication = {};
+		VkFFTConfiguration kernelPreparationConfiguration = {};
+		kernelPreparationConfiguration.FFTdim = 1;
+		kernelPreparationConfiguration.size[0] = FFTPlan->actualFFTSizePerAxis[axis_id][axis_id];
+		kernelPreparationConfiguration.size[1] = 1;
+		kernelPreparationConfiguration.size[2] = 1;
+		kernelPreparationConfiguration.doublePrecision = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory);
+		kernelPreparationConfiguration.useLUT = 1;
+		kernelPreparationConfiguration.registerBoost = 1;
+		kernelPreparationConfiguration.disableReorderFourStep = 1;
+		kernelPreparationConfiguration.fixMinRaderPrimeFFT = 17;
+		kernelPreparationConfiguration.fixMinRaderPrimeMult = 17;
+		kernelPreparationConfiguration.fixMaxRaderPrimeFFT = 17;
+		kernelPreparationConfiguration.fixMaxRaderPrimeMult = 17;
+		kernelPreparationConfiguration.saveApplicationToString = app->configuration.saveApplicationToString;
+		kernelPreparationConfiguration.loadApplicationFromString = app->configuration.loadApplicationFromString;
+		if (kernelPreparationConfiguration.loadApplicationFromString) {
+			kernelPreparationConfiguration.loadApplicationString = (void*)((char*)app->configuration.loadApplicationString + app->currentApplicationStringPos);
+		}
+		kernelPreparationConfiguration.performBandwidthBoost = (app->configuration.performBandwidthBoost > 0) ? app->configuration.performBandwidthBoost : 1;
+		if (axis_id == 0) kernelPreparationConfiguration.performBandwidthBoost = 0;
+		if (axis_id > 0) kernelPreparationConfiguration.considerAllAxesStrided = 1;
+		if (app->configuration.tempBuffer) {
+			kernelPreparationConfiguration.userTempBuffer = 1;
+			kernelPreparationConfiguration.tempBuffer = app->configuration.tempBuffer;
+			kernelPreparationConfiguration.tempBufferSize = app->configuration.tempBufferSize;
+			kernelPreparationConfiguration.tempBufferNum = app->configuration.tempBufferNum;
+		}
+		kernelPreparationConfiguration.device = app->configuration.device;
 #if(VKFFT_BACKEND==0)
-		resFFT = transferDataFromCPU(&kernelPreparationApplication, phaseVectors, &app->bufferBluestein[axis_id], bufferSize);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
+		kernelPreparationConfiguration.queue = app->configuration.queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
+		kernelPreparationConfiguration.fence = app->configuration.fence;
+		kernelPreparationConfiguration.commandPool = app->configuration.commandPool;
+		kernelPreparationConfiguration.physicalDevice = app->configuration.physicalDevice;
+		kernelPreparationConfiguration.isCompilerInitialized = 1;//compiler can be initialized before VkFFT plan creation. if not, VkFFT will create and destroy one after initialization
+		kernelPreparationConfiguration.tempBufferDeviceMemory = app->configuration.tempBufferDeviceMemory;
+#elif(VKFFT_BACKEND==3)
+		kernelPreparationConfiguration.context = app->configuration.context;
+#elif(VKFFT_BACKEND==4)
+		kernelPreparationConfiguration.context = app->configuration.context;
+		kernelPreparationConfiguration.commandQueue = app->configuration.commandQueue;
+		kernelPreparationConfiguration.commandQueueID = app->configuration.commandQueueID;
+#elif(VKFFT_BACKEND==5)
+		kernelPreparationConfiguration.device = app->configuration.device;
+		kernelPreparationConfiguration.queue = app->configuration.queue;
+#endif			
+
+		kernelPreparationConfiguration.inputBufferSize = &app->bufferBluesteinSize[axis_id];
+		kernelPreparationConfiguration.bufferSize = &app->bufferBluesteinSize[axis_id];
+		kernelPreparationConfiguration.isInputFormatted = 1;
+		resFFT = initializeVkFFT(&kernelPreparationApplication, kernelPreparationConfiguration);
+		if (resFFT != VKFFT_SUCCESS) return resFFT;
+		if (kernelPreparationConfiguration.loadApplicationFromString) {
+			app->currentApplicationStringPos += kernelPreparationApplication.currentApplicationStringPos;
 		}
-#elif(VKFFT_BACKEND==1)
-		res = cudaMemcpy(app->bufferBluestein[axis_id], phaseVectors, bufferSize, cudaMemcpyHostToDevice);
-		if (res != cudaSuccess) {
-			free(phaseVectors);
+		void* phaseVectors = malloc(bufferSize);
+		if (!phaseVectors) {
 			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_COPY;
+			return VKFFT_ERROR_MALLOC_FAILED;
 		}
+		uint64_t phaseVectorsNonZeroSize = (((app->configuration.performDCT == 4) && (app->configuration.size[axis_id] % 2 == 0)) || ((FFTPlan->multiUploadR2C) && (axis_id == 0))) ? app->configuration.size[axis_id] / 2 : app->configuration.size[axis_id];
+		if (app->configuration.performDCT == 1) phaseVectorsNonZeroSize = 2 * app->configuration.size[axis_id] - 2;
+
+		if ((FFTPlan->numAxisUploads[axis_id] > 1) && (!app->configuration.makeForwardPlanOnly)) {
+			if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+				long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+				double* phaseVectors_cast = (double*)phaseVectors;
+				for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+					uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
+					long double angle = double_PI * rm / phaseVectorsNonZeroSize;
+					phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (double)cos(angle) : 0;
+					phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (double)-sin(angle) : 0;
+				}
+				for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
+				}
+			}
+			else {
+				double double_PI = 3.14159265358979323846264338327950288419716939937510;
+				float* phaseVectors_cast = (float*)phaseVectors;
+				for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+					uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
+					double angle = double_PI * rm / phaseVectorsNonZeroSize;
+					phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (float)cos(angle) : 0;
+					phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (float)-sin(angle) : 0;
+				}
+				for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
+				}
+			}
+			resFFT = VkFFT_transferDataFromCPU(app, phaseVectors, &app->bufferBluestein[axis_id], bufferSize);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+#if(VKFFT_BACKEND==0)
+			{
+				VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
+				commandBufferAllocateInfo.commandPool = kernelPreparationApplication.configuration.commandPool[0];
+				commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+				commandBufferAllocateInfo.commandBufferCount = 1;
+				VkCommandBuffer commandBuffer = {};
+				res = vkAllocateCommandBuffers(kernelPreparationApplication.configuration.device[0], &commandBufferAllocateInfo, &commandBuffer);
+				if (res != 0) {
+					free(phaseVectors);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
+				}
+				VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
+				commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+				res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
+				if (res != 0) {
+					free(phaseVectors);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
+				}
+				VkFFTLaunchParams launchParams = {};
+				launchParams.commandBuffer = &commandBuffer;
+				launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+				launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+				//Record commands
+				resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(phaseVectors);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+				res = vkEndCommandBuffer(commandBuffer);
+				if (res != 0) {
+					free(phaseVectors);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+				}
+				VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
+				submitInfo.commandBufferCount = 1;
+				submitInfo.pCommandBuffers = &commandBuffer;
+				res = vkQueueSubmit(kernelPreparationApplication.configuration.queue[0], 1, &submitInfo, kernelPreparationApplication.configuration.fence[0]);
+				if (res != 0) {
+					free(phaseVectors);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+				}
+				res = vkWaitForFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence, VK_TRUE, 100000000000);
+				if (res != 0) {
+					free(phaseVectors);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
+				}
+				res = vkResetFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence);
+				if (res != 0) {
+					free(phaseVectors);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
+				}
+				vkFreeCommandBuffers(kernelPreparationApplication.configuration.device[0], kernelPreparationApplication.configuration.commandPool[0], 1, &commandBuffer);
+			}
+#elif(VKFFT_BACKEND==1)
+			VkFFTLaunchParams launchParams = {};
+			launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = cudaDeviceSynchronize();
+			if (res != cudaSuccess) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 #elif(VKFFT_BACKEND==2)
-		res = hipMemcpy(app->bufferBluestein[axis_id], phaseVectors, bufferSize, hipMemcpyHostToDevice);
-		if (res != hipSuccess) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_COPY;
-		}
+			VkFFTLaunchParams launchParams = {};
+			launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = hipDeviceSynchronize();
+			if (res != hipSuccess) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 #elif(VKFFT_BACKEND==3)
-		res = clEnqueueWriteBuffer(commandQueue, app->bufferBluestein[axis_id], CL_TRUE, 0, bufferSize, phaseVectors, 0, NULL, NULL);
-		if (res != CL_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_COPY;
-		}
+			VkFFTLaunchParams launchParams = {};
+			launchParams.commandQueue = &commandQueue;
+			launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = clFinish(commandQueue);
+			if (res != CL_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 #elif(VKFFT_BACKEND==4)
-		ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				app->configuration.commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-		};
-		ze_command_list_handle_t copyCommandList;
-		res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+			ze_command_list_desc_t commandListDescription = {};
+			commandListDescription.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
+			ze_command_list_handle_t commandList = {};
+			res = zeCommandListCreate(app->configuration.context[0], app->configuration.device[0], &commandListDescription, &commandList);
+			if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+			VkFFTLaunchParams launchParams = {};
+			launchParams.commandList = &commandList;
+			launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = zeCommandListClose(commandList);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+			}
+			res = zeCommandQueueExecuteCommandLists(app->configuration.commandQueue[0], 1, &commandList, 0);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+			}
+			res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
+			res = zeCommandListDestroy(commandList);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
+			}
+#elif(VKFFT_BACKEND==5)
+			VkFFTLaunchParams launchParams = {};
+			MTL::CommandBuffer* commandBuffer = app->configuration.queue->commandBuffer();
+			if (commandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+			MTL::ComputeCommandEncoder* commandEncoder = commandBuffer->computeCommandEncoder();
+			if (commandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+
+			launchParams.commandBuffer = commandBuffer;
+			launchParams.commandEncoder = commandEncoder;
+			launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			commandEncoder->endEncoding();
+			commandBuffer->commit();
+			commandBuffer->waitUntilCompleted();
+			commandEncoder->release();
+			commandBuffer->release();
+#endif
 		}
-		res = zeCommandListAppendMemoryCopy(copyCommandList, app->bufferBluestein[axis_id], phaseVectors, bufferSize, 0, 0, 0);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_COPY;
+		if ((FFTPlan->numAxisUploads[axis_id] > 1) && (!app->configuration.makeForwardPlanOnly)) {
+			if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+				double* phaseVectors_cast = (double*)phaseVectors;
+				for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+					phaseVectors_cast[2 * i + 1] = -phaseVectors_cast[2 * i + 1];
+				}
+
+			}
+			else {
+				float* phaseVectors_cast = (float*)phaseVectors;
+				for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+					phaseVectors_cast[2 * i + 1] = -phaseVectors_cast[2 * i + 1];
+				}
+			}
 		}
-		res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-		if (res != ZE_RESULT_SUCCESS) {
+		else {
+			if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+				long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+				double* phaseVectors_cast = (double*)phaseVectors;
+				for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+					uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
+					long double angle = double_PI * rm / phaseVectorsNonZeroSize;
+					phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (double)cos(angle) : 0;
+					phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (double)sin(angle) : 0;
+				}
+				for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
+				}
+			}
+			else {
+				double double_PI = 3.14159265358979323846264338327950288419716939937510;
+				float* phaseVectors_cast = (float*)phaseVectors;
+				for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
+					uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
+					double angle = double_PI * rm / phaseVectorsNonZeroSize;
+					phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (float)cos(angle) : 0;
+					phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (float)sin(angle) : 0;
+				}
+				for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
+					phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
+				}
+			}
+		}
+		resFFT = VkFFT_transferDataFromCPU(app, phaseVectors, &app->bufferBluestein[axis_id], bufferSize);
+		if (resFFT != VKFFT_SUCCESS) {
 			free(phaseVectors);
 			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			return resFFT;
 		}
-#endif
 #if(VKFFT_BACKEND==0)
-		{
+		if (!app->configuration.makeInversePlanOnly) {
 			VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
 			commandBufferAllocateInfo.commandPool = kernelPreparationApplication.configuration.commandPool[0];
 			commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
@@ -24489,7 +32122,7 @@ static inline VkFFTResult VkFFTGeneratePhaseVectors(VkFFTApplication* app, VkFFT
 			VkFFTLaunchParams launchParams = {};
 			launchParams.commandBuffer = &commandBuffer;
 			launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
 			//Record commands
 			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
 			if (resFFT != VKFFT_SUCCESS) {
@@ -24526,497 +32159,702 @@ static inline VkFFTResult VkFFTGeneratePhaseVectors(VkFFTApplication* app, VkFFT
 			}
 			vkFreeCommandBuffers(kernelPreparationApplication.configuration.device[0], kernelPreparationApplication.configuration.commandPool[0], 1, &commandBuffer);
 		}
-#elif(VKFFT_BACKEND==1)
-		VkFFTLaunchParams launchParams = {};
-		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = cudaDeviceSynchronize();
-		if (res != cudaSuccess) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-#elif(VKFFT_BACKEND==2)
-		VkFFTLaunchParams launchParams = {};
-		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = hipDeviceSynchronize();
-		if (res != hipSuccess) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-#elif(VKFFT_BACKEND==3)
-		VkFFTLaunchParams launchParams = {};
-		launchParams.commandQueue = &commandQueue;
-		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = clFinish(commandQueue);
-		if (res != CL_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-#elif(VKFFT_BACKEND==4)
-		ze_command_list_desc_t commandListDescription = {};
-		commandListDescription.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
-		ze_command_list_handle_t commandList = {};
-		res = zeCommandListCreate(app->configuration.context[0], app->configuration.device[0], &commandListDescription, &commandList);
-		if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-		VkFFTLaunchParams launchParams = {};
-		launchParams.commandList = &commandList;
-		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = zeCommandListClose(commandList);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
-		}
-		res = zeCommandQueueExecuteCommandLists(app->configuration.commandQueue[0], 1, &commandList, 0);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
-		}
-		res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-		res = zeCommandListDestroy(commandList);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
-		}
-#endif
-	}
-	if (kernelPreparationConfiguration.doublePrecision) {
-		double* phaseVectors_cast = (double*)phaseVectors;
-		for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
-			uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
-			double angle = double_PI * rm / phaseVectorsNonZeroSize;
-			phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (double)cos(angle) : 0;
-			phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (double)sin(angle) : 0;
-		}
-		for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
-			phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
-			phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
-		}
-	}
-	else {
-		float* phaseVectors_cast = (float*)phaseVectors;
-		for (uint64_t i = 0; i < FFTPlan->actualFFTSizePerAxis[axis_id][axis_id]; i++) {
-			uint64_t rm = (i * i) % (2 * phaseVectorsNonZeroSize);
-			double angle = double_PI * rm / phaseVectorsNonZeroSize;
-			phaseVectors_cast[2 * i] = (i < phaseVectorsNonZeroSize) ? (float)cos(angle) : 0;
-			phaseVectors_cast[2 * i + 1] = (i < phaseVectorsNonZeroSize) ? (float)sin(angle) : 0;
-		}
-		for (uint64_t i = 1; i < phaseVectorsNonZeroSize; i++) {
-			phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i)] = phaseVectors_cast[2 * i];
-			phaseVectors_cast[2 * (FFTPlan->actualFFTSizePerAxis[axis_id][axis_id] - i) + 1] = phaseVectors_cast[2 * i + 1];
-		}
-	}
-#if(VKFFT_BACKEND==0)
-	resFFT = transferDataFromCPU(&kernelPreparationApplication, phaseVectors, &app->bufferBluestein[axis_id], bufferSize);
-	if (resFFT != VKFFT_SUCCESS) {
-		free(phaseVectors);
-		deleteVkFFT(&kernelPreparationApplication);
-		return resFFT;
-	}
-#elif(VKFFT_BACKEND==1)
-	res = cudaMemcpy(app->bufferBluestein[axis_id], phaseVectors, bufferSize, cudaMemcpyHostToDevice);
-	if (res != cudaSuccess) {
-		free(phaseVectors);
-		deleteVkFFT(&kernelPreparationApplication);
-		return VKFFT_ERROR_FAILED_TO_COPY;
-	}
-#elif(VKFFT_BACKEND==2)
-	res = hipMemcpy(app->bufferBluestein[axis_id], phaseVectors, bufferSize, hipMemcpyHostToDevice);
-	if (res != hipSuccess) {
-		free(phaseVectors);
-		deleteVkFFT(&kernelPreparationApplication);
-		return VKFFT_ERROR_FAILED_TO_COPY;
-	}
-#elif(VKFFT_BACKEND==3)
-	res = clEnqueueWriteBuffer(commandQueue, app->bufferBluestein[axis_id], CL_TRUE, 0, bufferSize, phaseVectors, 0, NULL, NULL);
-	if (res != CL_SUCCESS) {
-		free(phaseVectors);
-		deleteVkFFT(&kernelPreparationApplication);
-		return VKFFT_ERROR_FAILED_TO_COPY;
-	}
-#elif(VKFFT_BACKEND==4)
-	ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				app->configuration.commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-	};
-	ze_command_list_handle_t copyCommandList;
-	res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
-	if (res != ZE_RESULT_SUCCESS) {
-		free(phaseVectors);
-		deleteVkFFT(&kernelPreparationApplication);
-		return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	}
-	res = zeCommandListAppendMemoryCopy(copyCommandList, app->bufferBluestein[axis_id], phaseVectors, bufferSize, 0, 0, 0);
-	if (res != ZE_RESULT_SUCCESS) {
-		free(phaseVectors);
-		deleteVkFFT(&kernelPreparationApplication);
-		return VKFFT_ERROR_FAILED_TO_COPY;
-	}
-	res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-	if (res != ZE_RESULT_SUCCESS) {
-		free(phaseVectors);
-		deleteVkFFT(&kernelPreparationApplication);
-		return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-	}
-#endif
-#if(VKFFT_BACKEND==0)
-	if (!app->configuration.makeInversePlanOnly) {
-		VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
-		commandBufferAllocateInfo.commandPool = kernelPreparationApplication.configuration.commandPool[0];
-		commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
-		commandBufferAllocateInfo.commandBufferCount = 1;
-		VkCommandBuffer commandBuffer = {};
-		res = vkAllocateCommandBuffers(kernelPreparationApplication.configuration.device[0], &commandBufferAllocateInfo, &commandBuffer);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
-		}
-		VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
-		commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-		res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
+		if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
+			VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
+			commandBufferAllocateInfo.commandPool = kernelPreparationApplication.configuration.commandPool[0];
+			commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+			commandBufferAllocateInfo.commandBufferCount = 1;
+			VkCommandBuffer commandBuffer = {};
+			res = vkAllocateCommandBuffers(kernelPreparationApplication.configuration.device[0], &commandBufferAllocateInfo, &commandBuffer);
+			if (res != 0) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
+			}
+			VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
+			commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+			res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
+			if (res != 0) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
+			}
+			VkFFTLaunchParams launchParams = {};
+			launchParams.commandBuffer = &commandBuffer;
+			launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			//Record commands
+			resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = vkEndCommandBuffer(commandBuffer);
+			if (res != 0) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+			}
+			VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
+			submitInfo.commandBufferCount = 1;
+			submitInfo.pCommandBuffers = &commandBuffer;
+			res = vkQueueSubmit(kernelPreparationApplication.configuration.queue[0], 1, &submitInfo, kernelPreparationApplication.configuration.fence[0]);
+			if (res != 0) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+			}
+			res = vkWaitForFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence, VK_TRUE, 100000000000);
+			if (res != 0) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
+			}
+			res = vkResetFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence);
+			if (res != 0) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
+			}
+			vkFreeCommandBuffers(kernelPreparationApplication.configuration.device[0], kernelPreparationApplication.configuration.commandPool[0], 1, &commandBuffer);
 		}
+#elif(VKFFT_BACKEND==1)
 		VkFFTLaunchParams launchParams = {};
-		launchParams.commandBuffer = &commandBuffer;
 		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-		launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
-		//Record commands
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = vkEndCommandBuffer(commandBuffer);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
-		}
-		VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
-		submitInfo.commandBufferCount = 1;
-		submitInfo.pCommandBuffers = &commandBuffer;
-		res = vkQueueSubmit(kernelPreparationApplication.configuration.queue[0], 1, &submitInfo, kernelPreparationApplication.configuration.fence[0]);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
-		}
-		res = vkWaitForFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence, VK_TRUE, 100000000000);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
-		}
-		res = vkResetFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
-		}
-		vkFreeCommandBuffers(kernelPreparationApplication.configuration.device[0], kernelPreparationApplication.configuration.commandPool[0], 1, &commandBuffer);
-	}
-	if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
-		VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
-		commandBufferAllocateInfo.commandPool = kernelPreparationApplication.configuration.commandPool[0];
-		commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
-		commandBufferAllocateInfo.commandBufferCount = 1;
-		VkCommandBuffer commandBuffer = {};
-		res = vkAllocateCommandBuffers(kernelPreparationApplication.configuration.device[0], &commandBufferAllocateInfo, &commandBuffer);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
+		if (!app->configuration.makeInversePlanOnly) {
+			launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = cudaDeviceSynchronize();
+			if (res != cudaSuccess) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 		}
-		VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
-		commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-		res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
+		if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = cudaDeviceSynchronize();
+			if (res != cudaSuccess) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 		}
+#elif(VKFFT_BACKEND==2)
 		VkFFTLaunchParams launchParams = {};
-		launchParams.commandBuffer = &commandBuffer;
 		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		//Record commands
-		resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = vkEndCommandBuffer(commandBuffer);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
-		}
-		VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
-		submitInfo.commandBufferCount = 1;
-		submitInfo.pCommandBuffers = &commandBuffer;
-		res = vkQueueSubmit(kernelPreparationApplication.configuration.queue[0], 1, &submitInfo, kernelPreparationApplication.configuration.fence[0]);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
-		}
-		res = vkWaitForFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence, VK_TRUE, 100000000000);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
-		}
-		res = vkResetFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence);
-		if (res != 0) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
-		}
-		vkFreeCommandBuffers(kernelPreparationApplication.configuration.device[0], kernelPreparationApplication.configuration.commandPool[0], 1, &commandBuffer);
-	}
-#elif(VKFFT_BACKEND==1)
-	VkFFTLaunchParams launchParams = {};
-	launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-	if (!app->configuration.makeInversePlanOnly) {
-		launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = cudaDeviceSynchronize();
-		if (res != cudaSuccess) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-	}
-	if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = cudaDeviceSynchronize();
-		if (res != cudaSuccess) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-	}
-#elif(VKFFT_BACKEND==2)
-	VkFFTLaunchParams launchParams = {};
-	launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-	if (!app->configuration.makeInversePlanOnly) {
-		launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = hipDeviceSynchronize();
-		if (res != hipSuccess) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-	}
-	if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
+		if (!app->configuration.makeInversePlanOnly) {
+			launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = hipDeviceSynchronize();
+			if (res != hipSuccess) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 		}
-		res = hipDeviceSynchronize();
-		if (res != hipSuccess) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+		if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = hipDeviceSynchronize();
+			if (res != hipSuccess) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 		}
-	}
 #elif(VKFFT_BACKEND==3)
-	VkFFTLaunchParams launchParams = {};
-	launchParams.commandQueue = &commandQueue;
-	launchParams.inputBuffer = &app->bufferBluestein[axis_id];
-	if (!app->configuration.makeInversePlanOnly) {
-		launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = clFinish(commandQueue);
-		if (res != CL_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
-		}
-	}
-	if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
+		VkFFTLaunchParams launchParams = {};
+		launchParams.commandQueue = &commandQueue;
+		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+		if (!app->configuration.makeInversePlanOnly) {
+			launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = clFinish(commandQueue);
+			if (res != CL_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 		}
-		res = clFinish(commandQueue);
-		if (res != CL_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+		if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = clFinish(commandQueue);
+			if (res != CL_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 		}
-	}
 #elif(VKFFT_BACKEND==4)
-	ze_command_list_desc_t commandListDescription = {};
-	commandListDescription.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
-	ze_command_list_handle_t commandList = {};
-	res = zeCommandListCreate(app->configuration.context[0], app->configuration.device[0], &commandListDescription, &commandList);
-	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-	VkFFTLaunchParams launchParams = {};
-	launchParams.commandList = &commandList;
-	launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+		ze_command_list_desc_t commandListDescription = {};
+		commandListDescription.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
+		ze_command_list_handle_t commandList = {};
+		res = zeCommandListCreate(app->configuration.context[0], app->configuration.device[0], &commandListDescription, &commandList);
+		if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+		VkFFTLaunchParams launchParams = {};
+		launchParams.commandList = &commandList;
+		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
 
-	if (!app->configuration.makeInversePlanOnly) {
-		launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
+		if (!app->configuration.makeInversePlanOnly) {
+			launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
 
-		res = zeCommandListClose(commandList);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
-		}
-		res = zeCommandQueueExecuteCommandLists(app->configuration.commandQueue[0], 1, &commandList, 0);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+			res = zeCommandListClose(commandList);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+			}
+			res = zeCommandQueueExecuteCommandLists(app->configuration.commandQueue[0], 1, &commandList, 0);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+			}
+			res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
+			res = zeCommandListReset(commandList);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
+			}
 		}
-		res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+		if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			res = zeCommandListClose(commandList);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+			}
+			res = zeCommandQueueExecuteCommandLists(app->configuration.commandQueue[0], 1, &commandList, 0);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+			}
+			res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
+			if (res != ZE_RESULT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+			}
 		}
-		res = zeCommandListReset(commandList);
+		res = zeCommandListDestroy(commandList);
 		if (res != ZE_RESULT_SUCCESS) {
 			free(phaseVectors);
 			deleteVkFFT(&kernelPreparationApplication);
 			return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
 		}
-	}
-	if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
-		launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
-		resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
-		if (resFFT != VKFFT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return resFFT;
-		}
-		res = zeCommandListClose(commandList);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+#elif(VKFFT_BACKEND==5)
+		VkFFTLaunchParams launchParams = {};
+		launchParams.inputBuffer = &app->bufferBluestein[axis_id];
+		if (!app->configuration.makeInversePlanOnly) {
+			MTL::CommandBuffer* commandBuffer = app->configuration.queue->commandBuffer();
+			if (commandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+			MTL::ComputeCommandEncoder* commandEncoder = commandBuffer->computeCommandEncoder();
+			if (commandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+
+			launchParams.commandBuffer = commandBuffer;
+			launchParams.commandEncoder = commandEncoder;
+			launchParams.buffer = &app->bufferBluesteinFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			commandEncoder->endEncoding();
+			commandBuffer->commit();
+			commandBuffer->waitUntilCompleted();
+			commandEncoder->release();
+			commandBuffer->release();
 		}
-		res = zeCommandQueueExecuteCommandLists(app->configuration.commandQueue[0], 1, &commandList, 0);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+		if ((FFTPlan->numAxisUploads[axis_id] == 1) && (!app->configuration.makeForwardPlanOnly)) {
+			MTL::CommandBuffer* commandBuffer = app->configuration.queue->commandBuffer();
+			if (commandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+			MTL::ComputeCommandEncoder* commandEncoder = commandBuffer->computeCommandEncoder();
+			if (commandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+
+			launchParams.commandBuffer = commandBuffer;
+			launchParams.commandEncoder = commandEncoder;
+			launchParams.buffer = &app->bufferBluesteinIFFT[axis_id];
+			resFFT = VkFFTAppend(&kernelPreparationApplication, 1, &launchParams);
+			if (resFFT != VKFFT_SUCCESS) {
+				free(phaseVectors);
+				deleteVkFFT(&kernelPreparationApplication);
+				return resFFT;
+			}
+			commandEncoder->endEncoding();
+			commandBuffer->commit();
+			commandBuffer->waitUntilCompleted();
+			commandEncoder->release();
+			commandBuffer->release();
 		}
-		res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-		if (res != ZE_RESULT_SUCCESS) {
-			free(phaseVectors);
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+#endif
+#if(VKFFT_BACKEND==0)
+		kernelPreparationApplication.configuration.isCompilerInitialized = 0;
+#elif(VKFFT_BACKEND==3)
+		res = clReleaseCommandQueue(commandQueue);
+		if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
+#endif
+		if (kernelPreparationConfiguration.saveApplicationToString) {
+			app->applicationBluesteinStringSize[axis_id] = kernelPreparationApplication.applicationStringSize;
+			app->applicationBluesteinString[axis_id] = calloc(app->applicationBluesteinStringSize[axis_id], 1);
+			if (!app->applicationBluesteinString[axis_id]) {
+				deleteVkFFT(&kernelPreparationApplication);
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			memcpy(app->applicationBluesteinString[axis_id], kernelPreparationApplication.saveApplicationString, app->applicationBluesteinStringSize[axis_id]);
 		}
-	}
-	res = zeCommandListDestroy(commandList);
-	if (res != ZE_RESULT_SUCCESS) {
-		free(phaseVectors);
 		deleteVkFFT(&kernelPreparationApplication);
-		return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
+		free(phaseVectors);
+#ifdef VkFFT_use_FP128_Bluestein_RaderFFT
 	}
 #endif
-	free(phaseVectors);
+	return resFFT;
+}
+static inline VkFFTResult VkFFTGenerateRaderFFTKernel(VkFFTApplication* app, VkFFTAxis* axis) {
+	//generate Rader FFTKernel
+	VkFFTResult resFFT = VKFFT_SUCCESS;
+	if (axis->specializationConstants.useRader) {
+		for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+			if (axis->specializationConstants.raderContainer[i].type == 0) {
+				for (uint64_t j = 0; j < app->numRaderFFTPrimes; j++) {
+					if (app->rader_primes[j] == axis->specializationConstants.raderContainer[i].prime) {
+						axis->specializationConstants.raderContainer[i].raderFFTkernel = app->raderFFTkernel[j];
+					}
+				}
+				if (axis->specializationConstants.raderContainer[i].raderFFTkernel) continue;
+
+				uint64_t write_id = app->numRaderFFTPrimes;
+				app->rader_primes[write_id] = axis->specializationConstants.raderContainer[i].prime;
+				app->numRaderFFTPrimes++;
+
+				if (app->configuration.loadApplicationFromString) continue;
+
+#ifdef VkFFT_use_FP128_Bluestein_RaderFFT
+				if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+					long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+					double* raderFFTkernel = (double*)malloc((axis->specializationConstants.raderContainer[i].prime - 1) * sizeof(double) * 2);
+					if (!raderFFTkernel) return VKFFT_ERROR_MALLOC_FAILED;
+					axis->specializationConstants.raderContainer[i].raderFFTkernel = (void*)raderFFTkernel;
+					app->raderFFTkernel[write_id] = (void*)raderFFTkernel;
+					app->rader_buffer_size[write_id] = (axis->specializationConstants.raderContainer[i].prime - 1) * sizeof(double) * 2;
+
+					long double* raderFFTkernel_temp = (long double*)malloc((axis->specializationConstants.raderContainer[i].prime - 1) * sizeof(long double) * 2);
+					if (!raderFFTkernel_temp) return VKFFT_ERROR_MALLOC_FAILED;
+					for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+						uint64_t g_pow = 1;
+						for (uint64_t t = 0; t < axis->specializationConstants.raderContainer[i].prime - 1 - j; t++) {
+							g_pow = (g_pow * axis->specializationConstants.raderContainer[i].generator) % axis->specializationConstants.raderContainer[i].prime;
+						}
+						raderFFTkernel_temp[2 * j] = cos(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime);
+						raderFFTkernel_temp[2 * j + 1] = -sin(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime);
+					}
+					fftwl_plan p;
+					p = fftwl_plan_dft_1d((int)(axis->specializationConstants.raderContainer[i].prime - 1), (fftwl_complex*)raderFFTkernel_temp, (fftwl_complex*)raderFFTkernel_temp, -1, FFTW_ESTIMATE);
+					fftwl_execute(p);
+					fftwl_destroy_plan(p);
+					for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+						raderFFTkernel[2 * j] = (double)raderFFTkernel_temp[2 * j];
+						raderFFTkernel[2 * j + 1] = (double)raderFFTkernel_temp[2 * j + 1];
+					}
+					free(raderFFTkernel_temp);
+					continue;
+				}
+#endif
+				if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+					long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
+					double* raderFFTkernel = (double*)malloc((axis->specializationConstants.raderContainer[i].prime - 1) * sizeof(double) * 2);
+					if (!raderFFTkernel) return VKFFT_ERROR_MALLOC_FAILED;
+					axis->specializationConstants.raderContainer[i].raderFFTkernel = (void*)raderFFTkernel;
+					app->raderFFTkernel[write_id] = (void*)raderFFTkernel;
+					app->rader_buffer_size[write_id] = (axis->specializationConstants.raderContainer[i].prime - 1) * sizeof(double) * 2;
+					for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+						uint64_t g_pow = 1;
+						for (uint64_t t = 0; t < axis->specializationConstants.raderContainer[i].prime - 1 - j; t++) {
+							g_pow = (g_pow * axis->specializationConstants.raderContainer[i].generator) % axis->specializationConstants.raderContainer[i].prime;
+						}
+						raderFFTkernel[2 * j] = (double)cos(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime);
+						raderFFTkernel[2 * j + 1] = (double)-sin(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime);
+					}
+				}
+				else {
+					double double_PI = 3.14159265358979323846264338327950288419716939937510;
+					float* raderFFTkernel = (float*)malloc((axis->specializationConstants.raderContainer[i].prime - 1) * sizeof(float) * 2);
+					if (!raderFFTkernel) return VKFFT_ERROR_MALLOC_FAILED;
+					axis->specializationConstants.raderContainer[i].raderFFTkernel = (void*)raderFFTkernel;
+					app->raderFFTkernel[write_id] = (void*)raderFFTkernel;
+					app->rader_buffer_size[write_id] = (axis->specializationConstants.raderContainer[i].prime - 1) * sizeof(float) * 2;
+					for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+						uint64_t g_pow = 1;
+						for (uint64_t t = 0; t < axis->specializationConstants.raderContainer[i].prime - 1 - j; t++) {
+							g_pow = (g_pow * axis->specializationConstants.raderContainer[i].generator) % axis->specializationConstants.raderContainer[i].prime;
+						}
+						raderFFTkernel[2 * j] = (float)cos(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime);
+						raderFFTkernel[2 * j + 1] = (float)(-sin(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime));
+					}
+				}
+
+				VkFFTApplication kernelPreparationApplication = {};
+				VkFFTConfiguration kernelPreparationConfiguration = {};
+
+				kernelPreparationConfiguration.FFTdim = 1;
+				kernelPreparationConfiguration.size[0] = axis->specializationConstants.raderContainer[i].prime - 1;
+				kernelPreparationConfiguration.size[1] = 1;
+				kernelPreparationConfiguration.size[2] = 1;
+				kernelPreparationConfiguration.doublePrecision = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory);
+				kernelPreparationConfiguration.useLUT = 1;
+				kernelPreparationConfiguration.fixMinRaderPrimeFFT = 17;
+				kernelPreparationConfiguration.fixMinRaderPrimeMult = 17;
+				kernelPreparationConfiguration.fixMaxRaderPrimeFFT = 17;
+				kernelPreparationConfiguration.fixMaxRaderPrimeMult = 17;
+
+				kernelPreparationConfiguration.device = app->configuration.device;
 #if(VKFFT_BACKEND==0)
-	kernelPreparationApplication.configuration.isCompilerInitialized = 0;
+				kernelPreparationConfiguration.queue = app->configuration.queue; //to allocate memory for LUT, we have to pass a queue, vkGPU->fence, commandPool and physicalDevice pointers 
+				kernelPreparationConfiguration.fence = app->configuration.fence;
+				kernelPreparationConfiguration.commandPool = app->configuration.commandPool;
+				kernelPreparationConfiguration.physicalDevice = app->configuration.physicalDevice;
+				kernelPreparationConfiguration.isCompilerInitialized = 1;//compiler can be initialized before VkFFT plan creation. if not, VkFFT will create and destroy one after initialization
+				kernelPreparationConfiguration.tempBufferDeviceMemory = app->configuration.tempBufferDeviceMemory;
 #elif(VKFFT_BACKEND==3)
-	res = clReleaseCommandQueue(commandQueue);
-	if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
+				kernelPreparationConfiguration.context = app->configuration.context;
+#elif(VKFFT_BACKEND==4)
+				kernelPreparationConfiguration.context = app->configuration.context;
+				kernelPreparationConfiguration.commandQueue = app->configuration.commandQueue;
+				kernelPreparationConfiguration.commandQueueID = app->configuration.commandQueueID;
+#elif(VKFFT_BACKEND==5)
+				kernelPreparationConfiguration.device = app->configuration.device;
+				kernelPreparationConfiguration.queue = app->configuration.queue;
+#endif			
+
+				uint64_t bufferSize = (uint64_t)sizeof(float) * 2 * kernelPreparationConfiguration.size[0] * kernelPreparationConfiguration.size[1] * kernelPreparationConfiguration.size[2];
+				if (kernelPreparationConfiguration.doublePrecision) bufferSize *= sizeof(double) / sizeof(float);
+
+				kernelPreparationConfiguration.bufferSize = &bufferSize;
+				resFFT = initializeVkFFT(&kernelPreparationApplication, kernelPreparationConfiguration);
+				if (resFFT != VKFFT_SUCCESS) return resFFT;
+
+#if(VKFFT_BACKEND==0)
+				VkDeviceMemory bufferRaderFFTDeviceMemory;
+				VkBuffer bufferRaderFFT;
+#elif(VKFFT_BACKEND==1)
+				void* bufferRaderFFT;
+#elif(VKFFT_BACKEND==2)
+				void* bufferRaderFFT;
+#elif(VKFFT_BACKEND==3)
+				cl_mem bufferRaderFFT;
+#elif(VKFFT_BACKEND==4)
+				void* bufferRaderFFT;
+#elif(VKFFT_BACKEND==5)
+				MTL::Buffer* bufferRaderFFT;
 #endif
-	if (kernelPreparationConfiguration.saveApplicationToString) {
-		app->applicationBluesteinStringSize[axis_id] = kernelPreparationApplication.applicationStringSize;
-		app->applicationBluesteinString[axis_id] = calloc(app->applicationBluesteinStringSize[axis_id], 1);
-		if (!app->applicationBluesteinString[axis_id]) {
-			deleteVkFFT(&kernelPreparationApplication);
-			return VKFFT_ERROR_MALLOC_FAILED;
+#if(VKFFT_BACKEND==0)
+				VkResult res = VK_SUCCESS;
+				resFFT = allocateFFTBuffer(app, &bufferRaderFFT, &bufferRaderFFTDeviceMemory, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, bufferSize);
+				if (resFFT != VKFFT_SUCCESS) return resFFT;
+#elif(VKFFT_BACKEND==1)
+				cudaError_t res = cudaSuccess;
+				res = cudaMalloc(&bufferRaderFFT, bufferSize);
+				if (res != cudaSuccess) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==2)
+				hipError_t res = hipSuccess;
+				res = hipMalloc(&bufferRaderFFT, bufferSize);
+				if (res != hipSuccess) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==3)
+				cl_int res = CL_SUCCESS;
+				bufferRaderFFT = clCreateBuffer(app->configuration.context[0], CL_MEM_READ_WRITE, bufferSize, 0, &res);
+				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+				cl_command_queue commandQueue = clCreateCommandQueue(app->configuration.context[0], app->configuration.device[0], 0, &res);
+				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_QUEUE;
+#elif(VKFFT_BACKEND==4)
+				ze_result_t res = ZE_RESULT_SUCCESS;
+				ze_device_mem_alloc_desc_t device_desc = {};
+				device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
+				res = zeMemAllocDevice(app->configuration.context[0], &device_desc, bufferSize, sizeof(float), app->configuration.device[0], &bufferRaderFFT);
+				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+#elif(VKFFT_BACKEND==5)
+				bufferRaderFFT = app->configuration.device->newBuffer(bufferSize, MTL::ResourceStorageModePrivate);
+#endif
+
+				resFFT = VkFFT_transferDataFromCPU(app, axis->specializationConstants.raderContainer[i].raderFFTkernel, &bufferRaderFFT, bufferSize);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+#if(VKFFT_BACKEND==0)
+				{
+					VkCommandBufferAllocateInfo commandBufferAllocateInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
+					commandBufferAllocateInfo.commandPool = kernelPreparationApplication.configuration.commandPool[0];
+					commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+					commandBufferAllocateInfo.commandBufferCount = 1;
+					VkCommandBuffer commandBuffer = {};
+					res = vkAllocateCommandBuffers(kernelPreparationApplication.configuration.device[0], &commandBufferAllocateInfo, &commandBuffer);
+					if (res != 0) {
+						free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+						deleteVkFFT(&kernelPreparationApplication);
+						return VKFFT_ERROR_FAILED_TO_ALLOCATE_COMMAND_BUFFERS;
+					}
+					VkCommandBufferBeginInfo commandBufferBeginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
+					commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+					res = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);
+					if (res != 0) {
+						free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+						deleteVkFFT(&kernelPreparationApplication);
+						return VKFFT_ERROR_FAILED_TO_BEGIN_COMMAND_BUFFER;
+					}
+					VkFFTLaunchParams launchParams = {};
+					launchParams.commandBuffer = &commandBuffer;
+					launchParams.buffer = &bufferRaderFFT;
+					//Record commands
+					resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+					if (resFFT != VKFFT_SUCCESS) {
+						free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+						deleteVkFFT(&kernelPreparationApplication);
+						return resFFT;
+					}
+					res = vkEndCommandBuffer(commandBuffer);
+					if (res != 0) {
+						free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+						deleteVkFFT(&kernelPreparationApplication);
+						return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+					}
+					VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
+					submitInfo.commandBufferCount = 1;
+					submitInfo.pCommandBuffers = &commandBuffer;
+					res = vkQueueSubmit(kernelPreparationApplication.configuration.queue[0], 1, &submitInfo, kernelPreparationApplication.configuration.fence[0]);
+					if (res != 0) {
+						free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+						deleteVkFFT(&kernelPreparationApplication);
+						return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+					}
+					res = vkWaitForFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence, VK_TRUE, 100000000000);
+					if (res != 0) {
+						free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+						deleteVkFFT(&kernelPreparationApplication);
+						return VKFFT_ERROR_FAILED_TO_WAIT_FOR_FENCES;
+					}
+					res = vkResetFences(kernelPreparationApplication.configuration.device[0], 1, kernelPreparationApplication.configuration.fence);
+					if (res != 0) {
+						free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+						deleteVkFFT(&kernelPreparationApplication);
+						return VKFFT_ERROR_FAILED_TO_RESET_FENCES;
+					}
+					vkFreeCommandBuffers(kernelPreparationApplication.configuration.device[0], kernelPreparationApplication.configuration.commandPool[0], 1, &commandBuffer);
+				}
+#elif(VKFFT_BACKEND==1)
+				VkFFTLaunchParams launchParams = {};
+				launchParams.buffer = &bufferRaderFFT;
+				resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+				res = cudaDeviceSynchronize();
+				if (res != cudaSuccess) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+				}
+#elif(VKFFT_BACKEND==2)
+				VkFFTLaunchParams launchParams = {};
+				launchParams.buffer = &bufferRaderFFT;
+				resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+				res = hipDeviceSynchronize();
+				if (res != hipSuccess) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+				}
+#elif(VKFFT_BACKEND==3)
+				VkFFTLaunchParams launchParams = {};
+				launchParams.commandQueue = &commandQueue;
+				launchParams.buffer = &bufferRaderFFT;
+				resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+				res = clFinish(commandQueue);
+				if (res != CL_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+				}
+#elif(VKFFT_BACKEND==4)
+				ze_command_list_desc_t commandListDescription = {};
+				commandListDescription.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
+				ze_command_list_handle_t commandList = {};
+				res = zeCommandListCreate(app->configuration.context[0], app->configuration.device[0], &commandListDescription, &commandList);
+				if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+				VkFFTLaunchParams launchParams = {};
+				launchParams.commandList = &commandList;
+				launchParams.buffer = &bufferRaderFFT;
+				resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+				res = zeCommandListClose(commandList);
+				if (res != ZE_RESULT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_END_COMMAND_BUFFER;
+				}
+				res = zeCommandQueueExecuteCommandLists(app->configuration.commandQueue[0], 1, &commandList, 0);
+				if (res != ZE_RESULT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_SUBMIT_QUEUE;
+				}
+				res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
+				if (res != ZE_RESULT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+				}
+				res = zeCommandListDestroy(commandList);
+				if (res != ZE_RESULT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return VKFFT_ERROR_FAILED_TO_DESTROY_COMMAND_LIST;
+				}
+#elif(VKFFT_BACKEND==5)
+				VkFFTLaunchParams launchParams = {};
+				MTL::CommandBuffer* commandBuffer = app->configuration.queue->commandBuffer();
+				if (commandBuffer == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+				MTL::ComputeCommandEncoder* commandEncoder = commandBuffer->computeCommandEncoder();
+				if (commandEncoder == 0) return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
+
+				launchParams.commandBuffer = commandBuffer;
+				launchParams.commandEncoder = commandEncoder;
+				launchParams.buffer = &bufferRaderFFT;
+				resFFT = VkFFTAppend(&kernelPreparationApplication, -1, &launchParams);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+				commandEncoder->endEncoding();
+				commandBuffer->commit();
+				commandBuffer->waitUntilCompleted();
+				commandEncoder->release();
+				commandBuffer->release();
+#endif
+				resFFT = VkFFT_transferDataToCPU(&kernelPreparationApplication, axis->specializationConstants.raderContainer[i].raderFFTkernel, &bufferRaderFFT, bufferSize);
+				if (resFFT != VKFFT_SUCCESS) {
+					free(axis->specializationConstants.raderContainer[i].raderFFTkernel);
+					deleteVkFFT(&kernelPreparationApplication);
+					return resFFT;
+				}
+
+#if(VKFFT_BACKEND==0)
+				kernelPreparationApplication.configuration.isCompilerInitialized = 0;
+#elif(VKFFT_BACKEND==3)
+				res = clReleaseCommandQueue(commandQueue);
+				if (res != CL_SUCCESS) return VKFFT_ERROR_FAILED_TO_RELEASE_COMMAND_QUEUE;
+#endif
+#if(VKFFT_BACKEND==0)
+				vkDestroyBuffer(app->configuration.device[0], bufferRaderFFT, 0);
+				vkFreeMemory(app->configuration.device[0], bufferRaderFFTDeviceMemory, 0);
+#elif(VKFFT_BACKEND==1)
+				cudaFree(bufferRaderFFT);
+#elif(VKFFT_BACKEND==2)
+				hipFree(bufferRaderFFT);
+#elif(VKFFT_BACKEND==3)
+				clReleaseMemObject(bufferRaderFFT);
+#elif(VKFFT_BACKEND==4)
+				zeMemFree(app->configuration.context[0], bufferRaderFFT);
+#elif(VKFFT_BACKEND==5)
+				bufferRaderFFT->release();
+#endif
+				deleteVkFFT(&kernelPreparationApplication);
+			}
+		}
+		if (app->configuration.loadApplicationFromString) {
+			uint64_t offset = 0;
+			for (uint64_t i = 0; i < app->numRaderFFTPrimes; i++) {
+				uint64_t current_size = 0;
+				if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+					current_size = (app->rader_primes[i] - 1) * sizeof(double) * 2;
+				}
+				else {
+					current_size = (app->rader_primes[i] - 1) * sizeof(float) * 2;
+				}
+				if (!app->raderFFTkernel[i]) {
+					app->raderFFTkernel[i] = (void*)malloc(current_size);
+					if (!app->raderFFTkernel[i]) return VKFFT_ERROR_MALLOC_FAILED;
+					memcpy(app->raderFFTkernel[i], (char*)app->configuration.loadApplicationString + app->applicationStringOffsetRader + offset, current_size);
+				}
+				for (uint64_t j = 0; j < axis->specializationConstants.numRaderPrimes; j++) {
+					if ((app->rader_primes[i] == axis->specializationConstants.raderContainer[j].prime) && (axis->specializationConstants.raderContainer[j].type == 0))
+						axis->specializationConstants.raderContainer[j].raderFFTkernel = app->raderFFTkernel[i];
+				}
+				offset += current_size;
+			}
 		}
-		memcpy(app->applicationBluesteinString[axis_id], kernelPreparationApplication.saveApplicationString, app->applicationBluesteinStringSize[axis_id]);
 	}
-	deleteVkFFT(&kernelPreparationApplication);
 	return resFFT;
 }
 static inline VkFFTResult VkFFTCheckUpdateBufferSet(VkFFTApplication* app, VkFFTAxis* axis, uint64_t planStage, VkFFTLaunchParams* launchParams) {
@@ -25534,13 +33372,22 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan*
 						axis->specializationConstants.kernelOffset = app->configuration.kernelOffset;
 					}
 				}
-				if ((i == axis->specializationConstants.LUTBindingID) && (app->configuration.useLUT)) {
+				if ((i == axis->specializationConstants.LUTBindingID) && (app->configuration.useLUT == 1)) {
 #if(VKFFT_BACKEND==0)
 					if (axis->specializationConstants.performBufferSetUpdate) {
 						descriptorBufferInfo.buffer = axis->bufferLUT;
 						descriptorBufferInfo.offset = 0;
 						descriptorBufferInfo.range = axis->bufferLUTSize;
 					}
+#endif
+				}
+				if ((i == axis->specializationConstants.RaderUintLUTBindingID) && (axis->specializationConstants.raderUintLUT)) {
+#if(VKFFT_BACKEND==0)
+					if (axis->specializationConstants.performBufferSetUpdate) {
+						descriptorBufferInfo.buffer = axis->bufferRaderUintLUT;
+						descriptorBufferInfo.offset = 0;
+						descriptorBufferInfo.range = axis->bufferRaderUintLUTSize;
+					}
 #endif
 				}
 				if ((i == axis->specializationConstants.BluesteinConvolutionBindingID) && (app->useBluesteinFFT[axis_id]) && (axis_upload_id == 0)) {
@@ -25606,8 +33453,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA
 				VkDescriptorBufferInfo descriptorBufferInfo = { 0 };
 #endif
 				if (i == 0) {
-					uint64_t bufferId = 0;
-					uint64_t offset = j;
 					if (inverse) {
 						if ((axis_upload_id == FFTPlan->numAxisUploads[axis_id] - 1) && (app->configuration.isInputFormatted) && (!axis->specializationConstants.reverseBluesteinMultiUpload) && (
 							((axis_id == app->firstAxis) && (!inverse))
@@ -25944,7 +33789,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA
 						axis->specializationConstants.kernelOffset = app->configuration.kernelOffset;
 					}
 				}
-				if ((i == axis->numBindings - 1) && (app->configuration.useLUT)) {
+				if ((i == axis->numBindings - 1) && (app->configuration.useLUT == 1)) {
 #if(VKFFT_BACKEND==0)
 					if (axis->specializationConstants.performBufferSetUpdate) {
 						descriptorBufferInfo.buffer = axis->bufferLUT;
@@ -25989,11 +33834,17 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	VkFFTAxis* axis = &FFTPlan->R2Cdecomposition;
 	axis->specializationConstants.warpSize = app->configuration.warpSize;
 	axis->specializationConstants.numSharedBanks = app->configuration.numSharedBanks;
 	axis->specializationConstants.useUint64 = app->configuration.useUint64;
+#if(VKFFT_BACKEND==2)
+	axis->specializationConstants.useStrict32BitAddress = app->configuration.useStrict32BitAddress;
+#endif
+	axis->specializationConstants.disableSetLocale = app->configuration.disableSetLocale;
+
 	axis->specializationConstants.numAxisUploads = FFTPlan->numAxisUploads[0];
 	axis->specializationConstants.reorderFourStep = ((FFTPlan->numAxisUploads[0] > 1) && (!app->useBluesteinFFT[0])) ? app->configuration.reorderFourStep : 0;
 	uint64_t complexSize;
@@ -26012,9 +33863,9 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 	axis->specializationConstants.fft_dim_full = app->configuration.size[0];
 	axis->specializationConstants.dispatchZactualFFTSize = 1;
 	//allocate LUT
-	if (app->configuration.useLUT) {
-		double double_PI = 3.1415926535897932384626433832795;
+	if (app->configuration.useLUT == 1) {
 		if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+			long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
 			axis->bufferLUTSize = (app->configuration.size[0] / 2) * 2 * sizeof(double);
 			double* tempLUT = (double*)malloc(axis->bufferLUTSize);
 			if (!tempLUT) {
@@ -26022,7 +33873,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
 			for (uint64_t i = 0; i < app->configuration.size[0] / 2; i++) {
-				double angle = double_PI * i / (app->configuration.size[0] / 2);
+				long double angle = double_PI * i / (app->configuration.size[0] / 2);
 				tempLUT[2 * i] = (double)cos(angle);
 				tempLUT[2 * i + 1] = (double)sin(angle);
 			}
@@ -26044,7 +33895,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return resFFT;
 				}
-				resFFT = transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
 				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
@@ -26059,12 +33910,12 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 				}
-				res = cudaMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) {
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+					return resFFT;
 				}
 #elif(VKFFT_BACKEND==2)
 				res = hipMalloc((void**)&axis->bufferLUT, axis->bufferLUTSize);
@@ -26074,12 +33925,12 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 				}
-				res = hipMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, hipMemcpyHostToDevice);
-				if (res != hipSuccess) {
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+					return resFFT;
 				}
 #elif(VKFFT_BACKEND==3)
 				axis->bufferLUT = clCreateBuffer(app->configuration.context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, axis->bufferLUTSize, tempLUT, &res);
@@ -26099,36 +33950,22 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 				}
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				app->configuration.commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) {
-					deleteVkFFT(app);
-					free(tempLUT);
-					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				}
-				res = zeCommandListAppendMemoryCopy(copyCommandList, axis->bufferLUT, tempLUT, axis->bufferLUTSize, 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) {
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_COPY;
+					return resFFT;
 				}
-				res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-				if (res != ZE_RESULT_SUCCESS) {
+#elif(VKFFT_BACKEND==5)
+				axis->bufferLUT = app->configuration.device->newBuffer(axis->bufferLUTSize, MTL::ResourceStorageModePrivate);
+
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+					return resFFT;
 				}
 #endif
 				free(tempLUT);
@@ -26136,6 +33973,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 			}
 		}
 		else {
+			double double_PI = 3.14159265358979323846264338327950288419716939937510;
 			axis->bufferLUTSize = (app->configuration.size[0] / 2) * 2 * sizeof(float);
 			float* tempLUT = (float*)malloc(axis->bufferLUTSize);
 			if (!tempLUT) {
@@ -26165,7 +34003,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return resFFT;
 				}
-				resFFT = transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
 				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
@@ -26180,12 +34018,12 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 				}
-				res = cudaMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, cudaMemcpyHostToDevice);
-				if (res != cudaSuccess) {
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+					return resFFT;
 				}
 #elif(VKFFT_BACKEND==2)
 				res = hipMalloc((void**)&axis->bufferLUT, axis->bufferLUTSize);
@@ -26195,12 +34033,12 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 				}
-				res = hipMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, hipMemcpyHostToDevice);
-				if (res != hipSuccess) {
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+					return resFFT;
 				}
 #elif(VKFFT_BACKEND==3)
 				axis->bufferLUT = clCreateBuffer(app->configuration.context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, axis->bufferLUTSize, tempLUT, &res);
@@ -26220,36 +34058,22 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					tempLUT = 0;
 					return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 				}
-				ze_command_queue_desc_t commandQueueCopyDesc = {
-				ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-				0,
-				app->configuration.commandQueueID,
-				0, // index
-				0, // flags
-				ZE_COMMAND_QUEUE_MODE_DEFAULT,
-				ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-				};
-				ze_command_list_handle_t copyCommandList;
-				res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
-				if (res != ZE_RESULT_SUCCESS) {
-					deleteVkFFT(app);
-					free(tempLUT);
-					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-				}
-				res = zeCommandListAppendMemoryCopy(copyCommandList, axis->bufferLUT, tempLUT, axis->bufferLUTSize, 0, 0, 0);
-				if (res != ZE_RESULT_SUCCESS) {
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_COPY;
+					return resFFT;
 				}
-				res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-				if (res != ZE_RESULT_SUCCESS) {
+#elif(VKFFT_BACKEND==5)
+				axis->bufferLUT = app->configuration.device->newBuffer(axis->bufferLUTSize, MTL::ResourceStorageModePrivate);
+
+				resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					free(tempLUT);
 					tempLUT = 0;
-					return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+					return resFFT;
 				}
 #endif
 				free(tempLUT);
@@ -26318,8 +34142,6 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 	uint64_t axis_upload_id = 0;
 
 	{
-		uint64_t totalSize = 0;
-		uint64_t locPageSize = initPageSize;
 		if (inverse) {
 			if ((axis_upload_id == FFTPlan->numAxisUploads[axis_id] - 1) && (app->configuration.isInputFormatted) && (!axis->specializationConstants.reverseBluesteinMultiUpload) && (
 				((axis_id == app->firstAxis) && (!inverse))
@@ -26551,7 +34373,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		axis->numBindings++;
 	}
 
-	if (app->configuration.useLUT) {
+	if (app->configuration.useLUT == 1) {
 		axis->specializationConstants.numBuffersBound[axis->numBindings] = 1;
 #if(VKFFT_BACKEND==0)
 		descriptorPoolSize.descriptorCount++;
@@ -26627,7 +34449,20 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 
 		uint64_t tempSize[3] = { (uint64_t)ceil((app->configuration.size[0] * app->configuration.size[1] * app->configuration.size[2]) / (double)(2 * axis->axisBlock[0])), 1, 1 };
 		tempSize[2] *= app->configuration.numberKernels * app->configuration.numberBatches * app->configuration.coordinateFeatures;
-
+		if ((app->configuration.maxComputeWorkGroupCount[0] > app->configuration.maxComputeWorkGroupCount[1]) && (tempSize[1] > app->configuration.maxComputeWorkGroupCount[1]) && (tempSize[1] > tempSize[0]) && (tempSize[1] >= tempSize[2])) {
+			uint64_t temp_tempSize = tempSize[0];
+			tempSize[0] = tempSize[1];
+			tempSize[1] = temp_tempSize;
+			axis->specializationConstants.swapComputeWorkGroupID = 1;
+		}
+		else {
+			if ((app->configuration.maxComputeWorkGroupCount[0] > app->configuration.maxComputeWorkGroupCount[2]) && (tempSize[2] > app->configuration.maxComputeWorkGroupCount[2]) && (tempSize[2] > tempSize[0]) && (tempSize[2] >= tempSize[1])) {
+				uint64_t temp_tempSize = tempSize[0];
+				tempSize[0] = tempSize[2];
+				tempSize[2] = temp_tempSize;
+				axis->specializationConstants.swapComputeWorkGroupID = 2;
+			}
+		}
 		if (tempSize[0] > app->configuration.maxComputeWorkGroupCount[0]) axis->specializationConstants.performWorkGroupShift[0] = 1;
 		else  axis->specializationConstants.performWorkGroupShift[0] = 0;
 		if (tempSize[1] > app->configuration.maxComputeWorkGroupCount[1]) axis->specializationConstants.performWorkGroupShift[1] = 1;
@@ -26705,7 +34540,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		char floatTypeKernelMemory[10];
 		char floatType[10];
 		axis->specializationConstants.unroll = 1;
-		axis->specializationConstants.LUT = app->configuration.useLUT;
+		axis->specializationConstants.LUT = (app->configuration.useLUT == 1) ? 1 : 0;
 		if (app->configuration.doublePrecision) {
 			sprintf(floatType, "double");
 			sprintf(floatTypeInputMemory, "double");
@@ -26757,6 +34592,8 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 			sprintf(uintType, "unsigned int");
 #elif(VKFFT_BACKEND==4)
 			sprintf(uintType, "unsigned int");
+#elif(VKFFT_BACKEND==5)
+			sprintf(uintType, "uint");
 #endif
 		}
 		else {
@@ -26770,6 +34607,8 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 			sprintf(uintType, "unsigned long");
 #elif(VKFFT_BACKEND==4)
 			sprintf(uintType, "unsigned long");
+#elif(VKFFT_BACKEND==5)
+			sprintf(uintType, "ulong");
 #endif
 		}
 		{
@@ -26823,10 +34662,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		}
 #if(VKFFT_BACKEND==0)
 		uint32_t* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
-			uint32_t* localStrPointer = (uint32_t*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = localStrPointer[0];
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (uint32_t*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -26834,120 +34673,118 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize);
-			app->currentApplicationStringPos += codeSize / (sizeof(uint32_t)) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
 		}
 		else
 		{
-			const glslang_resource_t default_resource = {
-				/* .MaxLights = */ 32,
-				/* .MaxClipPlanes = */ 6,
-				/* .MaxTextureUnits = */ 32,
-				/* .MaxTextureCoords = */ 32,
-				/* .MaxVertexAttribs = */ 64,
-				/* .MaxVertexUniformComponents = */ 4096,
-				/* .MaxVaryingFloats = */ 64,
-				/* .MaxVertexTextureImageUnits = */ 32,
-				/* .MaxCombinedTextureImageUnits = */ 80,
-				/* .MaxTextureImageUnits = */ 32,
-				/* .MaxFragmentUniformComponents = */ 4096,
-				/* .MaxDrawBuffers = */ 32,
-				/* .MaxVertexUniformVectors = */ 128,
-				/* .MaxVaryingVectors = */ 8,
-				/* .MaxFragmentUniformVectors = */ 16,
-				/* .MaxVertexOutputVectors = */ 16,
-				/* .MaxFragmentInputVectors = */ 15,
-				/* .MinProgramTexelOffset = */ -8,
-				/* .MaxProgramTexelOffset = */ 7,
-				/* .MaxClipDistances = */ 8,
-				/* .MaxComputeWorkGroupCountX = */ 65535,
-				/* .MaxComputeWorkGroupCountY = */ 65535,
-				/* .MaxComputeWorkGroupCountZ = */ 65535,
-				/* .MaxComputeWorkGroupSizeX = */ 1024,
-				/* .MaxComputeWorkGroupSizeY = */ 1024,
-				/* .MaxComputeWorkGroupSizeZ = */ 64,
-				/* .MaxComputeUniformComponents = */ 1024,
-				/* .MaxComputeTextureImageUnits = */ 16,
-				/* .MaxComputeImageUniforms = */ 8,
-				/* .MaxComputeAtomicCounters = */ 8,
-				/* .MaxComputeAtomicCounterBuffers = */ 1,
-				/* .MaxVaryingComponents = */ 60,
-				/* .MaxVertexOutputComponents = */ 64,
-				/* .MaxGeometryInputComponents = */ 64,
-				/* .MaxGeometryOutputComponents = */ 128,
-				/* .MaxFragmentInputComponents = */ 128,
-				/* .MaxImageUnits = */ 8,
-				/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8,
-				/* .MaxCombinedShaderOutputResources = */ 8,
-				/* .MaxImageSamples = */ 0,
-				/* .MaxVertexImageUniforms = */ 0,
-				/* .MaxTessControlImageUniforms = */ 0,
-				/* .MaxTessEvaluationImageUniforms = */ 0,
-				/* .MaxGeometryImageUniforms = */ 0,
-				/* .MaxFragmentImageUniforms = */ 8,
-				/* .MaxCombinedImageUniforms = */ 8,
-				/* .MaxGeometryTextureImageUnits = */ 16,
-				/* .MaxGeometryOutputVertices = */ 256,
-				/* .MaxGeometryTotalOutputComponents = */ 1024,
-				/* .MaxGeometryUniformComponents = */ 1024,
-				/* .MaxGeometryVaryingComponents = */ 64,
-				/* .MaxTessControlInputComponents = */ 128,
-				/* .MaxTessControlOutputComponents = */ 128,
-				/* .MaxTessControlTextureImageUnits = */ 16,
-				/* .MaxTessControlUniformComponents = */ 1024,
-				/* .MaxTessControlTotalOutputComponents = */ 4096,
-				/* .MaxTessEvaluationInputComponents = */ 128,
-				/* .MaxTessEvaluationOutputComponents = */ 128,
-				/* .MaxTessEvaluationTextureImageUnits = */ 16,
-				/* .MaxTessEvaluationUniformComponents = */ 1024,
-				/* .MaxTessPatchComponents = */ 120,
-				/* .MaxPatchVertices = */ 32,
-				/* .MaxTessGenLevel = */ 64,
-				/* .MaxViewports = */ 16,
-				/* .MaxVertexAtomicCounters = */ 0,
-				/* .MaxTessControlAtomicCounters = */ 0,
-				/* .MaxTessEvaluationAtomicCounters = */ 0,
-				/* .MaxGeometryAtomicCounters = */ 0,
-				/* .MaxFragmentAtomicCounters = */ 8,
-				/* .MaxCombinedAtomicCounters = */ 8,
-				/* .MaxAtomicCounterBindings = */ 1,
-				/* .MaxVertexAtomicCounterBuffers = */ 0,
-				/* .MaxTessControlAtomicCounterBuffers = */ 0,
-				/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
-				/* .MaxGeometryAtomicCounterBuffers = */ 0,
-				/* .MaxFragmentAtomicCounterBuffers = */ 1,
-				/* .MaxCombinedAtomicCounterBuffers = */ 1,
-				/* .MaxAtomicCounterBufferSize = */ 16384,
-				/* .MaxTransformFeedbackBuffers = */ 4,
-				/* .MaxTransformFeedbackInterleavedComponents = */ 64,
-				/* .MaxCullDistances = */ 8,
-				/* .MaxCombinedClipAndCullDistances = */ 8,
-				/* .MaxSamples = */ 4,
-				/* .maxMeshOutputVerticesNV = */ 256,
-				/* .maxMeshOutputPrimitivesNV = */ 512,
-				/* .maxMeshWorkGroupSizeX_NV = */ 32,
-				/* .maxMeshWorkGroupSizeY_NV = */ 1,
-				/* .maxMeshWorkGroupSizeZ_NV = */ 1,
-				/* .maxTaskWorkGroupSizeX_NV = */ 32,
-				/* .maxTaskWorkGroupSizeY_NV = */ 1,
-				/* .maxTaskWorkGroupSizeZ_NV = */ 1,
-				/* .maxMeshViewCountNV = */ 4,
-				/* .maxDualSourceDrawBuffersEXT = */ 1,
-
-				/* .limits = */ {
-					/* .nonInductiveForLoops = */ 1,
-					/* .whileLoops = */ 1,
-					/* .doWhileLoops = */ 1,
-					/* .generalUniformIndexing = */ 1,
-					/* .generalAttributeMatrixVectorIndexing = */ 1,
-					/* .generalVaryingIndexing = */ 1,
-					/* .generalSamplerIndexing = */ 1,
-					/* .generalVariableIndexing = */ 1,
-					/* .generalConstantMatrixVectorIndexing = */ 1,
-				} };
+			glslang_resource_t default_resource = {};
+			default_resource.max_lights = 32;
+			default_resource.max_clip_planes = 6;
+			default_resource.max_texture_units = 32;
+			default_resource.max_texture_coords = 32;
+			default_resource.max_vertex_attribs = 64;
+			default_resource.max_vertex_uniform_components = 4096;
+			default_resource.max_varying_floats = 64;
+			default_resource.max_vertex_texture_image_units = 32;
+			default_resource.max_combined_texture_image_units = 80;
+			default_resource.max_texture_image_units = 32;
+			default_resource.max_fragment_uniform_components = 4096;
+			default_resource.max_draw_buffers = 32;
+			default_resource.max_vertex_uniform_vectors = 128;
+			default_resource.max_varying_vectors = 8;
+			default_resource.max_fragment_uniform_vectors = 16;
+			default_resource.max_vertex_output_vectors = 16;
+			default_resource.max_fragment_input_vectors = 15;
+			default_resource.min_program_texel_offset = -8;
+			default_resource.max_program_texel_offset = 7;
+			default_resource.max_clip_distances = 8;
+			default_resource.max_compute_work_group_count_x = (int)app->configuration.maxComputeWorkGroupCount[0];
+			default_resource.max_compute_work_group_count_y = (int)app->configuration.maxComputeWorkGroupCount[1];
+			default_resource.max_compute_work_group_count_z = (int)app->configuration.maxComputeWorkGroupCount[2];
+			default_resource.max_compute_work_group_size_x = (int)app->configuration.maxComputeWorkGroupSize[0];
+			default_resource.max_compute_work_group_size_y = (int)app->configuration.maxComputeWorkGroupSize[1];
+			default_resource.max_compute_work_group_size_z = (int)app->configuration.maxComputeWorkGroupSize[2];
+			default_resource.max_compute_uniform_components = 1024;
+			default_resource.max_compute_texture_image_units = 16;
+			default_resource.max_compute_image_uniforms = 8;
+			default_resource.max_compute_atomic_counters = 8;
+			default_resource.max_compute_atomic_counter_buffers = 1;
+			default_resource.max_varying_components = 60;
+			default_resource.max_vertex_output_components = 64;
+			default_resource.max_geometry_input_components = 64;
+			default_resource.max_geometry_output_components = 128;
+			default_resource.max_fragment_input_components = 128;
+			default_resource.max_image_units = 8;
+			default_resource.max_combined_image_units_and_fragment_outputs = 8;
+			default_resource.max_combined_shader_output_resources = 8;
+			default_resource.max_image_samples = 0;
+			default_resource.max_vertex_image_uniforms = 0;
+			default_resource.max_tess_control_image_uniforms = 0;
+			default_resource.max_tess_evaluation_image_uniforms = 0;
+			default_resource.max_geometry_image_uniforms = 0;
+			default_resource.max_fragment_image_uniforms = 8;
+			default_resource.max_combined_image_uniforms = 8;
+			default_resource.max_geometry_texture_image_units = 16;
+			default_resource.max_geometry_output_vertices = 256;
+			default_resource.max_geometry_total_output_components = 1024;
+			default_resource.max_geometry_uniform_components = 1024;
+			default_resource.max_geometry_varying_components = 64;
+			default_resource.max_tess_control_input_components = 128;
+			default_resource.max_tess_control_output_components = 128;
+			default_resource.max_tess_control_texture_image_units = 16;
+			default_resource.max_tess_control_uniform_components = 1024;
+			default_resource.max_tess_control_total_output_components = 4096;
+			default_resource.max_tess_evaluation_input_components = 128;
+			default_resource.max_tess_evaluation_output_components = 128;
+			default_resource.max_tess_evaluation_texture_image_units = 16;
+			default_resource.max_tess_evaluation_uniform_components = 1024;
+			default_resource.max_tess_patch_components = 120;
+			default_resource.max_patch_vertices = 32;
+			default_resource.max_tess_gen_level = 64;
+			default_resource.max_viewports = 16;
+			default_resource.max_vertex_atomic_counters = 0;
+			default_resource.max_tess_control_atomic_counters = 0;
+			default_resource.max_tess_evaluation_atomic_counters = 0;
+			default_resource.max_geometry_atomic_counters = 0;
+			default_resource.max_fragment_atomic_counters = 8;
+			default_resource.max_combined_atomic_counters = 8;
+			default_resource.max_atomic_counter_bindings = 1;
+			default_resource.max_vertex_atomic_counter_buffers = 0;
+			default_resource.max_tess_control_atomic_counter_buffers = 0;
+			default_resource.max_tess_evaluation_atomic_counter_buffers = 0;
+			default_resource.max_geometry_atomic_counter_buffers = 0;
+			default_resource.max_fragment_atomic_counter_buffers = 1;
+			default_resource.max_combined_atomic_counter_buffers = 1;
+			default_resource.max_atomic_counter_buffer_size = 16384;
+			default_resource.max_transform_feedback_buffers = 4;
+			default_resource.max_transform_feedback_interleaved_components = 64;
+			default_resource.max_cull_distances = 8;
+			default_resource.max_combined_clip_and_cull_distances = 8;
+			default_resource.max_samples = 4;
+			default_resource.max_mesh_output_vertices_nv = 256;
+			default_resource.max_mesh_output_primitives_nv = 512;
+			default_resource.max_mesh_work_group_size_x_nv = 32;
+			default_resource.max_mesh_work_group_size_y_nv = 1;
+			default_resource.max_mesh_work_group_size_z_nv = 1;
+			default_resource.max_task_work_group_size_x_nv = 32;
+			default_resource.max_task_work_group_size_y_nv = 1;
+			default_resource.max_task_work_group_size_z_nv = 1;
+			default_resource.max_mesh_view_count_nv = 4;
+
+			default_resource.limits.non_inductive_for_loops = 1;
+			default_resource.limits.while_loops = 1;
+			default_resource.limits.do_while_loops = 1;
+			default_resource.limits.general_uniform_indexing = 1;
+			default_resource.limits.general_attribute_matrix_vector_indexing = 1;
+			default_resource.limits.general_varying_indexing = 1;
+			default_resource.limits.general_sampler_indexing = 1;
+			default_resource.limits.general_variable_indexing = 1;
+			default_resource.limits.general_constant_matrix_vector_indexing = 1;
+
 			glslang_target_client_version_t client_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_VULKAN_1_1 : GLSLANG_TARGET_VULKAN_1_0;
 			glslang_target_language_version_t target_language_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_SPV_1_3 : GLSLANG_TARGET_SPV_1_0;
-			const glslang_input_t input =
+			glslang_input_t input =
 			{
 				GLSLANG_SOURCE_GLSL,
 				GLSLANG_STAGE_COMPUTE,
@@ -26961,10 +34798,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				1,
 				0,
 				GLSLANG_MSG_DEFAULT_BIT,
-				&default_resource,
+				(const glslang_resource_t*)&default_resource,
 			};
 			//printf("%s\n", code0);
-			glslang_shader_t* shader = glslang_shader_create(&input);
+			glslang_shader_t* shader = glslang_shader_create((const glslang_input_t*)&input);
 			const char* err;
 			if (!glslang_shader_preprocess(shader, &input))
 			{
@@ -27069,7 +34906,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		pipelineShaderStageCreateInfo.pSpecializationInfo = 0;// &specializationInfo;
 		computePipelineCreateInfo.stage = pipelineShaderStageCreateInfo;
 		computePipelineCreateInfo.layout = axis->pipelineLayout;
-		res = vkCreateComputePipelines(app->configuration.device[0], VK_NULL_HANDLE, 1, &computePipelineCreateInfo, 0, &axis->pipeline);
+		if (app->configuration.pipelineCache)
+			res = vkCreateComputePipelines(app->configuration.device[0], app->configuration.pipelineCache[0], 1, &computePipelineCreateInfo, 0, &axis->pipeline);
+		else
+			res = vkCreateComputePipelines(app->configuration.device[0], 0, 1, &computePipelineCreateInfo, 0, &axis->pipeline);
 		if (res != VK_SUCCESS) {
 			deleteVkFFT(app);
 			return VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE;
@@ -27081,10 +34921,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		}
 #elif(VKFFT_BACKEND==1)
 		char* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
 			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = strtol(localStrPointer, &localStrPointer, 10);
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (char*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -27092,9 +34932,8 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize - 1);
-			code[codeSize - 1] = '\0';
-			app->currentApplicationStringPos += codeSize + (uint64_t)(floor(log10((double)codeSize))) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
 		}
 		else {
 			nvrtcProgram prog;
@@ -27113,12 +34952,21 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_FAILED_TO_CREATE_PROGRAM;
 			}
-			//const char opts[20] = "--fmad=false";
+			char* opts[5];
+			opts[0] = (char*)malloc(sizeof(char) * 50);
+			if (!opts[0]) {
+				free(code0);
+				code0 = 0;
+				deleteVkFFT(app);
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			sprintf(opts[0], "--gpu-architecture=sm_%" PRIu64 "%" PRIu64 "", app->configuration.computeCapabilityMajor, app->configuration.computeCapabilityMinor);
 			//result = nvrtcAddNameExpression(prog, "&consts");
 			//if (result != NVRTC_SUCCESS) printf("1.5 error: %s\n", nvrtcGetErrorString(result));
 			result = nvrtcCompileProgram(prog,  // prog
-				0,     // numOptions
-				0); // options
+				1,     // numOptions
+				(const char* const*)opts); // options
+			free(opts[0]);
 			if (result != NVRTC_SUCCESS) {
 				printf("nvrtcCompileProgram error: %s\n", nvrtcGetErrorString(result));
 				char* log = (char*)malloc(sizeof(char) * 4000000);
@@ -27140,9 +34988,17 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					return VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM;
 				}
 			}
+#if (CUDA_VERSION >= 11030)
+			result = nvrtcGetCUBINSize(prog, &codeSize);
+#else
 			result = nvrtcGetPTXSize(prog, &codeSize);
+#endif
 			if (result != NVRTC_SUCCESS) {
+#if (CUDA_VERSION >= 11030)
+				printf("nvrtcGetCUBINSize error: %s\n", nvrtcGetErrorString(result));
+#else
 				printf("nvrtcGetPTXSize error: %s\n", nvrtcGetErrorString(result));
+#endif
 				free(code0);
 				code0 = 0;
 				deleteVkFFT(app);
@@ -27157,9 +35013,17 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
 			axis->binary = code;
+#if (CUDA_VERSION >= 11030)
+			result = nvrtcGetCUBIN(prog, code);
+#else
 			result = nvrtcGetPTX(prog, code);
+#endif
 			if (result != NVRTC_SUCCESS) {
+#if (CUDA_VERSION >= 11030)
+				printf("nvrtcGetCUBIN error: %s\n", nvrtcGetErrorString(result));
+#else
 				printf("nvrtcGetPTX error: %s\n", nvrtcGetErrorString(result));
+#endif
 				free(code);
 				code = 0;
 				free(code0);
@@ -27230,10 +35094,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		}
 #elif(VKFFT_BACKEND==2)
 		uint32_t* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
-			uint32_t* localStrPointer = (uint32_t*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = localStrPointer[0];
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (uint32_t*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -27241,8 +35105,8 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize);
-			app->currentApplicationStringPos += codeSize / (sizeof(uint32_t)) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
 		}
 		else
 		{
@@ -27270,6 +35134,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 					return VKFFT_ERROR_FAILED_TO_ADD_NAME_EXPRESSION;
 				}
 			}
+
 			result = hiprtcCompileProgram(prog,  // prog
 				0,     // numOptions
 				0); // options
@@ -27388,9 +35253,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 #elif(VKFFT_BACKEND==3)
 		if (app->configuration.loadApplicationFromString) {
 			char* code;
-			size_t codeSize;
+			uint64_t codeSize;
 			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = strtol(localStrPointer, &localStrPointer, 10);
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
+			size_t codeSize_size_t = (size_t)codeSize;
 			code = (char*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -27398,11 +35264,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize - 2);
-			code[codeSize - 2] = '\0';
-			app->currentApplicationStringPos += codeSize + (uint64_t)(floor(log10((double)codeSize)));
-
-			axis->program = clCreateProgramWithBinary(app->configuration.context[0], 1, app->configuration.device, &codeSize, (const unsigned char**)(&code), 0, &res);
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
+			const unsigned char* temp_code = (const unsigned char*)code;
+			axis->program = clCreateProgramWithBinary(app->configuration.context[0], 1, app->configuration.device, &codeSize_size_t, (const unsigned char**)(&temp_code), 0, &res);
 			if (res != CL_SUCCESS) {
 				free(code);
 				code = 0;
@@ -27416,7 +35281,8 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		}
 		else {
 			size_t codelen = strlen(code0);
-			axis->program = clCreateProgramWithSource(app->configuration.context[0], 1, (const char**)&code0, &codelen, &res);
+			const char* temp_code = (const char*)code0;
+			axis->program = clCreateProgramWithSource(app->configuration.context[0], 1, (const char**)&temp_code, &codelen, &res);
 			if (res != CL_SUCCESS) {
 				free(code0);
 				code0 = 0;
@@ -27456,7 +35322,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM;
 			}
-			axis->binarySize = codeSize;
+			axis->binarySize = (uint64_t)codeSize;
 			axis->binary = (char*)malloc(axis->binarySize);
 			if (!axis->binary) {
 				free(code0);
@@ -27464,7 +35330,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			res = clGetProgramInfo(axis->program, CL_PROGRAM_BINARIES, axis->binarySize, &axis->binary, NULL);
+			res = clGetProgramInfo(axis->program, CL_PROGRAM_BINARIES, codeSize, &axis->binary, NULL);
 			if (res != CL_SUCCESS) {
 				if (app->configuration.saveApplicationToString) {
 					free(axis->binary);
@@ -27489,10 +35355,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 		}
 #elif(VKFFT_BACKEND==4)
 		uint32_t* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
-			uint32_t* localStrPointer = (uint32_t*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = localStrPointer[0];
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (uint32_t*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -27500,8 +35366,9 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize);
-			app->currentApplicationStringPos += codeSize / (sizeof(uint32_t)) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
+
 			const char* pBuildFlags = (app->configuration.useUint64) ? "-ze-opt-greater-than-4GB-buffer-required" : 0;
 			ze_module_desc_t moduleDesc = {
 				ZE_STRUCTURE_TYPE_MODULE_DESC,
@@ -27637,6 +35504,53 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
 			deleteVkFFT(app);
 			return VKFFT_ERROR_FAILED_TO_CREATE_SHADER_MODULE;
 		}
+#elif(VKFFT_BACKEND==5)
+		NS::Error* error;
+		if (app->configuration.loadApplicationFromString) {
+			char* code;
+			uint64_t codeSize;
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
+			size_t codeSize_size_t = (size_t)codeSize;
+			code = (char*)malloc(codeSize);
+			if (!code) {
+				free(code0);
+				code0 = 0;
+				deleteVkFFT(app);
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
+			dispatch_data_t data = dispatch_data_create(code, codeSize, 0, 0);
+			axis->library = app->configuration.device->newLibrary(data, &error);
+			free(code);
+			code = 0;
+		}
+		else {
+			size_t codelen = strlen(code0);
+			MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc();
+			compileOptions->setFastMathEnabled(true);
+			NS::String* str = NS::String::string(code0, NS::UTF8StringEncoding);
+			axis->library = app->configuration.device->newLibrary(str, compileOptions, &error);
+			if (error) {
+				printf("%s\n%s\n", error->debugDescription()->cString(NS::ASCIIStringEncoding), error->localizedDescription()->cString(NS::ASCIIStringEncoding));
+				free(code0);
+				code0 = 0;
+				deleteVkFFT(app);
+				return VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM;
+			}
+			compileOptions->release();
+			if (app->configuration.saveApplicationToString) {
+
+			}
+			str->release();
+		}
+		const char function_name[20] = "VkFFT_main_R2C";
+		NS::String* str = NS::String::string(function_name, NS::UTF8StringEncoding);
+		MTL::Function* function = axis->library->newFunction(str);
+		axis->pipeline = app->configuration.device->newComputePipelineState(function, &error);
+		function->release();
+		str->release();
 #endif
 		if (!app->configuration.keepShaderCode) {
 			free(code0);
@@ -27659,6 +35573,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 	cl_int res = CL_SUCCESS;
 #elif(VKFFT_BACKEND==4)
 	ze_result_t res = ZE_RESULT_SUCCESS;
+#elif(VKFFT_BACKEND==5)
 #endif
 	VkFFTAxis* axis = (reverseBluesteinMultiUpload) ? &FFTPlan->inverseBluesteinAxes[axis_id][axis_upload_id] : &FFTPlan->axes[axis_id][axis_upload_id];
 
@@ -27674,7 +35589,20 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 	axis->specializationConstants.warpSize = app->configuration.warpSize;
 	axis->specializationConstants.numSharedBanks = app->configuration.numSharedBanks;
 	axis->specializationConstants.useUint64 = app->configuration.useUint64;
+#if(VKFFT_BACKEND==2)
+	axis->specializationConstants.useStrict32BitAddress = app->configuration.useStrict32BitAddress;
+#endif
+	axis->specializationConstants.disableSetLocale = app->configuration.disableSetLocale;
+
 	axis->specializationConstants.numAxisUploads = FFTPlan->numAxisUploads[axis_id];
+	axis->specializationConstants.fixMinRaderPrimeMult = app->configuration.fixMinRaderPrimeMult;
+	axis->specializationConstants.fixMaxRaderPrimeMult = app->configuration.fixMaxRaderPrimeMult;
+	axis->specializationConstants.fixMinRaderPrimeFFT = app->configuration.fixMinRaderPrimeFFT;
+	axis->specializationConstants.fixMaxRaderPrimeFFT = app->configuration.fixMaxRaderPrimeFFT;
+
+	axis->specializationConstants.raderUintLUT = (axis->specializationConstants.useRader) ? app->configuration.useRaderUintLUT : 0;
+	axis->specializationConstants.inline_rader_g_pow = (axis->specializationConstants.raderUintLUT) ? 2 : 1;
+	axis->specializationConstants.inline_rader_kernel = (app->configuration.useLUT == 1) ? 0 : 1;
 	uint64_t complexSize;
 	if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory)
 		complexSize = (2 * sizeof(double));
@@ -27689,10 +35617,18 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 	axis->specializationConstants.conjugateConvolution = app->configuration.conjugateConvolution;
 	axis->specializationConstants.crossPowerSpectrumNormalization = app->configuration.crossPowerSpectrumNormalization;
 
-	uint64_t maxSequenceLengthSharedMemory = app->configuration.sharedMemorySize / complexSize;
-	uint64_t maxSequenceLengthSharedMemoryPow2 = app->configuration.sharedMemorySizePow2 / complexSize;
-	uint64_t maxSingleSizeStrided = (app->configuration.coalescedMemory > complexSize) ? app->configuration.sharedMemorySize / (app->configuration.coalescedMemory) : app->configuration.sharedMemorySize / complexSize;
-	uint64_t maxSingleSizeStridedPow2 = (app->configuration.coalescedMemory > complexSize) ? app->configuration.sharedMemorySizePow2 / (app->configuration.coalescedMemory) : app->configuration.sharedMemorySizePow2 / complexSize;
+	uint64_t allowedSharedMemory = app->configuration.sharedMemorySize;
+	uint64_t allowedSharedMemoryPow2 = app->configuration.sharedMemorySizePow2;
+
+	if (axis->specializationConstants.useRaderMult) {
+		allowedSharedMemory -= (axis->specializationConstants.useRaderMult - 1) * complexSize;
+		allowedSharedMemoryPow2 -= (axis->specializationConstants.useRaderMult - 1) * complexSize;
+	}
+
+	uint64_t maxSequenceLengthSharedMemory = allowedSharedMemory / complexSize;
+	uint64_t maxSequenceLengthSharedMemoryPow2 = allowedSharedMemoryPow2 / complexSize;
+	uint64_t maxSingleSizeStrided = (app->configuration.coalescedMemory > complexSize) ? allowedSharedMemory / (app->configuration.coalescedMemory) : allowedSharedMemory / complexSize;
+	uint64_t maxSingleSizeStridedPow2 = (app->configuration.coalescedMemory > complexSize) ? allowedSharedMemoryPow2 / (app->configuration.coalescedMemory) : allowedSharedMemoryPow2 / complexSize;
 
 	axis->specializationConstants.stageStartSize = 1;
 	for (uint64_t i = 0; i < axis_upload_id; i++)
@@ -27826,52 +35762,64 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			deleteVkFFT(app);
 			return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 		}
+#elif(VKFFT_BACKEND==5)
+		app->configuration.tempBuffer = (MTL::Buffer**)malloc(sizeof(MTL::Buffer*));
+		if (!app->configuration.tempBuffer) {
+			deleteVkFFT(app);
+			return VKFFT_ERROR_MALLOC_FAILED;
+		}
+		app->configuration.tempBuffer[0] = app->configuration.device->newBuffer(app->configuration.tempBufferSize[0], MTL::ResourceStorageModePrivate);
 #endif
 	}
+	//generate Rader Kernels
+	resFFT = VkFFTGenerateRaderFFTKernel(app, axis);
+	if (resFFT != VKFFT_SUCCESS) {
+		deleteVkFFT(app);
+		return resFFT;
+	}
 	//allocate LUT
-	if (app->configuration.useLUT) {
-		double double_PI = 3.1415926535897932384626433832795;
+	if (app->configuration.useLUT == 1) {
 		uint64_t dimMult = 1;
 		uint64_t maxStageSum = 0;
 		for (uint64_t i = 0; i < axis->specializationConstants.numStages; i++) {
 			if (i > 0) {
-			switch (axis->specializationConstants.stageRadix[i]) {
-			case 2:
-				maxStageSum += dimMult;
-				break;
-			case 3:
-				maxStageSum += dimMult * 2;
-				break;
-			case 4:
-				maxStageSum += dimMult * 2;
-				break;
-			case 5:
-				maxStageSum += dimMult * 4;
-				break;
+				switch (axis->specializationConstants.stageRadix[i]) {
+				case 2:
+					maxStageSum += dimMult;
+					break;
+				case 3:
+					maxStageSum += dimMult * 2;
+					break;
+				case 4:
+					maxStageSum += dimMult * 2;
+					break;
+				case 5:
+					maxStageSum += dimMult * 4;
+					break;
 				case 6:
 					maxStageSum += dimMult * 5;
 					break;
-			case 7:
-				maxStageSum += dimMult * 6;
-				break;
-			case 8:
-				maxStageSum += dimMult * 3;
-				break;
+				case 7:
+					maxStageSum += dimMult * 6;
+					break;
+				case 8:
+					maxStageSum += dimMult * 3;
+					break;
 				case 9:
 					maxStageSum += dimMult * 8;
 					break;
 				case 10:
 					maxStageSum += dimMult * 9;
 					break;
-			case 11:
-				maxStageSum += dimMult * 10;
-				break;
+				case 11:
+					maxStageSum += dimMult * 10;
+					break;
 				case 12:
 					maxStageSum += dimMult * 11;
 					break;
-			case 13:
-				maxStageSum += dimMult * 12;
-				break;
+				case 13:
+					maxStageSum += dimMult * 12;
+					break;
 				case 14:
 					maxStageSum += dimMult * 13;
 					break;
@@ -27884,27 +35832,168 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				case 32:
 					maxStageSum += dimMult * 5;
 					break;
+				default:
+					maxStageSum += dimMult * (axis->specializationConstants.stageRadix[i]);
+					break;
 				}
 			}
 			dimMult *= axis->specializationConstants.stageRadix[i];
 		}
 		axis->specializationConstants.maxStageSumLUT = maxStageSum;
+
 		dimMult = 1;
+		for (uint64_t k = 0; k < axis->specializationConstants.numRaderPrimes; k++) {
+			if (axis->specializationConstants.raderContainer[k].type == 0) {
+				axis->specializationConstants.raderContainer[k].RaderRadixOffsetLUT = maxStageSum;
+				for (uint64_t i = 0; i < axis->specializationConstants.raderContainer[k].numStages; i++) {
+					if (i > 0) {
+						switch (axis->specializationConstants.raderContainer[k].stageRadix[i]) {
+						case 2:
+							maxStageSum += dimMult;
+							break;
+						case 3:
+							maxStageSum += dimMult * 2;
+							break;
+						case 4:
+							maxStageSum += dimMult * 2;
+							break;
+						case 5:
+							maxStageSum += dimMult * 4;
+							break;
+						case 6:
+							maxStageSum += dimMult * 5;
+							break;
+						case 7:
+							maxStageSum += dimMult * 6;
+							break;
+						case 8:
+							maxStageSum += dimMult * 3;
+							break;
+						case 9:
+							maxStageSum += dimMult * 8;
+							break;
+						case 10:
+							maxStageSum += dimMult * 9;
+							break;
+						case 11:
+							maxStageSum += dimMult * 10;
+							break;
+						case 12:
+							maxStageSum += dimMult * 11;
+							break;
+						case 13:
+							maxStageSum += dimMult * 12;
+							break;
+						case 14:
+							maxStageSum += dimMult * 13;
+							break;
+						case 15:
+							maxStageSum += dimMult * 14;
+							break;
+						case 16:
+							maxStageSum += dimMult * 4;
+							break;
+						case 32:
+							maxStageSum += dimMult * 5;
+							break;
+						default:
+							maxStageSum += dimMult * (axis->specializationConstants.raderContainer[k].stageRadix[i]);
+							break;
+						}
+					}
+					dimMult *= axis->specializationConstants.raderContainer[k].stageRadix[i];
+				}
+				axis->specializationConstants.maxStageSumLUT = maxStageSum;
+				dimMult = 1;
+			}
+		}
+		//iFFT LUT
+		dimMult = 1;
+		for (uint64_t k = 0; k < axis->specializationConstants.numRaderPrimes; k++) {
+			if (axis->specializationConstants.raderContainer[k].type == 0) {
+				axis->specializationConstants.raderContainer[k].RaderRadixOffsetLUTiFFT = maxStageSum;
+				for (int64_t i = axis->specializationConstants.raderContainer[k].numStages - 1; i >= 0; i--) {
+					if (i < (int64_t)axis->specializationConstants.raderContainer[k].numStages - 1) {
+						switch (axis->specializationConstants.raderContainer[k].stageRadix[i]) {
+						case 2:
+							maxStageSum += dimMult;
+							break;
+						case 3:
+							maxStageSum += dimMult * 2;
+							break;
+						case 4:
+							maxStageSum += dimMult * 2;
+							break;
+						case 5:
+							maxStageSum += dimMult * 4;
+							break;
+						case 6:
+							maxStageSum += dimMult * 5;
+							break;
+						case 7:
+							maxStageSum += dimMult * 6;
+							break;
+						case 8:
+							maxStageSum += dimMult * 3;
+							break;
+						case 9:
+							maxStageSum += dimMult * 8;
+							break;
+						case 10:
+							maxStageSum += dimMult * 9;
+							break;
+						case 11:
+							maxStageSum += dimMult * 10;
+							break;
+						case 12:
+							maxStageSum += dimMult * 11;
+							break;
+						case 13:
+							maxStageSum += dimMult * 12;
+							break;
+						case 14:
+							maxStageSum += dimMult * 13;
+							break;
+						case 15:
+							maxStageSum += dimMult * 14;
+							break;
+						case 16:
+							maxStageSum += dimMult * 4;
+							break;
+						case 32:
+							maxStageSum += dimMult * 5;
+							break;
+						default:
+							maxStageSum += dimMult * (axis->specializationConstants.raderContainer[k].stageRadix[i]);
+							break;
+						}
+					}
+					dimMult *= axis->specializationConstants.raderContainer[k].stageRadix[i];
+				}
+				axis->specializationConstants.maxStageSumLUT = maxStageSum;
+				dimMult = 1;
+			}
+		}
+
 		if (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) {
+			long double double_PI = 3.14159265358979323846264338327950288419716939937510L;
 			if (axis_upload_id > 0) {
 				if ((app->configuration.performDCT == 2) || (app->configuration.performDCT == 3)) {
-					axis->specializationConstants.startDCT3LUT = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim);
-					axis->bufferLUTSize = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim + (app->configuration.size[axis_id] / 2 + 2)) * 2 * sizeof(double);
+					axis->specializationConstants.startDCT3LUT = (maxStageSum);
+					if (app->configuration.useLUT_4step == 1) axis->specializationConstants.startDCT3LUT += axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim;
+					axis->bufferLUTSize = (maxStageSum + (app->configuration.size[axis_id] / 2 + 2)) * 2 * sizeof(double);
 				}
 				else {
 					if ((app->configuration.performDCT == 4) && (app->configuration.size[axis_id] % 2 == 0)) {
-						axis->specializationConstants.startDCT3LUT = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim);
+						axis->specializationConstants.startDCT3LUT = (maxStageSum);
+						if (app->configuration.useLUT_4step == 1) axis->specializationConstants.startDCT3LUT += axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim;
 						axis->specializationConstants.startDCT4LUT = (axis->specializationConstants.startDCT3LUT + (app->configuration.size[axis_id] / 4 + 2));
-						axis->bufferLUTSize = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim + (app->configuration.size[axis_id] / 4 + 2) + app->configuration.size[axis_id] / 2) * 2 * sizeof(double);
+						axis->bufferLUTSize = (maxStageSum + (app->configuration.size[axis_id] / 4 + 2) + app->configuration.size[axis_id] / 2) * 2 * sizeof(double);
 					}
 					else
-						axis->bufferLUTSize = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim) * 2 * sizeof(double);
+						axis->bufferLUTSize = (maxStageSum) * 2 * sizeof(double);
 				}
+				if (app->configuration.useLUT_4step == 1) axis->bufferLUTSize += axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim * 2 * sizeof(double);
 			}
 			else {
 				if ((app->configuration.performDCT == 2) || (app->configuration.performDCT == 3)) {
@@ -27922,7 +36011,15 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 						axis->bufferLUTSize = (maxStageSum) * 2 * sizeof(double);
 				}
 			}
-			if (axis->bufferLUTSize==0) axis->bufferLUTSize = sizeof(double);
+			if (axis->specializationConstants.useRader) {
+				for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+					if (!axis->specializationConstants.inline_rader_kernel) {
+						axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT = axis->bufferLUTSize / (2 * sizeof(double));
+						axis->bufferLUTSize += (axis->specializationConstants.raderContainer[i].prime - 1) * 2 * sizeof(double);
+					}
+				}
+			}
+			if (axis->bufferLUTSize == 0) axis->bufferLUTSize = sizeof(double);
 			double* tempLUT = (double*)malloc(axis->bufferLUTSize);
 			if (!tempLUT) {
 				deleteVkFFT(app);
@@ -27934,51 +36031,133 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				if ((axis->specializationConstants.stageRadix[i] & (axis->specializationConstants.stageRadix[i] - 1)) == 0) {
 					for (uint64_t k = 0; k < log2(axis->specializationConstants.stageRadix[i]); k++) {
 						for (uint64_t j = 0; j < localStageSize; j++) {
-							tempLUT[2 * (j + localStageSum)] = cos(j * double_PI / localStageSize / pow(2, k));
-							tempLUT[2 * (j + localStageSum) + 1] = sin(j * double_PI / localStageSize / pow(2, k));
+							tempLUT[2 * (j + localStageSum)] = (double)cos(j * double_PI / localStageSize / pow(2, k));
+							tempLUT[2 * (j + localStageSum) + 1] = (double)sin(j * double_PI / localStageSize / pow(2, k));
 						}
 						localStageSum += localStageSize;
 					}
-					localStageSize *= axis->specializationConstants.stageRadix[i];
+				}
+				else if (axis->specializationConstants.rader_generator[i] > 0) {
+					for (uint64_t j = 0; j < localStageSize; j++) {
+						for (int64_t k = (axis->specializationConstants.stageRadix[i] - 1); k >= 0; k--) {
+							tempLUT[2 * (k + localStageSum)] = (double)cos(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
+							tempLUT[2 * (k + localStageSum) + 1] = (double)sin(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
+						}
+						localStageSum += (axis->specializationConstants.stageRadix[i]);
+					}
 				}
 				else {
 					for (uint64_t k = (axis->specializationConstants.stageRadix[i] - 1); k > 0; k--) {
 						for (uint64_t j = 0; j < localStageSize; j++) {
-							tempLUT[2 * (j + localStageSum)] = cos(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
-							tempLUT[2 * (j + localStageSum) + 1] = sin(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
+							tempLUT[2 * (j + localStageSum)] = (double)cos(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
+							tempLUT[2 * (j + localStageSum) + 1] = (double)sin(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
 						}
 						localStageSum += localStageSize;
 					}
-					localStageSize *= axis->specializationConstants.stageRadix[i];
 				}
+				localStageSize *= axis->specializationConstants.stageRadix[i];
 			}
 
-			if (axis_upload_id > 0) {
+
+			if (axis->specializationConstants.useRader) {
+				for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+					if (axis->specializationConstants.raderContainer[i].type) {
+						if (!axis->specializationConstants.inline_rader_kernel) {
+							for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+								uint64_t g_pow = 1;
+								for (uint64_t t = 0; t < axis->specializationConstants.raderContainer[i].prime - 1 - j; t++) {
+									g_pow = (g_pow * axis->specializationConstants.raderContainer[i].generator) % axis->specializationConstants.raderContainer[i].prime;
+								}
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT)] = (double)cos(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime);
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT) + 1] = (double)(-sin(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime));
+							}
+						}
+					}
+					else {
+						localStageSize = axis->specializationConstants.raderContainer[i].stageRadix[0];
+						localStageSum = 0;
+						for (uint64_t l = 1; l < axis->specializationConstants.raderContainer[i].numStages; l++) {
+							if ((axis->specializationConstants.raderContainer[i].stageRadix[l] & (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1)) == 0) {
+								for (uint64_t k = 0; k < log2(axis->specializationConstants.raderContainer[i].stageRadix[l]); k++) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT)] = (double)cos(j * double_PI / localStageSize / pow(2, k));
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT) + 1] = (double)sin(j * double_PI / localStageSize / pow(2, k));
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							else {
+								for (uint64_t k = (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1); k > 0; k--) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT)] = (double)cos(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT) + 1] = (double)sin(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							localStageSize *= axis->specializationConstants.raderContainer[i].stageRadix[l];
+						}
+
+						localStageSize = axis->specializationConstants.raderContainer[i].stageRadix[axis->specializationConstants.raderContainer[i].numStages - 1];
+						localStageSum = 0;
+						for (int64_t l = (int64_t)axis->specializationConstants.raderContainer[i].numStages - 2; l >= 0; l--) {
+							if ((axis->specializationConstants.raderContainer[i].stageRadix[l] & (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1)) == 0) {
+								for (uint64_t k = 0; k < log2(axis->specializationConstants.raderContainer[i].stageRadix[l]); k++) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT)] = (double)cos(j * double_PI / localStageSize / pow(2, k));
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT) + 1] = (double)sin(j * double_PI / localStageSize / pow(2, k));
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							else {
+								for (uint64_t k = (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1); k > 0; k--) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT)] = (double)cos(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT) + 1] = (double)sin(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							localStageSize *= axis->specializationConstants.raderContainer[i].stageRadix[l];
+						}
+
+						if (!axis->specializationConstants.inline_rader_kernel) {
+							double* raderFFTkernel = (double*)axis->specializationConstants.raderContainer[i].raderFFTkernel;
+							for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT)] = (double)(raderFFTkernel[2 * j] / (long double)(axis->specializationConstants.raderContainer[i].prime - 1));
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT) + 1] = (double)(raderFFTkernel[2 * j + 1] / (long double)(axis->specializationConstants.raderContainer[i].prime - 1));
+							}
+						}
+					}
+				}
+			}
+			if ((axis_upload_id > 0) && (app->configuration.useLUT_4step == 1)) {
 				for (uint64_t i = 0; i < axis->specializationConstants.stageStartSize; i++) {
 					for (uint64_t j = 0; j < axis->specializationConstants.fftDim; j++) {
-						double angle = 2 * double_PI * ((i * j) / (double)(axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim));
-						tempLUT[maxStageSum * 2 + 2 * (i + j * axis->specializationConstants.stageStartSize)] = cos(angle);
-						tempLUT[maxStageSum * 2 + 2 * (i + j * axis->specializationConstants.stageStartSize) + 1] = sin(angle);
+						long double angle = 2 * double_PI * ((i * j) / (long double)(axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim));
+						tempLUT[maxStageSum * 2 + 2 * (i + j * axis->specializationConstants.stageStartSize)] = (double)cos(angle);
+						tempLUT[maxStageSum * 2 + 2 * (i + j * axis->specializationConstants.stageStartSize) + 1] = (double)sin(angle);
 					}
 				}
 			}
 			if ((app->configuration.performDCT == 2) || (app->configuration.performDCT == 3)) {
 				for (uint64_t j = 0; j < app->configuration.size[axis_id] / 2 + 2; j++) {
-					double angle = (double_PI / 2.0 / (double)(app->configuration.size[axis_id])) * j;
-					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j] = cos(angle);
-					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j + 1] = sin(angle);
+					long double angle = (double_PI / 2.0 / (long double)(app->configuration.size[axis_id])) * j;
+					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j] = (double)cos(angle);
+					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j + 1] = (double)sin(angle);
 				}
 			}
 			if ((app->configuration.performDCT == 4) && (app->configuration.size[axis_id] % 2 == 0)) {
 				for (uint64_t j = 0; j < app->configuration.size[axis_id] / 4 + 2; j++) {
-					double angle = (double_PI / 2.0 / (double)(app->configuration.size[axis_id] / 2)) * j;
-					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j] = cos(angle);
-					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j + 1] = sin(angle);
+					long double angle = (double_PI / 2.0 / (long double)(app->configuration.size[axis_id] / 2)) * j;
+					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j] = (double)cos(angle);
+					tempLUT[2 * axis->specializationConstants.startDCT3LUT + 2 * j + 1] = (double)sin(angle);
 				}
 				for (uint64_t j = 0; j < app->configuration.size[axis_id] / 2; j++) {
-					double angle = (-double_PI / 8.0 / (double)(app->configuration.size[axis_id] / 2)) * (2 * j + 1);
-					tempLUT[2 * axis->specializationConstants.startDCT4LUT + 2 * j] = cos(angle);
-					tempLUT[2 * axis->specializationConstants.startDCT4LUT + 2 * j + 1] = sin(angle);
+					long double angle = (-double_PI / 8.0 / (long double)(app->configuration.size[axis_id] / 2)) * (2 * j + 1);
+					tempLUT[2 * axis->specializationConstants.startDCT4LUT + 2 * j] = (double)cos(angle);
+					tempLUT[2 * axis->specializationConstants.startDCT4LUT + 2 * j + 1] = (double)sin(angle);
 				}
 			}
 			axis->referenceLUT = 0;
@@ -28000,7 +36179,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 					axis->referenceLUT = 1;
 				}
 				else {
-					if (((axis_id == 1) || (axis_id == 2)) && (!((!axis->specializationConstants.reorderFourStep) && (FFTPlan->numAxisUploads[axis_id] > 1))) && ((axis->specializationConstants.fft_dim_full == FFTPlan->axes[0][0].specializationConstants.fft_dim_full) && (FFTPlan->numAxisUploads[axis_id] == 1) && (axis->specializationConstants.fft_dim_full < maxSingleSizeStrided / axis->specializationConstants.registerBoost)) && ((!app->configuration.performDCT) || (app->configuration.size[axis_id] == app->configuration.size[0]))) {
+					uint64_t checkRadixOrder = 1;
+					for (uint64_t i = 0; i < axis->specializationConstants.numStages; i++)
+						if (FFTPlan->axes[0][0].specializationConstants.stageRadix[i] != axis->specializationConstants.stageRadix[i]) checkRadixOrder = 0;
+					if (checkRadixOrder && ((axis_id == 1) || (axis_id == 2)) && (!((!axis->specializationConstants.reorderFourStep) && (FFTPlan->numAxisUploads[axis_id] > 1))) && ((axis->specializationConstants.fft_dim_full == FFTPlan->axes[0][0].specializationConstants.fft_dim_full) && (FFTPlan->numAxisUploads[axis_id] == 1) && (axis->specializationConstants.fft_dim_full < maxSingleSizeStrided / axis->specializationConstants.registerBoost)) && ((!app->configuration.performDCT) || (app->configuration.size[axis_id] == app->configuration.size[0]))) {
 						axis->bufferLUT = FFTPlan->axes[0][axis_upload_id].bufferLUT;
 #if(VKFFT_BACKEND==0)
 						axis->bufferLUTDeviceMemory = FFTPlan->axes[0][axis_upload_id].bufferLUTDeviceMemory;
@@ -28009,7 +36191,11 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 						axis->referenceLUT = 1;
 					}
 					else {
-						if ((axis_id == 2) && (axis->specializationConstants.fft_dim_full == FFTPlan->axes[1][0].specializationConstants.fft_dim_full) && ((!app->configuration.performDCT) || (app->configuration.size[2] == app->configuration.size[1]))) {
+						checkRadixOrder = 1;
+						for (uint64_t i = 0; i < axis->specializationConstants.numStages; i++)
+							if (FFTPlan->axes[1][0].specializationConstants.stageRadix[i] != axis->specializationConstants.stageRadix[i]) checkRadixOrder = 0;
+
+						if (checkRadixOrder && (axis_id == 2) && (axis->specializationConstants.fft_dim_full == FFTPlan->axes[1][0].specializationConstants.fft_dim_full) && ((!app->configuration.performDCT) || (app->configuration.size[2] == app->configuration.size[1]))) {
 							axis->bufferLUT = FFTPlan->axes[1][axis_upload_id].bufferLUT;
 #if(VKFFT_BACKEND==0)
 							axis->bufferLUTDeviceMemory = FFTPlan->axes[1][axis_upload_id].bufferLUTDeviceMemory;
@@ -28026,7 +36212,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return resFFT;
 							}
-							resFFT = transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
 							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
@@ -28041,12 +36227,12 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 							}
-							res = cudaMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, cudaMemcpyHostToDevice);
-							if (res != cudaSuccess) {
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+								return resFFT;
 							}
 #elif(VKFFT_BACKEND==2)
 							res = hipMalloc((void**)&axis->bufferLUT, axis->bufferLUTSize);
@@ -28056,12 +36242,12 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 							}
-							res = hipMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, hipMemcpyHostToDevice);
-							if (res != hipSuccess) {
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+								return resFFT;
 							}
 #elif(VKFFT_BACKEND==3)
 							axis->bufferLUT = clCreateBuffer(app->configuration.context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, axis->bufferLUTSize, tempLUT, &res);
@@ -28081,36 +36267,21 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 							}
-							ze_command_queue_desc_t commandQueueCopyDesc = {
-							ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-							0,
-							app->configuration.commandQueueID,
-							0, // index
-							0, // flags
-							ZE_COMMAND_QUEUE_MODE_DEFAULT,
-							ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-							};
-							ze_command_list_handle_t copyCommandList;
-							res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
-							if (res != ZE_RESULT_SUCCESS) {
-								deleteVkFFT(app);
-								free(tempLUT);
-								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-							}
-							res = zeCommandListAppendMemoryCopy(copyCommandList, axis->bufferLUT, tempLUT, axis->bufferLUTSize, 0, 0, 0);
-							if (res != ZE_RESULT_SUCCESS) {
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_COPY;
+								return resFFT;
 							}
-							res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-							if (res != ZE_RESULT_SUCCESS) {
+#elif(VKFFT_BACKEND==5)
+							axis->bufferLUT = app->configuration.device->newBuffer(axis->bufferLUTSize, MTL::ResourceStorageModePrivate);
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+								return resFFT;
 							}
 #endif
 						}
@@ -28121,20 +36292,24 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			tempLUT = 0;
 		}
 		else {
+			double double_PI = 3.14159265358979323846264338327950288419716939937510;
 			if (axis_upload_id > 0) {
 				if ((app->configuration.performDCT == 2) || (app->configuration.performDCT == 3)) {
-					axis->specializationConstants.startDCT3LUT = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim);
-					axis->bufferLUTSize = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim + (app->configuration.size[axis_id] / 2 + 2)) * 2 * sizeof(float);
+					axis->specializationConstants.startDCT3LUT = (maxStageSum);
+					if (app->configuration.useLUT_4step == 1) axis->specializationConstants.startDCT3LUT += axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim;
+					axis->bufferLUTSize = (maxStageSum + (app->configuration.size[axis_id] / 2 + 2)) * 2 * sizeof(float);
 				}
 				else {
 					if ((app->configuration.performDCT == 4) && (app->configuration.size[axis_id] % 2 == 0)) {
-						axis->specializationConstants.startDCT3LUT = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim);
+						axis->specializationConstants.startDCT3LUT = (maxStageSum);
+						if (app->configuration.useLUT_4step == 1) axis->specializationConstants.startDCT3LUT += axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim;
 						axis->specializationConstants.startDCT4LUT = (axis->specializationConstants.startDCT3LUT + (axis->specializationConstants.fftDim / 4 + 2));
-						axis->bufferLUTSize = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim + (app->configuration.size[axis_id] / 4 + 2) + app->configuration.size[axis_id] / 2) * 2 * sizeof(float);
+						axis->bufferLUTSize = (maxStageSum + (app->configuration.size[axis_id] / 4 + 2) + app->configuration.size[axis_id] / 2) * 2 * sizeof(float);
 					}
 					else
-						axis->bufferLUTSize = (maxStageSum + axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim) * 2 * sizeof(float);
+						axis->bufferLUTSize = (maxStageSum) * 2 * sizeof(float);
 				}
+				if (app->configuration.useLUT_4step == 1) axis->bufferLUTSize += axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim * 2 * sizeof(float);
 			}
 			else {
 				if ((app->configuration.performDCT == 2) || (app->configuration.performDCT == 3)) {
@@ -28151,6 +36326,14 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 						axis->bufferLUTSize = (maxStageSum) * 2 * sizeof(float);
 				}
 			}
+			if (axis->specializationConstants.useRader) {
+				for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+					if (!axis->specializationConstants.inline_rader_kernel) {
+						axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT = axis->bufferLUTSize / (2 * sizeof(float));
+						axis->bufferLUTSize += (axis->specializationConstants.raderContainer[i].prime - 1) * 2 * sizeof(float);
+					}
+				}
+			}
 			if (axis->bufferLUTSize == 0) axis->bufferLUTSize = sizeof(float);
 			float* tempLUT = (float*)malloc(axis->bufferLUTSize);
 			if (!tempLUT) {
@@ -28168,7 +36351,15 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 						}
 						localStageSum += localStageSize;
 					}
-					localStageSize *= axis->specializationConstants.stageRadix[i];
+				}
+				else if (axis->specializationConstants.rader_generator[i] > 0) {
+					for (uint64_t j = 0; j < localStageSize; j++) {
+						for (int64_t k = (axis->specializationConstants.stageRadix[i] - 1); k >= 0; k--) {
+							tempLUT[2 * (k + localStageSum)] = (float)cos(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
+							tempLUT[2 * (k + localStageSum) + 1] = (float)sin(j * 2.0 * k / axis->specializationConstants.stageRadix[i] * double_PI / localStageSize);
+						}
+						localStageSum += (axis->specializationConstants.stageRadix[i]);
+					}
 				}
 				else {
 					for (uint64_t k = (axis->specializationConstants.stageRadix[i] - 1); k > 0; k--) {
@@ -28178,11 +36369,83 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 						}
 						localStageSum += localStageSize;
 					}
-					localStageSize *= axis->specializationConstants.stageRadix[i];
 				}
+				localStageSize *= axis->specializationConstants.stageRadix[i];
 			}
 
-			if (axis_upload_id > 0) {
+			if (axis->specializationConstants.useRader) {
+				for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+					if (axis->specializationConstants.raderContainer[i].type) {
+						if (!axis->specializationConstants.inline_rader_kernel) {
+							for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+								uint64_t g_pow = 1;
+								for (uint64_t t = 0; t < axis->specializationConstants.raderContainer[i].prime - 1 - j; t++) {
+									g_pow = (g_pow * axis->specializationConstants.raderContainer[i].generator) % axis->specializationConstants.raderContainer[i].prime;
+								}
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT)] = (float)(cos(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime));
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT) + 1] = (float)(-sin(2.0 * g_pow * double_PI / axis->specializationConstants.raderContainer[i].prime));
+							}
+						}
+					}
+					else {
+						localStageSize = axis->specializationConstants.raderContainer[i].stageRadix[0];
+						localStageSum = 0;
+						for (uint64_t l = 1; l < axis->specializationConstants.raderContainer[i].numStages; l++) {
+							if ((axis->specializationConstants.raderContainer[i].stageRadix[l] & (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1)) == 0) {
+								for (uint64_t k = 0; k < log2(axis->specializationConstants.raderContainer[i].stageRadix[l]); k++) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT)] = (float)cos(j * double_PI / localStageSize / pow(2, k));
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT) + 1] = (float)sin(j * double_PI / localStageSize / pow(2, k));
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							else {
+								for (uint64_t k = (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1); k > 0; k--) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT)] = (float)cos(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUT) + 1] = (float)sin(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							localStageSize *= axis->specializationConstants.raderContainer[i].stageRadix[l];
+						}
+						localStageSize = axis->specializationConstants.raderContainer[i].stageRadix[axis->specializationConstants.raderContainer[i].numStages - 1];
+						localStageSum = 0;
+						for (int64_t l = (int64_t)axis->specializationConstants.raderContainer[i].numStages - 2; l >= 0; l--) {
+							if ((axis->specializationConstants.raderContainer[i].stageRadix[l] & (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1)) == 0) {
+								for (uint64_t k = 0; k < log2(axis->specializationConstants.raderContainer[i].stageRadix[l]); k++) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT)] = (float)cos(j * double_PI / localStageSize / pow(2, k));
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT) + 1] = (float)sin(j * double_PI / localStageSize / pow(2, k));
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							else {
+								for (uint64_t k = (axis->specializationConstants.raderContainer[i].stageRadix[l] - 1); k > 0; k--) {
+									for (uint64_t j = 0; j < localStageSize; j++) {
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT)] = (float)cos(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+										tempLUT[2 * (j + localStageSum + axis->specializationConstants.raderContainer[i].RaderRadixOffsetLUTiFFT) + 1] = (float)sin(j * 2.0 * k / axis->specializationConstants.raderContainer[i].stageRadix[l] * double_PI / localStageSize);
+									}
+									localStageSum += localStageSize;
+								}
+							}
+							localStageSize *= axis->specializationConstants.raderContainer[i].stageRadix[l];
+						}
+						if (!axis->specializationConstants.inline_rader_kernel) {
+							float* raderFFTkernel = (float*)axis->specializationConstants.raderContainer[i].raderFFTkernel;
+							for (uint64_t j = 0; j < (axis->specializationConstants.raderContainer[i].prime - 1); j++) {//fix later
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT)] = (float)(raderFFTkernel[2 * j] / (axis->specializationConstants.raderContainer[i].prime - 1));
+								tempLUT[2 * (j + axis->specializationConstants.raderContainer[i].RaderKernelOffsetLUT) + 1] = (float)(raderFFTkernel[2 * j + 1] / (axis->specializationConstants.raderContainer[i].prime - 1));
+							}
+						}
+					}
+				}
+			}
+
+			if ((axis_upload_id > 0) && (app->configuration.useLUT_4step == 1)) {
 				for (uint64_t i = 0; i < axis->specializationConstants.stageStartSize; i++) {
 					for (uint64_t j = 0; j < axis->specializationConstants.fftDim; j++) {
 						double angle = 2 * double_PI * ((i * j) / (double)(axis->specializationConstants.stageStartSize * axis->specializationConstants.fftDim));
@@ -28211,6 +36474,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				}
 			}
 			axis->referenceLUT = 0;
+
 			if (reverseBluesteinMultiUpload == 1) {
 				axis->bufferLUT = FFTPlan->axes[axis_id][axis_upload_id].bufferLUT;
 #if(VKFFT_BACKEND==0)
@@ -28229,7 +36493,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 					axis->referenceLUT = 1;
 				}
 				else {
-					if (((axis_id == 1) || (axis_id == 2)) && (!((!axis->specializationConstants.reorderFourStep) && (FFTPlan->numAxisUploads[axis_id] > 1))) && ((axis->specializationConstants.fft_dim_full == FFTPlan->axes[0][0].specializationConstants.fft_dim_full) && (FFTPlan->numAxisUploads[axis_id] == 1) && (axis->specializationConstants.fft_dim_full < maxSingleSizeStrided / axis->specializationConstants.registerBoost)) && ((!app->configuration.performDCT) || (app->configuration.size[axis_id] == app->configuration.size[0]))) {
+					uint64_t checkRadixOrder = 1;
+					for (uint64_t i = 0; i < axis->specializationConstants.numStages; i++)
+						if (FFTPlan->axes[0][0].specializationConstants.stageRadix[i] != axis->specializationConstants.stageRadix[i]) checkRadixOrder = 0;
+					if (checkRadixOrder && ((axis_id == 1) || (axis_id == 2)) && (!((!axis->specializationConstants.reorderFourStep) && (FFTPlan->numAxisUploads[axis_id] > 1))) && ((axis->specializationConstants.fft_dim_full == FFTPlan->axes[0][0].specializationConstants.fft_dim_full) && (FFTPlan->numAxisUploads[axis_id] == 1) && (axis->specializationConstants.fft_dim_full < maxSingleSizeStrided / axis->specializationConstants.registerBoost)) && ((!app->configuration.performDCT) || (app->configuration.size[axis_id] == app->configuration.size[0]))) {
 						axis->bufferLUT = FFTPlan->axes[0][axis_upload_id].bufferLUT;
 #if(VKFFT_BACKEND==0)
 						axis->bufferLUTDeviceMemory = FFTPlan->axes[0][axis_upload_id].bufferLUTDeviceMemory;
@@ -28238,7 +36505,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 						axis->referenceLUT = 1;
 					}
 					else {
-						if ((axis_id == 2) && (axis->specializationConstants.fft_dim_full == FFTPlan->axes[1][0].specializationConstants.fft_dim_full) && ((!app->configuration.performDCT) || (app->configuration.size[2] == app->configuration.size[1]))) {
+						checkRadixOrder = 1;
+						for (uint64_t i = 0; i < axis->specializationConstants.numStages; i++)
+							if (FFTPlan->axes[1][0].specializationConstants.stageRadix[i] != axis->specializationConstants.stageRadix[i]) checkRadixOrder = 0;
+						if (checkRadixOrder && (axis_id == 2) && (axis->specializationConstants.fft_dim_full == FFTPlan->axes[1][0].specializationConstants.fft_dim_full) && ((!app->configuration.performDCT) || (app->configuration.size[2] == app->configuration.size[1]))) {
 							axis->bufferLUT = FFTPlan->axes[1][axis_upload_id].bufferLUT;
 #if(VKFFT_BACKEND==0)
 							axis->bufferLUTDeviceMemory = FFTPlan->axes[1][axis_upload_id].bufferLUTDeviceMemory;
@@ -28255,7 +36525,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return resFFT;
 							}
-							resFFT = transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
 							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
@@ -28270,12 +36540,12 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 							}
-							res = cudaMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, cudaMemcpyHostToDevice);
-							if (res != cudaSuccess) {
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+								return resFFT;
 							}
 #elif(VKFFT_BACKEND==2)
 							res = hipMalloc((void**)&axis->bufferLUT, axis->bufferLUTSize);
@@ -28285,12 +36555,12 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 							}
-							res = hipMemcpy(axis->bufferLUT, tempLUT, axis->bufferLUTSize, hipMemcpyHostToDevice);
-							if (res != hipSuccess) {
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+								return resFFT;
 							}
 #elif(VKFFT_BACKEND==3)
 							axis->bufferLUT = clCreateBuffer(app->configuration.context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, axis->bufferLUTSize, tempLUT, &res);
@@ -28310,36 +36580,21 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 								tempLUT = 0;
 								return VKFFT_ERROR_FAILED_TO_ALLOCATE;
 							}
-							ze_command_queue_desc_t commandQueueCopyDesc = {
-							ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
-							0,
-							app->configuration.commandQueueID,
-							0, // index
-							0, // flags
-							ZE_COMMAND_QUEUE_MODE_DEFAULT,
-							ZE_COMMAND_QUEUE_PRIORITY_NORMAL
-							};
-							ze_command_list_handle_t copyCommandList;
-							res = zeCommandListCreateImmediate(app->configuration.context[0], app->configuration.device[0], &commandQueueCopyDesc, &copyCommandList);
-							if (res != ZE_RESULT_SUCCESS) {
-								deleteVkFFT(app);
-								free(tempLUT);
-								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST;
-							}
-							res = zeCommandListAppendMemoryCopy(copyCommandList, axis->bufferLUT, tempLUT, axis->bufferLUTSize, 0, 0, 0);
-							if (res != ZE_RESULT_SUCCESS) {
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_COPY;
+								return resFFT;
 							}
-							res = zeCommandQueueSynchronize(app->configuration.commandQueue[0], UINT32_MAX);
-							if (res != ZE_RESULT_SUCCESS) {
+#elif(VKFFT_BACKEND==5)
+							axis->bufferLUT = app->configuration.device->newBuffer(axis->bufferLUTSize, MTL::ResourceStorageModePrivate);
+							resFFT = VkFFT_transferDataFromCPU(app, tempLUT, &axis->bufferLUT, axis->bufferLUTSize);
+							if (resFFT != VKFFT_SUCCESS) {
 								deleteVkFFT(app);
 								free(tempLUT);
 								tempLUT = 0;
-								return VKFFT_ERROR_FAILED_TO_SYNCHRONIZE;
+								return resFFT;
 							}
 #endif
 						}
@@ -28350,7 +36605,135 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			tempLUT = 0;
 		}
 	}
+	if (axis->specializationConstants.useRaderMult)	axis->specializationConstants.additionalRaderSharedSize = (axis->specializationConstants.useRaderMult - 1);
 
+	//allocate RaderUintLUT
+	if (axis->specializationConstants.raderUintLUT) {
+		if (app->bufferRaderUintLUT[axis_id][axis_upload_id] == 0) {
+			app->bufferRaderUintLUTSize[axis_id][axis_upload_id] = 0;
+			for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+				app->bufferRaderUintLUTSize[axis_id][axis_upload_id] += axis->specializationConstants.raderContainer[i].prime * sizeof(uint32_t);
+			}
+			uint32_t* tempRaderUintLUT = (uint32_t*)malloc(app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (!tempRaderUintLUT) {
+				deleteVkFFT(app);
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			uint64_t current_offset = 0;
+			for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+				if (axis->specializationConstants.raderContainer[i].prime > 0) {
+					axis->specializationConstants.raderContainer[i].raderUintLUToffset = current_offset;
+					uint64_t g_pow = 1;
+					tempRaderUintLUT[current_offset] = 1;
+					current_offset++;
+					for (uint64_t t = 0; t < axis->specializationConstants.raderContainer[i].prime - 1; t++) {
+						g_pow = (g_pow * axis->specializationConstants.raderContainer[i].generator) % axis->specializationConstants.raderContainer[i].prime;
+						tempRaderUintLUT[current_offset] = (uint32_t)g_pow;
+						current_offset++;
+					}
+				}
+			}
+
+#if(VKFFT_BACKEND==0)
+			resFFT = allocateFFTBuffer(app, &app->bufferRaderUintLUT[axis_id][axis_upload_id], &app->bufferRaderUintLUTDeviceMemory[axis_id][axis_upload_id], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (resFFT != VKFFT_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return resFFT;
+			}
+			resFFT = VkFFT_transferDataFromCPU(app, tempRaderUintLUT, &app->bufferRaderUintLUT[axis_id][axis_upload_id], app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (resFFT != VKFFT_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return resFFT;
+			}
+#elif(VKFFT_BACKEND==1)
+			res = cudaMalloc((void**)&app->bufferRaderUintLUT[axis_id][axis_upload_id], app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (res != cudaSuccess) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+			}
+			resFFT = VkFFT_transferDataFromCPU(app, tempRaderUintLUT, &app->bufferRaderUintLUT[axis_id][axis_upload_id], app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (resFFT != VKFFT_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return resFFT;
+			}
+#elif(VKFFT_BACKEND==2)
+			res = hipMalloc((void**)&app->bufferRaderUintLUT[axis_id][axis_upload_id], app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (res != hipSuccess) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+			}
+			resFFT = VkFFT_transferDataFromCPU(app, tempRaderUintLUT, &app->bufferRaderUintLUT[axis_id][axis_upload_id], app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (resFFT != VKFFT_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return resFFT;
+			}
+#elif(VKFFT_BACKEND==3)
+			app->bufferRaderUintLUT[axis_id][axis_upload_id] = clCreateBuffer(app->configuration.context[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, app->bufferRaderUintLUTSize[axis_id][axis_upload_id], tempRaderUintLUT, &res);
+			if (res != CL_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+			}
+#elif(VKFFT_BACKEND==4)
+			ze_device_mem_alloc_desc_t device_desc = {};
+			device_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
+			res = zeMemAllocDevice(app->configuration.context[0], &device_desc, app->bufferRaderUintLUTSize[axis_id][axis_upload_id], sizeof(uint32_t), app->configuration.device[0], &app->bufferRaderUintLUT[axis_id][axis_upload_id]);
+			if (res != ZE_RESULT_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return VKFFT_ERROR_FAILED_TO_ALLOCATE;
+			}
+			resFFT = VkFFT_transferDataFromCPU(app, tempRaderUintLUT, &app->bufferRaderUintLUT[axis_id][axis_upload_id], app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (resFFT != VKFFT_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return resFFT;
+			}
+#elif(VKFFT_BACKEND==5)
+			app->bufferRaderUintLUT[axis_id][axis_upload_id] = app->configuration.device->newBuffer(app->bufferRaderUintLUTSize[axis_id][axis_upload_id], MTL::ResourceStorageModePrivate);
+			resFFT = VkFFT_transferDataFromCPU(app, tempRaderUintLUT, &app->bufferRaderUintLUT[axis_id][axis_upload_id], app->bufferRaderUintLUTSize[axis_id][axis_upload_id]);
+			if (resFFT != VKFFT_SUCCESS) {
+				deleteVkFFT(app);
+				free(tempRaderUintLUT);
+				tempRaderUintLUT = 0;
+				return resFFT;
+			}
+#endif
+			free(tempRaderUintLUT);
+			tempRaderUintLUT = 0;
+		}
+		else {
+			uint64_t current_offset = 0;
+			for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+				if (axis->specializationConstants.raderContainer[i].prime > 0) {
+					axis->specializationConstants.raderContainer[i].raderUintLUToffset = current_offset;
+					uint64_t g_pow = 1;
+					current_offset += axis->specializationConstants.raderContainer[i].prime;
+				}
+			}
+		}
+
+		axis->bufferRaderUintLUT = app->bufferRaderUintLUT[axis_id][axis_upload_id];
+#if(VKFFT_BACKEND==0)
+		axis->bufferRaderUintLUTDeviceMemory = app->bufferRaderUintLUTDeviceMemory[axis_id][axis_upload_id];
+#endif
+		axis->bufferRaderUintLUTSize = app->bufferRaderUintLUTSize[axis_id][axis_upload_id];
+	}
 	//configure strides
 
 	uint64_t* axisStride = axis->specializationConstants.inputStride;
@@ -28725,11 +37108,19 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 #endif
 		axis->numBindings++;
 	}
-	if (app->configuration.useLUT) {
+	if (app->configuration.useLUT == 1) {
 		axis->specializationConstants.LUTBindingID = axis->numBindings;
 		axis->specializationConstants.numBuffersBound[axis->numBindings] = 1;
 #if(VKFFT_BACKEND==0)
 		descriptorPoolSize.descriptorCount++;
+#endif
+		axis->numBindings++;
+	}
+	if (axis->specializationConstants.raderUintLUT) {
+		axis->specializationConstants.RaderUintLUTBindingID = axis->numBindings;
+		axis->specializationConstants.numBuffersBound[axis->numBindings] = 1;
+#if(VKFFT_BACKEND==0)
+		descriptorPoolSize.descriptorCount++;
 #endif
 		axis->numBindings++;
 	}
@@ -28834,57 +37225,114 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		//if (axis->groupedBatch * (uint64_t)ceil(axis->specializationConstants.fftDim / 8.0) < app->configuration.warpSize) axis->groupedBatch = app->configuration.warpSize / (uint64_t)ceil(axis->specializationConstants.fftDim / 8.0);
 		//axis->groupedBatch = (app->configuration.sharedMemorySize / axis->specializationConstants.fftDim >= app->configuration.coalescedMemory) ? maxSequenceLengthSharedMemory / axis->specializationConstants.fftDim : axis->groupedBatch;
 		if (((FFTPlan->numAxisUploads[axis_id] == 1) && (axis_id == 0)) || ((axis_id == 0) && (!axis->specializationConstants.reorderFourStep) && (axis_upload_id == 0))) {
-			axis->groupedBatch = (maxSequenceLengthSharedMemoryPow2 / axis->specializationConstants.fftDim > axis->groupedBatch) ? maxSequenceLengthSharedMemoryPow2 / axis->specializationConstants.fftDim : axis->groupedBatch;
+			axis->groupedBatch = (maxSequenceLengthSharedMemory / axis->specializationConstants.fftDim > axis->groupedBatch) ? maxSequenceLengthSharedMemory / axis->specializationConstants.fftDim : axis->groupedBatch;
 		}
 		else {
-			axis->groupedBatch = (maxSingleSizeStridedPow2 / axis->specializationConstants.fftDim > 1) ? maxSingleSizeStridedPow2 / axis->specializationConstants.fftDim * axis->groupedBatch : axis->groupedBatch;
+			axis->groupedBatch = (maxSingleSizeStrided / axis->specializationConstants.fftDim > 1) ? maxSingleSizeStrided / axis->specializationConstants.fftDim * axis->groupedBatch : axis->groupedBatch;
 		}
 		//axis->groupedBatch = 8;
 		//shared memory bank conflict resolve
 //#if(VKFFT_BACKEND!=2)//for some reason, hip doesn't get performance increase from having variable shared memory strides.
-		if ((FFTPlan->numAxisUploads[axis_id] == 2) && (axis_upload_id == 0) && (axis->specializationConstants.fftDim * maxBatchCoalesced <= maxSequenceLengthSharedMemory)) {
-			axis->groupedBatch = (uint64_t)ceil(axis->groupedBatch / 2.0);
+		if (app->configuration.vendorID == 0x10DE) {
+			if (FFTPlan->numAxisUploads[axis_id] == 2) {
+				if ((axis_upload_id > 0) || (axis->specializationConstants.fftDim <= 512)) {
+					if (axis->specializationConstants.fftDim * (64 / complexSize) <= maxSequenceLengthSharedMemory) {
+						axis->groupedBatch = 64 / complexSize;
+						maxBatchCoalesced = 64 / complexSize;
+					}
+					if (axis->specializationConstants.fftDim * (128 / complexSize) <= maxSequenceLengthSharedMemory) {
+						axis->groupedBatch = 128 / complexSize;
+						maxBatchCoalesced = 128 / complexSize;
+					}
+				}
+			}
+			//#endif
+			if (FFTPlan->numAxisUploads[axis_id] == 3) {
+				if (axis->specializationConstants.fftDim * (64 / complexSize) <= maxSequenceLengthSharedMemory) {
+					axis->groupedBatch = 64 / complexSize;
+					maxBatchCoalesced = 64 / complexSize;
+				}
+				if (axis->specializationConstants.fftDim * (128 / complexSize) <= maxSequenceLengthSharedMemory) {
+					axis->groupedBatch = 128 / complexSize;
+					maxBatchCoalesced = 128 / complexSize;
+				}
+			}
 		}
-		//#endif
-		if ((FFTPlan->numAxisUploads[axis_id] == 3) && (axis_upload_id == 0) && (axis->specializationConstants.fftDim < maxSequenceLengthSharedMemory / (2 * complexSize))) {
-			axis->groupedBatch = (uint64_t)ceil(axis->groupedBatch / 2.0);
+		else {
+			if ((FFTPlan->numAxisUploads[axis_id] == 2) && (axis_upload_id == 0) && (axis->specializationConstants.fftDim * maxBatchCoalesced <= maxSequenceLengthSharedMemory)) {
+				axis->groupedBatch = (uint64_t)ceil(axis->groupedBatch / 2.0);
+			}
+			//#endif
+			if ((FFTPlan->numAxisUploads[axis_id] == 3) && (axis_upload_id == 0) && (axis->specializationConstants.fftDim < maxSequenceLengthSharedMemory / (2 * complexSize))) {
+				axis->groupedBatch = (uint64_t)ceil(axis->groupedBatch / 2.0);
+			}
 		}
 		if (axis->groupedBatch < maxBatchCoalesced) axis->groupedBatch = maxBatchCoalesced;
 		axis->groupedBatch = (axis->groupedBatch / maxBatchCoalesced) * maxBatchCoalesced;
 		//half bandiwdth technique
 		if (!((axis_id == 0) && (FFTPlan->numAxisUploads[axis_id] == 1)) && !((axis_id == 0) && (axis_upload_id == 0) && (!axis->specializationConstants.reorderFourStep)) && (axis->specializationConstants.fftDim > maxSingleSizeStrided)) {
-			axis->groupedBatch = (uint64_t)ceil(axis->groupedBatch / 2.0);
+			axis->groupedBatch = maxSequenceLengthSharedMemory / axis->specializationConstants.fftDim;
+			if (axis->groupedBatch == 0) axis->groupedBatch = 1;
 		}
 
 		if ((app->configuration.halfThreads) && (axis->groupedBatch * axis->specializationConstants.fftDim * complexSize >= app->configuration.sharedMemorySize))
 			axis->groupedBatch = (uint64_t)ceil(axis->groupedBatch / 2.0);
 		if (axis->groupedBatch > app->configuration.warpSize) axis->groupedBatch = (axis->groupedBatch / app->configuration.warpSize) * app->configuration.warpSize;
 		if (axis->groupedBatch > 2 * maxBatchCoalesced) axis->groupedBatch = (axis->groupedBatch / (2 * maxBatchCoalesced)) * (2 * maxBatchCoalesced);
-		if (axis->groupedBatch > 4 * maxBatchCoalesced) axis->groupedBatch = (axis->groupedBatch / (4 * maxBatchCoalesced)) * (2 * maxBatchCoalesced);
-		uint64_t maxThreadNum = (axis_id) ? (maxSingleSizeStrided * app->configuration.coalescedMemory / complexSize) / (axis->specializationConstants.min_registers_per_thread * axis->specializationConstants.registerBoost) : maxSequenceLengthSharedMemory / (axis->specializationConstants.min_registers_per_thread * axis->specializationConstants.registerBoost);
-		if (maxThreadNum > app->configuration.maxThreadsNum) maxThreadNum = app->configuration.maxThreadsNum;
+		if (axis->groupedBatch > 4 * maxBatchCoalesced) axis->groupedBatch = (axis->groupedBatch / (4 * maxBatchCoalesced)) * (4 * maxBatchCoalesced);
+		//uint64_t maxThreadNum = (axis_id) ? (maxSingleSizeStrided * app->configuration.coalescedMemory / complexSize) / (axis->specializationConstants.min_registers_per_thread * axis->specializationConstants.registerBoost) : maxSequenceLengthSharedMemory / (axis->specializationConstants.min_registers_per_thread * axis->specializationConstants.registerBoost);
+		//if (maxThreadNum > app->configuration.maxThreadsNum) maxThreadNum = app->configuration.maxThreadsNum;
+		uint64_t maxThreadNum = app->configuration.maxThreadsNum;
 		axis->specializationConstants.axisSwapped = 0;
 		uint64_t r2cmult = (axis->specializationConstants.mergeSequencesR2C) ? 2 : 1;
 		if (axis_id == 0) {
-
 			if (axis_upload_id == 0) {
-				axis->axisBlock[0] = (axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost > 1) ? axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost : 1;
+				axis->axisBlock[0] = (((uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread)) / axis->specializationConstants.registerBoost > 1) ? ((uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread)) / axis->specializationConstants.registerBoost : 1;
+				if (axis->specializationConstants.useRaderMult) {
+					uint64_t locMaxBatchCoalesced = ((axis_id == 0) && (((axis_upload_id == 0) && ((!app->configuration.reorderFourStep) || (app->useBluesteinFFT[axis_id]))) || (axis->specializationConstants.numAxisUploads == 1))) ? 1 : maxBatchCoalesced;
+					uint64_t final_rader_thread_count = 0;
+					for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+						if (axis->specializationConstants.raderContainer[i].type == 1) {
+							uint64_t temp_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / (double)((axis->specializationConstants.rader_min_registers / 2) * 2)) / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2));
+							uint64_t active_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader);
+							if (active_rader > 1) {
+								if ((((double)active_rader - (axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader) >= 0.5) && ((((uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)(active_rader - 1)) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2)) * locMaxBatchCoalesced) <= app->configuration.maxThreadsNum)) active_rader--;
+							}
+							uint64_t local_estimate_rader_threadnum = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)active_rader) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
+
+							uint64_t temp_rader_thread_count = ((uint64_t)ceil(axis->axisBlock[0] / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2))) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
+							if (temp_rader_thread_count < local_estimate_rader_threadnum) temp_rader_thread_count = local_estimate_rader_threadnum;
+							if (temp_rader_thread_count > final_rader_thread_count) final_rader_thread_count = temp_rader_thread_count;
+						}
+					}
+					axis->axisBlock[0] = final_rader_thread_count;
+					if (axis->axisBlock[0] * axis->groupedBatch > maxThreadNum) axis->groupedBatch = locMaxBatchCoalesced;
+				}
+				if (axis->specializationConstants.useRaderFFT) {
+					if (axis->axisBlock[0] < axis->specializationConstants.minRaderFFTThreadNum) axis->axisBlock[0] = axis->specializationConstants.minRaderFFTThreadNum;
+				}
 				if (axis->axisBlock[0] > maxThreadNum) axis->axisBlock[0] = maxThreadNum;
 				if (axis->axisBlock[0] > app->configuration.maxComputeWorkGroupSize[0]) axis->axisBlock[0] = app->configuration.maxComputeWorkGroupSize[0];
 				if (axis->specializationConstants.reorderFourStep && (FFTPlan->numAxisUploads[axis_id] > 1))
 					axis->axisBlock[1] = axis->groupedBatch;
 				else {
 					//axis->axisBlock[1] = (axis->axisBlock[0] < app->configuration.warpSize) ? app->configuration.warpSize / axis->axisBlock[0] : 1;
-					axis->axisBlock[1] = ((axis->axisBlock[0] < app->configuration.aimThreads) && ((axis->axisBlock[0] < 32) || ((axis->axisBlock[0] & (axis->axisBlock[0] - 1)) != 0))) ? app->configuration.aimThreads / axis->axisBlock[0] : 1;
+					uint64_t estimate_batch = (((axis->axisBlock[0] / app->configuration.warpSize) == 1) && ((axis->axisBlock[0] / (double)app->configuration.warpSize) < 1.5)) ? app->configuration.aimThreads / app->configuration.warpSize : app->configuration.aimThreads / axis->axisBlock[0];
+					if (estimate_batch == 0) estimate_batch = 1;
+					axis->axisBlock[1] = ((axis->axisBlock[0] < app->configuration.aimThreads) && ((axis->axisBlock[0] < app->configuration.warpSize) || (axis->specializationConstants.useRader))) ? estimate_batch : 1;
 				}
+
 				uint64_t currentAxisBlock1 = axis->axisBlock[1];
 				for (uint64_t i = currentAxisBlock1; i < 2 * currentAxisBlock1; i++) {
-					if (((FFTPlan->numAxisUploads[0] > 1) && (((FFTPlan->actualFFTSizePerAxis[axis_id][0] / axis->specializationConstants.fftDim) % i) == 0)) || ((FFTPlan->numAxisUploads[0] == 1) && (((FFTPlan->actualFFTSizePerAxis[axis_id][1] / r2cmult) % i) == 0))) {
-						if (i * axis->specializationConstants.fftDim * complexSize <= app->configuration.sharedMemorySize) axis->axisBlock[1] = i;
+					if (((FFTPlan->numAxisUploads[0] > 1) && (!(((FFTPlan->actualFFTSizePerAxis[axis_id][0] / axis->specializationConstants.fftDim) % axis->axisBlock[1]) == 0))) || ((FFTPlan->numAxisUploads[0] == 1) && (!(((FFTPlan->actualFFTSizePerAxis[axis_id][1] / r2cmult) % axis->axisBlock[1]) == 0)))) {
+						if (i * axis->specializationConstants.fftDim * complexSize <= allowedSharedMemory) axis->axisBlock[1] = i;
 						i = 2 * currentAxisBlock1;
 					}
 				}
-
+				if (((axis->specializationConstants.fftDim % 2 == 0) || (axis->axisBlock[0] < app->configuration.numSharedBanks / 4)) && (!(((!axis->specializationConstants.reorderFourStep) || (axis->specializationConstants.useBluesteinFFT)) && (FFTPlan->numAxisUploads[0] > 1))) && (axis->axisBlock[1] > 1) && (axis->axisBlock[1] * axis->specializationConstants.fftDim < maxSequenceLengthSharedMemoryPow2) && (!((app->configuration.performZeropadding[0] || app->configuration.performZeropadding[1] || app->configuration.performZeropadding[2])))) {
+					//we plan to swap - this reduces bank conflicts
+					axis->axisBlock[1] = (uint64_t)pow(2, (uint64_t)ceil(log2((double)axis->axisBlock[1])));
+				}
 				if ((FFTPlan->numAxisUploads[0] > 1) && ((uint64_t)ceil(FFTPlan->actualFFTSizePerAxis[axis_id][0] / axis->specializationConstants.fftDim) < axis->axisBlock[1])) axis->axisBlock[1] = (uint64_t)ceil(FFTPlan->actualFFTSizePerAxis[axis_id][0] / axis->specializationConstants.fftDim);
 				if ((axis->specializationConstants.mergeSequencesR2C != 0) && (axis->specializationConstants.fftDim * axis->axisBlock[1] >= maxSequenceLengthSharedMemory)) {
 					axis->specializationConstants.mergeSequencesR2C = 0;
@@ -28903,7 +37351,12 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 					r2cmult = 1;
 				}
 				if ((FFTPlan->numAxisUploads[0] == 1) && ((uint64_t)ceil(FFTPlan->actualFFTSizePerAxis[axis_id][1] / (double)r2cmult) < axis->axisBlock[1])) axis->axisBlock[1] = (uint64_t)ceil(FFTPlan->actualFFTSizePerAxis[axis_id][1] / (double)r2cmult);
-
+				if (app->configuration.vendorID == 0x10DE) {
+					while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[1] > maxBatchCoalesced)) {
+						axis->axisBlock[1] /= 2;
+						if (axis->axisBlock[1] < maxBatchCoalesced) axis->axisBlock[1] = maxBatchCoalesced;
+					}
+				}
 				if (axis->axisBlock[1] > app->configuration.maxComputeWorkGroupSize[1]) axis->axisBlock[1] = app->configuration.maxComputeWorkGroupSize[1];
 				//if (axis->axisBlock[0] * axis->axisBlock[1] > app->configuration.maxThreadsNum) axis->axisBlock[1] /= 2;
 				if (axis->axisBlock[0] * axis->axisBlock[1] > maxThreadNum) {
@@ -28917,7 +37370,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 					}
 				}
 				while ((axis->axisBlock[1] * (axis->specializationConstants.fftDim / axis->specializationConstants.registerBoost)) > maxSequenceLengthSharedMemory) axis->axisBlock[1] /= 2;
-				if (((axis->specializationConstants.fftDim % 2 == 0) || (axis->axisBlock[0] < app->configuration.numSharedBanks / 4)) && (!(((!axis->specializationConstants.reorderFourStep) || (axis->specializationConstants.useBluesteinFFT)) && (FFTPlan->numAxisUploads[0] > 1))) && (axis->axisBlock[1] > 1) && (axis->axisBlock[1] * axis->specializationConstants.fftDim < maxSequenceLengthSharedMemoryPow2) && (!((app->configuration.performZeropadding[0] || app->configuration.performZeropadding[1] || app->configuration.performZeropadding[2])))) {
+				if (((axis->specializationConstants.fftDim % 2 == 0) || (axis->axisBlock[0] < app->configuration.numSharedBanks / 4)) && (!(((!axis->specializationConstants.reorderFourStep) || (axis->specializationConstants.useBluesteinFFT)) && (FFTPlan->numAxisUploads[0] > 1))) && (axis->axisBlock[1] > 1) && (axis->axisBlock[1] * axis->specializationConstants.fftDim < maxSequenceLengthSharedMemory) && (!((app->configuration.performZeropadding[0] || app->configuration.performZeropadding[1] || app->configuration.performZeropadding[2])))) {
 					/*#if (VKFFT_BACKEND==0)
 										if (((axis->specializationConstants.fftDim & (axis->specializationConstants.fftDim - 1)) != 0)) {
 											uint64_t temp = axis->axisBlock[1];
@@ -28936,10 +37389,40 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				axis->axisBlock[3] = axis->specializationConstants.fftDim;
 			}
 			else {
-				axis->axisBlock[1] = (axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost > 1) ? axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost : 1;
+				axis->axisBlock[1] = ((uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread) / axis->specializationConstants.registerBoost > 1) ? (uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread) / axis->specializationConstants.registerBoost : 1;
+				if (axis->specializationConstants.useRaderMult) {
+					uint64_t final_rader_thread_count = 0;
+					for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+						if (axis->specializationConstants.raderContainer[i].type == 1) {
+							uint64_t temp_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / (double)((axis->specializationConstants.rader_min_registers / 2) * 2)) / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2));
+							uint64_t active_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader);
+							if (active_rader > 1) {
+								if ((((double)active_rader - (axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader) >= 0.5) && ((((uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)(active_rader - 1)) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2)) * maxBatchCoalesced) <= app->configuration.maxThreadsNum)) active_rader--;
+							}
+							uint64_t local_estimate_rader_threadnum = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)active_rader) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
+
+							uint64_t temp_rader_thread_count = ((uint64_t)ceil(axis->axisBlock[1] / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2))) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
+							if (temp_rader_thread_count < local_estimate_rader_threadnum) temp_rader_thread_count = local_estimate_rader_threadnum;
+							if (temp_rader_thread_count > final_rader_thread_count) final_rader_thread_count = temp_rader_thread_count;
+						}
+					}
+					axis->axisBlock[1] = final_rader_thread_count;
+					if (axis->groupedBatch * axis->axisBlock[1] > maxThreadNum) axis->groupedBatch = maxBatchCoalesced;
+				}
+				if (axis->specializationConstants.useRaderFFT) {
+					if (axis->axisBlock[1] < axis->specializationConstants.minRaderFFTThreadNum) axis->axisBlock[1] = axis->specializationConstants.minRaderFFTThreadNum;
+				}
+
 				uint64_t scale = app->configuration.aimThreads / axis->axisBlock[1] / axis->groupedBatch;
-				if (scale > 1) axis->groupedBatch *= scale;
+				if ((scale > 1) && ((axis->specializationConstants.fftDim * axis->groupedBatch * scale <= maxSequenceLengthSharedMemory))) axis->groupedBatch *= scale;
+
 				axis->axisBlock[0] = (axis->specializationConstants.stageStartSize > axis->groupedBatch) ? axis->groupedBatch : axis->specializationConstants.stageStartSize;
+				if (app->configuration.vendorID == 0x10DE) {
+					while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[0] > maxBatchCoalesced)) {
+						axis->axisBlock[0] /= 2;
+						if (axis->axisBlock[0] < maxBatchCoalesced) axis->axisBlock[0] = maxBatchCoalesced;
+					}
+				}
 				if (axis->axisBlock[0] > app->configuration.maxComputeWorkGroupSize[0]) axis->axisBlock[0] = app->configuration.maxComputeWorkGroupSize[0];
 				if (axis->axisBlock[0] * axis->axisBlock[1] > maxThreadNum) {
 					for (uint64_t i = 1; i <= axis->axisBlock[0]; i++) {
@@ -28958,9 +37441,37 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		}
 		if (axis_id == 1) {
 
-			axis->axisBlock[1] = (axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost > 1) ? axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost : 1;
+			axis->axisBlock[1] = ((uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread) / axis->specializationConstants.registerBoost > 1) ? ((uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread)) / axis->specializationConstants.registerBoost : 1;
+			if (axis->specializationConstants.useRaderMult) {
+				uint64_t final_rader_thread_count = 0;
+				for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+					if (axis->specializationConstants.raderContainer[i].type == 1) {
+						uint64_t temp_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / (double)((axis->specializationConstants.rader_min_registers / 2) * 2)) / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2));
+						uint64_t active_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader);
+						if (active_rader > 1) {
+							if ((((double)active_rader - (axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader) >= 0.5) && ((((uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)(active_rader - 1)) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2)) * maxBatchCoalesced) <= app->configuration.maxThreadsNum)) active_rader--;
+						}
+						uint64_t local_estimate_rader_threadnum = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)active_rader) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
+
+						uint64_t temp_rader_thread_count = ((uint64_t)ceil(axis->axisBlock[1] / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2))) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
+						if (temp_rader_thread_count < local_estimate_rader_threadnum) temp_rader_thread_count = local_estimate_rader_threadnum;
+						if (temp_rader_thread_count > final_rader_thread_count) final_rader_thread_count = temp_rader_thread_count;
+					}
+				}
+				axis->axisBlock[1] = final_rader_thread_count;
+				if (axis->groupedBatch * axis->axisBlock[1] > maxThreadNum) axis->groupedBatch = maxBatchCoalesced;
+			}
+			if (axis->specializationConstants.useRaderFFT) {
+				if (axis->axisBlock[1] < axis->specializationConstants.minRaderFFTThreadNum) axis->axisBlock[1] = axis->specializationConstants.minRaderFFTThreadNum;
+			}
 
 			axis->axisBlock[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] > axis->groupedBatch) ? axis->groupedBatch : FFTPlan->actualFFTSizePerAxis[axis_id][0];
+			if (app->configuration.vendorID == 0x10DE) {
+				while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[0] > maxBatchCoalesced)) {
+					axis->axisBlock[0] /= 2;
+					if (axis->axisBlock[0] < maxBatchCoalesced) axis->axisBlock[0] = maxBatchCoalesced;
+				}
+			}
 			if (axis->axisBlock[0] > app->configuration.maxComputeWorkGroupSize[0]) axis->axisBlock[0] = app->configuration.maxComputeWorkGroupSize[0];
 			if (axis->axisBlock[0] * axis->axisBlock[1] > maxThreadNum) {
 				for (uint64_t i = 1; i <= axis->axisBlock[0]; i++) {
@@ -28977,10 +37488,37 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 
 		}
 		if (axis_id == 2) {
-			axis->axisBlock[1] = (axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost > 1) ? axis->specializationConstants.fftDim / axis->specializationConstants.min_registers_per_thread / axis->specializationConstants.registerBoost : 1;
+			axis->axisBlock[1] = ((uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread) / axis->specializationConstants.registerBoost > 1) ? ((uint64_t)ceil(axis->specializationConstants.fftDim / (double)axis->specializationConstants.min_registers_per_thread)) / axis->specializationConstants.registerBoost : 1;
+			if (axis->specializationConstants.useRaderMult) {
+				uint64_t final_rader_thread_count = 0;
+				for (uint64_t i = 0; i < axis->specializationConstants.numRaderPrimes; i++) {
+					if (axis->specializationConstants.raderContainer[i].type == 1) {
+						uint64_t temp_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / (double)((axis->specializationConstants.rader_min_registers / 2) * 2)) / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2));
+						uint64_t active_rader = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader);
+						if (active_rader > 1) {
+							if ((((double)active_rader - (axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)temp_rader) >= 0.5) && ((((uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)(active_rader - 1)) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2)) * maxBatchCoalesced) <= app->configuration.maxThreadsNum)) active_rader--;
+						}
+						uint64_t local_estimate_rader_threadnum = (uint64_t)ceil((axis->specializationConstants.fftDim / axis->specializationConstants.raderContainer[i].prime) / (double)active_rader) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
 
-			axis->axisBlock[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] > axis->groupedBatch) ? axis->groupedBatch : FFTPlan->actualFFTSizePerAxis[axis_id][0];
+						uint64_t temp_rader_thread_count = ((uint64_t)ceil(axis->axisBlock[1] / (double)((axis->specializationConstants.raderContainer[i].prime + 1) / 2))) * ((axis->specializationConstants.raderContainer[i].prime + 1) / 2);
+						if (temp_rader_thread_count < local_estimate_rader_threadnum) temp_rader_thread_count = local_estimate_rader_threadnum;
+						if (temp_rader_thread_count > final_rader_thread_count) final_rader_thread_count = temp_rader_thread_count;
+					}
+				}
+				axis->axisBlock[1] = final_rader_thread_count;
+				if (axis->groupedBatch * axis->axisBlock[1] > maxThreadNum) axis->groupedBatch = maxBatchCoalesced;
+			}
+			if (axis->specializationConstants.useRaderFFT) {
+				if (axis->axisBlock[1] < axis->specializationConstants.minRaderFFTThreadNum) axis->axisBlock[1] = axis->specializationConstants.minRaderFFTThreadNum;
+			}
 
+			axis->axisBlock[0] = (FFTPlan->actualFFTSizePerAxis[axis_id][0] > axis->groupedBatch) ? axis->groupedBatch : FFTPlan->actualFFTSizePerAxis[axis_id][0];
+			if (app->configuration.vendorID == 0x10DE) {
+				while ((axis->axisBlock[1] * axis->axisBlock[0] >= 2 * app->configuration.aimThreads) && (axis->axisBlock[0] > maxBatchCoalesced)) {
+					axis->axisBlock[0] /= 2;
+					if (axis->axisBlock[0] < maxBatchCoalesced) axis->axisBlock[0] = maxBatchCoalesced;
+				}
+			}
 			if (axis->axisBlock[0] > app->configuration.maxComputeWorkGroupSize[0]) axis->axisBlock[0] = app->configuration.maxComputeWorkGroupSize[0];
 			if (axis->axisBlock[0] * axis->axisBlock[1] > maxThreadNum) {
 				for (uint64_t i = 1; i <= axis->axisBlock[0]; i++) {
@@ -29011,6 +37549,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		axis->specializationConstants.localSize[0] = axis->axisBlock[0];
 		axis->specializationConstants.localSize[1] = axis->axisBlock[1];
 		axis->specializationConstants.localSize[2] = axis->axisBlock[2];
+		axis->specializationConstants.numSubgroups = (uint64_t)ceil(axis->axisBlock[0] * axis->axisBlock[1] * axis->axisBlock[2] / (double)app->configuration.warpSize);
 		//specializationInfo.pData = &axis->specializationConstants;
 		//uint64_t registerBoost = (FFTPlan->numAxisUploads[axis_id] > 1) ? app->configuration.registerBoost4Step : app->configuration.registerBoost;
 
@@ -29114,12 +37653,6 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			if (!(axis->specializationConstants.convolutionStep && (app->configuration.matrixConvolution > 1))) tempSize[2] *= app->configuration.coordinateFeatures;
 			//if (app->configuration.performZeropadding[1]) tempSize[1] = (uint64_t)ceil(tempSize[1] / 2.0);
 			//if (app->configuration.performZeropadding[2]) tempSize[2] = (uint64_t)ceil(tempSize[2] / 2.0);
-			if (tempSize[0] > app->configuration.maxComputeWorkGroupCount[0]) axis->specializationConstants.performWorkGroupShift[0] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[0] = 0;
-			if (tempSize[1] > app->configuration.maxComputeWorkGroupCount[1]) axis->specializationConstants.performWorkGroupShift[1] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[1] = 0;
-			if (tempSize[2] > app->configuration.maxComputeWorkGroupCount[2]) axis->specializationConstants.performWorkGroupShift[2] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[2] = 0;
 		}
 		if (axis_id == 1) {
 			tempSize[0] = (uint64_t)ceil(FFTPlan->actualFFTSizePerAxis[axis_id][0] / (double)axis->axisBlock[0] * FFTPlan->actualFFTSizePerAxis[axis_id][1] / (double)axis->specializationConstants.fftDim);
@@ -29129,14 +37662,6 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			if (!(axis->specializationConstants.convolutionStep && (app->configuration.matrixConvolution > 1))) tempSize[2] *= app->configuration.coordinateFeatures;
 			//if (app->configuration.actualPerformR2C == 1) tempSize[0] = (uint64_t)ceil(tempSize[0] / 2.0);
 			//if (app->configuration.performZeropadding[2]) tempSize[2] = (uint64_t)ceil(tempSize[2] / 2.0);
-
-			if (tempSize[0] > app->configuration.maxComputeWorkGroupCount[0]) axis->specializationConstants.performWorkGroupShift[0] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[0] = 0;
-			if (tempSize[1] > app->configuration.maxComputeWorkGroupCount[1]) axis->specializationConstants.performWorkGroupShift[1] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[1] = 0;
-			if (tempSize[2] > app->configuration.maxComputeWorkGroupCount[2]) axis->specializationConstants.performWorkGroupShift[2] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[2] = 0;
-
 		}
 		if (axis_id == 2) {
 			tempSize[0] = (uint64_t)ceil(FFTPlan->actualFFTSizePerAxis[axis_id][0] / (double)axis->axisBlock[0] * FFTPlan->actualFFTSizePerAxis[axis_id][2] / (double)axis->specializationConstants.fftDim);
@@ -29146,21 +37671,35 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			if (!(axis->specializationConstants.convolutionStep && (app->configuration.matrixConvolution > 1))) tempSize[2] *= app->configuration.coordinateFeatures;
 			//if (app->configuration.actualPerformR2C == 1) tempSize[0] = (uint64_t)ceil(tempSize[0] / 2.0);
 
-			if (tempSize[0] > app->configuration.maxComputeWorkGroupCount[0]) axis->specializationConstants.performWorkGroupShift[0] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[0] = 0;
-			if (tempSize[1] > app->configuration.maxComputeWorkGroupCount[1]) axis->specializationConstants.performWorkGroupShift[1] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[1] = 0;
-			if (tempSize[2] > app->configuration.maxComputeWorkGroupCount[2]) axis->specializationConstants.performWorkGroupShift[2] = 1;
-			else  axis->specializationConstants.performWorkGroupShift[2] = 0;
-
 		}
+		if ((app->configuration.maxComputeWorkGroupCount[0] > app->configuration.maxComputeWorkGroupCount[1]) && (tempSize[1] > app->configuration.maxComputeWorkGroupCount[1]) && (tempSize[1] > tempSize[0]) && (tempSize[1] >= tempSize[2])) {
+			uint64_t temp_tempSize = tempSize[0];
+			tempSize[0] = tempSize[1];
+			tempSize[1] = temp_tempSize;
+			axis->specializationConstants.swapComputeWorkGroupID = 1;
+		}
+		else {
+			if ((app->configuration.maxComputeWorkGroupCount[0] > app->configuration.maxComputeWorkGroupCount[2]) && (tempSize[2] > app->configuration.maxComputeWorkGroupCount[2]) && (tempSize[2] > tempSize[0]) && (tempSize[2] >= tempSize[1])) {
+				uint64_t temp_tempSize = tempSize[0];
+				tempSize[0] = tempSize[2];
+				tempSize[2] = temp_tempSize;
+				axis->specializationConstants.swapComputeWorkGroupID = 2;
+			}
+		}
+		if (tempSize[0] > app->configuration.maxComputeWorkGroupCount[0]) axis->specializationConstants.performWorkGroupShift[0] = 1;
+		else  axis->specializationConstants.performWorkGroupShift[0] = 0;
+		if (tempSize[1] > app->configuration.maxComputeWorkGroupCount[1]) axis->specializationConstants.performWorkGroupShift[1] = 1;
+		else  axis->specializationConstants.performWorkGroupShift[1] = 0;
+		if (tempSize[2] > app->configuration.maxComputeWorkGroupCount[2]) axis->specializationConstants.performWorkGroupShift[2] = 1;
+		else  axis->specializationConstants.performWorkGroupShift[2] = 0;
 
 		char floatTypeInputMemory[10];
 		char floatTypeOutputMemory[10];
 		char floatTypeKernelMemory[10];
 		char floatType[10];
 		axis->specializationConstants.unroll = 1;
-		axis->specializationConstants.LUT = app->configuration.useLUT;
+		axis->specializationConstants.LUT = (app->configuration.useLUT == 1) ? 1 : 0;
+		axis->specializationConstants.LUT_4step = (app->configuration.useLUT_4step == 1) ? 1 : 0;
 		if (app->configuration.doublePrecision) {
 			sprintf(floatType, "double");
 			sprintf(floatTypeInputMemory, "double");
@@ -29218,6 +37757,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			sprintf(uintType, "unsigned int");
 #elif(VKFFT_BACKEND==4)
 			sprintf(uintType, "unsigned int");
+#elif(VKFFT_BACKEND==5)
+			sprintf(uintType, "uint");
 #endif
 		}
 		else {
@@ -29231,6 +37772,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			sprintf(uintType, "unsigned long");
 #elif(VKFFT_BACKEND==4)
 			sprintf(uintType, "unsigned long");
+#elif(VKFFT_BACKEND==5)
+			sprintf(uintType, "ulong");
 #endif
 		}
 		{
@@ -29293,6 +37836,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		axis->specializationConstants.cacheShuffle = 0;
 #elif(VKFFT_BACKEND==4)
 		axis->specializationConstants.cacheShuffle = 0;
+#elif(VKFFT_BACKEND==5)
+		axis->specializationConstants.cacheShuffle = 0;
 #endif
 		axis->specializationConstants.maxCodeLength = app->configuration.maxCodeLength;
 		axis->specializationConstants.maxTempLength = app->configuration.maxTempLength;
@@ -29310,10 +37855,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		}
 #if(VKFFT_BACKEND==0)
 		uint32_t* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
-			uint32_t* localStrPointer = (uint32_t*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = localStrPointer[0];
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (uint32_t*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -29321,120 +37866,117 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize);
-			app->currentApplicationStringPos += codeSize / (sizeof(uint32_t)) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
 		}
 		else
 		{
-			const glslang_resource_t default_resource = {
-				/* .MaxLights = */ 32,
-				/* .MaxClipPlanes = */ 6,
-				/* .MaxTextureUnits = */ 32,
-				/* .MaxTextureCoords = */ 32,
-				/* .MaxVertexAttribs = */ 64,
-				/* .MaxVertexUniformComponents = */ 4096,
-				/* .MaxVaryingFloats = */ 64,
-				/* .MaxVertexTextureImageUnits = */ 32,
-				/* .MaxCombinedTextureImageUnits = */ 80,
-				/* .MaxTextureImageUnits = */ 32,
-				/* .MaxFragmentUniformComponents = */ 4096,
-				/* .MaxDrawBuffers = */ 32,
-				/* .MaxVertexUniformVectors = */ 128,
-				/* .MaxVaryingVectors = */ 8,
-				/* .MaxFragmentUniformVectors = */ 16,
-				/* .MaxVertexOutputVectors = */ 16,
-				/* .MaxFragmentInputVectors = */ 15,
-				/* .MinProgramTexelOffset = */ -8,
-				/* .MaxProgramTexelOffset = */ 7,
-				/* .MaxClipDistances = */ 8,
-				/* .MaxComputeWorkGroupCountX = */ 65535,
-				/* .MaxComputeWorkGroupCountY = */ 65535,
-				/* .MaxComputeWorkGroupCountZ = */ 65535,
-				/* .MaxComputeWorkGroupSizeX = */ 1024,
-				/* .MaxComputeWorkGroupSizeY = */ 1024,
-				/* .MaxComputeWorkGroupSizeZ = */ 64,
-				/* .MaxComputeUniformComponents = */ 1024,
-				/* .MaxComputeTextureImageUnits = */ 16,
-				/* .MaxComputeImageUniforms = */ 8,
-				/* .MaxComputeAtomicCounters = */ 8,
-				/* .MaxComputeAtomicCounterBuffers = */ 1,
-				/* .MaxVaryingComponents = */ 60,
-				/* .MaxVertexOutputComponents = */ 64,
-				/* .MaxGeometryInputComponents = */ 64,
-				/* .MaxGeometryOutputComponents = */ 128,
-				/* .MaxFragmentInputComponents = */ 128,
-				/* .MaxImageUnits = */ 8,
-				/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8,
-				/* .MaxCombinedShaderOutputResources = */ 8,
-				/* .MaxImageSamples = */ 0,
-				/* .MaxVertexImageUniforms = */ 0,
-				/* .MaxTessControlImageUniforms = */ 0,
-				/* .MaxTessEvaluationImageUniforms = */ 0,
-				/* .MaxGeometryImageUniforms = */ 0,
-				/* .MaxFragmentImageUniforms = */ 8,
-				/* .MaxCombinedImageUniforms = */ 8,
-				/* .MaxGeometryTextureImageUnits = */ 16,
-				/* .MaxGeometryOutputVertices = */ 256,
-				/* .MaxGeometryTotalOutputComponents = */ 1024,
-				/* .MaxGeometryUniformComponents = */ 1024,
-				/* .MaxGeometryVaryingComponents = */ 64,
-				/* .MaxTessControlInputComponents = */ 128,
-				/* .MaxTessControlOutputComponents = */ 128,
-				/* .MaxTessControlTextureImageUnits = */ 16,
-				/* .MaxTessControlUniformComponents = */ 1024,
-				/* .MaxTessControlTotalOutputComponents = */ 4096,
-				/* .MaxTessEvaluationInputComponents = */ 128,
-				/* .MaxTessEvaluationOutputComponents = */ 128,
-				/* .MaxTessEvaluationTextureImageUnits = */ 16,
-				/* .MaxTessEvaluationUniformComponents = */ 1024,
-				/* .MaxTessPatchComponents = */ 120,
-				/* .MaxPatchVertices = */ 32,
-				/* .MaxTessGenLevel = */ 64,
-				/* .MaxViewports = */ 16,
-				/* .MaxVertexAtomicCounters = */ 0,
-				/* .MaxTessControlAtomicCounters = */ 0,
-				/* .MaxTessEvaluationAtomicCounters = */ 0,
-				/* .MaxGeometryAtomicCounters = */ 0,
-				/* .MaxFragmentAtomicCounters = */ 8,
-				/* .MaxCombinedAtomicCounters = */ 8,
-				/* .MaxAtomicCounterBindings = */ 1,
-				/* .MaxVertexAtomicCounterBuffers = */ 0,
-				/* .MaxTessControlAtomicCounterBuffers = */ 0,
-				/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
-				/* .MaxGeometryAtomicCounterBuffers = */ 0,
-				/* .MaxFragmentAtomicCounterBuffers = */ 1,
-				/* .MaxCombinedAtomicCounterBuffers = */ 1,
-				/* .MaxAtomicCounterBufferSize = */ 16384,
-				/* .MaxTransformFeedbackBuffers = */ 4,
-				/* .MaxTransformFeedbackInterleavedComponents = */ 64,
-				/* .MaxCullDistances = */ 8,
-				/* .MaxCombinedClipAndCullDistances = */ 8,
-				/* .MaxSamples = */ 4,
-				/* .maxMeshOutputVerticesNV = */ 256,
-				/* .maxMeshOutputPrimitivesNV = */ 512,
-				/* .maxMeshWorkGroupSizeX_NV = */ 32,
-				/* .maxMeshWorkGroupSizeY_NV = */ 1,
-				/* .maxMeshWorkGroupSizeZ_NV = */ 1,
-				/* .maxTaskWorkGroupSizeX_NV = */ 32,
-				/* .maxTaskWorkGroupSizeY_NV = */ 1,
-				/* .maxTaskWorkGroupSizeZ_NV = */ 1,
-				/* .maxMeshViewCountNV = */ 4,
-				/* .maxDualSourceDrawBuffersEXT = */ 1,
-
-				/* .limits = */ {
-					/* .nonInductiveForLoops = */ 1,
-					/* .whileLoops = */ 1,
-					/* .doWhileLoops = */ 1,
-					/* .generalUniformIndexing = */ 1,
-					/* .generalAttributeMatrixVectorIndexing = */ 1,
-					/* .generalVaryingIndexing = */ 1,
-					/* .generalSamplerIndexing = */ 1,
-					/* .generalVariableIndexing = */ 1,
-					/* .generalConstantMatrixVectorIndexing = */ 1,
-				} };
+			glslang_resource_t default_resource = {};
+			default_resource.max_lights = 32;
+			default_resource.max_clip_planes = 6;
+			default_resource.max_texture_units = 32;
+			default_resource.max_texture_coords = 32;
+			default_resource.max_vertex_attribs = 64;
+			default_resource.max_vertex_uniform_components = 4096;
+			default_resource.max_varying_floats = 64;
+			default_resource.max_vertex_texture_image_units = 32;
+			default_resource.max_combined_texture_image_units = 80;
+			default_resource.max_texture_image_units = 32;
+			default_resource.max_fragment_uniform_components = 4096;
+			default_resource.max_draw_buffers = 32;
+			default_resource.max_vertex_uniform_vectors = 128;
+			default_resource.max_varying_vectors = 8;
+			default_resource.max_fragment_uniform_vectors = 16;
+			default_resource.max_vertex_output_vectors = 16;
+			default_resource.max_fragment_input_vectors = 15;
+			default_resource.min_program_texel_offset = -8;
+			default_resource.max_program_texel_offset = 7;
+			default_resource.max_clip_distances = 8;
+			default_resource.max_compute_work_group_count_x = (int)app->configuration.maxComputeWorkGroupCount[0];
+			default_resource.max_compute_work_group_count_y = (int)app->configuration.maxComputeWorkGroupCount[1];
+			default_resource.max_compute_work_group_count_z = (int)app->configuration.maxComputeWorkGroupCount[2];
+			default_resource.max_compute_work_group_size_x = (int)app->configuration.maxComputeWorkGroupSize[0];
+			default_resource.max_compute_work_group_size_y = (int)app->configuration.maxComputeWorkGroupSize[1];
+			default_resource.max_compute_work_group_size_z = (int)app->configuration.maxComputeWorkGroupSize[2];
+			default_resource.max_compute_uniform_components = 1024;
+			default_resource.max_compute_texture_image_units = 16;
+			default_resource.max_compute_image_uniforms = 8;
+			default_resource.max_compute_atomic_counters = 8;
+			default_resource.max_compute_atomic_counter_buffers = 1;
+			default_resource.max_varying_components = 60;
+			default_resource.max_vertex_output_components = 64;
+			default_resource.max_geometry_input_components = 64;
+			default_resource.max_geometry_output_components = 128;
+			default_resource.max_fragment_input_components = 128;
+			default_resource.max_image_units = 8;
+			default_resource.max_combined_image_units_and_fragment_outputs = 8;
+			default_resource.max_combined_shader_output_resources = 8;
+			default_resource.max_image_samples = 0;
+			default_resource.max_vertex_image_uniforms = 0;
+			default_resource.max_tess_control_image_uniforms = 0;
+			default_resource.max_tess_evaluation_image_uniforms = 0;
+			default_resource.max_geometry_image_uniforms = 0;
+			default_resource.max_fragment_image_uniforms = 8;
+			default_resource.max_combined_image_uniforms = 8;
+			default_resource.max_geometry_texture_image_units = 16;
+			default_resource.max_geometry_output_vertices = 256;
+			default_resource.max_geometry_total_output_components = 1024;
+			default_resource.max_geometry_uniform_components = 1024;
+			default_resource.max_geometry_varying_components = 64;
+			default_resource.max_tess_control_input_components = 128;
+			default_resource.max_tess_control_output_components = 128;
+			default_resource.max_tess_control_texture_image_units = 16;
+			default_resource.max_tess_control_uniform_components = 1024;
+			default_resource.max_tess_control_total_output_components = 4096;
+			default_resource.max_tess_evaluation_input_components = 128;
+			default_resource.max_tess_evaluation_output_components = 128;
+			default_resource.max_tess_evaluation_texture_image_units = 16;
+			default_resource.max_tess_evaluation_uniform_components = 1024;
+			default_resource.max_tess_patch_components = 120;
+			default_resource.max_patch_vertices = 32;
+			default_resource.max_tess_gen_level = 64;
+			default_resource.max_viewports = 16;
+			default_resource.max_vertex_atomic_counters = 0;
+			default_resource.max_tess_control_atomic_counters = 0;
+			default_resource.max_tess_evaluation_atomic_counters = 0;
+			default_resource.max_geometry_atomic_counters = 0;
+			default_resource.max_fragment_atomic_counters = 8;
+			default_resource.max_combined_atomic_counters = 8;
+			default_resource.max_atomic_counter_bindings = 1;
+			default_resource.max_vertex_atomic_counter_buffers = 0;
+			default_resource.max_tess_control_atomic_counter_buffers = 0;
+			default_resource.max_tess_evaluation_atomic_counter_buffers = 0;
+			default_resource.max_geometry_atomic_counter_buffers = 0;
+			default_resource.max_fragment_atomic_counter_buffers = 1;
+			default_resource.max_combined_atomic_counter_buffers = 1;
+			default_resource.max_atomic_counter_buffer_size = 16384;
+			default_resource.max_transform_feedback_buffers = 4;
+			default_resource.max_transform_feedback_interleaved_components = 64;
+			default_resource.max_cull_distances = 8;
+			default_resource.max_combined_clip_and_cull_distances = 8;
+			default_resource.max_samples = 4;
+			default_resource.max_mesh_output_vertices_nv = 256;
+			default_resource.max_mesh_output_primitives_nv = 512;
+			default_resource.max_mesh_work_group_size_x_nv = 32;
+			default_resource.max_mesh_work_group_size_y_nv = 1;
+			default_resource.max_mesh_work_group_size_z_nv = 1;
+			default_resource.max_task_work_group_size_x_nv = 32;
+			default_resource.max_task_work_group_size_y_nv = 1;
+			default_resource.max_task_work_group_size_z_nv = 1;
+			default_resource.max_mesh_view_count_nv = 4;
+
+			default_resource.limits.non_inductive_for_loops = 1;
+			default_resource.limits.while_loops = 1;
+			default_resource.limits.do_while_loops = 1;
+			default_resource.limits.general_uniform_indexing = 1;
+			default_resource.limits.general_attribute_matrix_vector_indexing = 1;
+			default_resource.limits.general_varying_indexing = 1;
+			default_resource.limits.general_sampler_indexing = 1;
+			default_resource.limits.general_variable_indexing = 1;
+			default_resource.limits.general_constant_matrix_vector_indexing = 1;
 			glslang_target_client_version_t client_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_VULKAN_1_1 : GLSLANG_TARGET_VULKAN_1_0;
 			glslang_target_language_version_t target_language_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_SPV_1_3 : GLSLANG_TARGET_SPV_1_0;
-			const glslang_input_t input =
+			glslang_input_t input =
 			{
 				GLSLANG_SOURCE_GLSL,
 				GLSLANG_STAGE_COMPUTE,
@@ -29448,10 +37990,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				1,
 				0,
 				GLSLANG_MSG_DEFAULT_BIT,
-				&default_resource,
+				(const glslang_resource_t*)&default_resource,
 			};
 			//printf("%s\n", code0);
-			glslang_shader_t* shader = glslang_shader_create(&input);
+			glslang_shader_t* shader = glslang_shader_create((const glslang_input_t*)&input);
 			const char* err;
 			if (!glslang_shader_preprocess(shader, &input))
 			{
@@ -29559,7 +38101,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		pipelineShaderStageCreateInfo.pSpecializationInfo = 0;// &specializationInfo;
 		computePipelineCreateInfo.stage = pipelineShaderStageCreateInfo;
 		computePipelineCreateInfo.layout = axis->pipelineLayout;
-		res = vkCreateComputePipelines(app->configuration.device[0], VK_NULL_HANDLE, 1, &computePipelineCreateInfo, 0, &axis->pipeline);
+		if (app->configuration.pipelineCache)
+			res = vkCreateComputePipelines(app->configuration.device[0], app->configuration.pipelineCache[0], 1, &computePipelineCreateInfo, 0, &axis->pipeline);
+		else
+			res = vkCreateComputePipelines(app->configuration.device[0], 0, 1, &computePipelineCreateInfo, 0, &axis->pipeline);
 		if (res != VK_SUCCESS) {
 			deleteVkFFT(app);
 			return VKFFT_ERROR_FAILED_TO_CREATE_PIPELINE;
@@ -29571,10 +38116,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		}
 #elif(VKFFT_BACKEND==1)
 		char* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
 			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = strtol(localStrPointer, &localStrPointer, 10);
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (char*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -29582,10 +38127,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize - 1);
-			code[codeSize - 1] = '\0';
-			//printf("%s\n", code);
-			app->currentApplicationStringPos += codeSize + (uint64_t)(floor(log10((double)codeSize))) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
 		}
 		else {
 			nvrtcProgram prog;
@@ -29604,12 +38147,22 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				deleteVkFFT(app);
 				return VKFFT_ERROR_FAILED_TO_CREATE_PROGRAM;
 			}
-			//const char opts[20] = "--fmad=false";
+			char* opts[5];
+			opts[0] = (char*)malloc(sizeof(char) * 50);
+			if (!opts[0]) {
+				free(code0);
+				code0 = 0;
+				deleteVkFFT(app);
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			sprintf(opts[0], "--gpu-architecture=sm_%" PRIu64 "%" PRIu64 "", app->configuration.computeCapabilityMajor, app->configuration.computeCapabilityMinor);
 			//result = nvrtcAddNameExpression(prog, "&consts");
 			//if (result != NVRTC_SUCCESS) printf("1.5 error: %s\n", nvrtcGetErrorString(result));
 			result = nvrtcCompileProgram(prog,  // prog
-				0,     // numOptions
-				0); // options
+				1,     // numOptions
+				(const char* const*)opts); // options
+			free(opts[0]);
+
 			if (result != NVRTC_SUCCESS) {
 				printf("nvrtcCompileProgram error: %s\n", nvrtcGetErrorString(result));
 				char* log = (char*)malloc(sizeof(char) * 4000000);
@@ -29631,9 +38184,17 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 					return VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM;
 				}
 			}
+#if (CUDA_VERSION >= 11030)
+			result = nvrtcGetCUBINSize(prog, &codeSize);
+#else
 			result = nvrtcGetPTXSize(prog, &codeSize);
+#endif
 			if (result != NVRTC_SUCCESS) {
+#if (CUDA_VERSION >= 11030)
+				printf("nvrtcGetCUBINSize error: %s\n", nvrtcGetErrorString(result));
+#else
 				printf("nvrtcGetPTXSize error: %s\n", nvrtcGetErrorString(result));
+#endif
 				free(code0);
 				code0 = 0;
 				deleteVkFFT(app);
@@ -29648,9 +38209,17 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
 			axis->binary = code;
+#if (CUDA_VERSION >= 11030)
+			result = nvrtcGetCUBIN(prog, code);
+#else
 			result = nvrtcGetPTX(prog, code);
+#endif
 			if (result != NVRTC_SUCCESS) {
+#if (CUDA_VERSION >= 11030)
+				printf("nvrtcGetCUBIN error: %s\n", nvrtcGetErrorString(result));
+#else
 				printf("nvrtcGetPTX error: %s\n", nvrtcGetErrorString(result));
+#endif
 				free(code);
 				code = 0;
 				free(code0);
@@ -29690,6 +38259,18 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			deleteVkFFT(app);
 			return VKFFT_ERROR_FAILED_TO_GET_FUNCTION;
 		}
+
+		/*result2 = cuFuncSetCacheConfig(axis->VkFFTKernel, CU_FUNC_CACHE_PREFER_SHARED);
+		if (result2 != CUDA_SUCCESS) {
+			printf("cuFuncSetAttribute error: %d\n", result2);
+			free(code);
+			code = 0;
+			free(code0);
+			code0 = 0;
+			deleteVkFFT(app);
+			return VKFFT_ERROR_FAILED_TO_SET_DYNAMIC_SHARED_MEMORY;
+		}*/
+
 		if (axis->specializationConstants.usedSharedMemory > app->configuration.sharedMemorySizeStatic) {
 			result2 = cuFuncSetAttribute(axis->VkFFTKernel, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, (int)axis->specializationConstants.usedSharedMemory);
 			if (result2 != CUDA_SUCCESS) {
@@ -29721,10 +38302,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		}
 #elif(VKFFT_BACKEND==2)
 		uint32_t* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
-			uint32_t* localStrPointer = (uint32_t*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = localStrPointer[0];
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (uint32_t*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -29732,8 +38313,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize);
-			app->currentApplicationStringPos += codeSize / (sizeof(uint32_t)) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
 		}
 		else
 		{
@@ -29846,6 +38427,18 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			deleteVkFFT(app);
 			return VKFFT_ERROR_FAILED_TO_GET_FUNCTION;
 		}
+
+		/*result2 = hipFuncSetCacheConfig(axis->VkFFTKernel, hipFuncCachePreferShared);
+		if (result2 != hipSuccess) {
+			printf("hipFuncSetAttribute error: %d\n", result2);
+			free(code);
+			code = 0;
+			free(code0);
+			code0 = 0;
+			deleteVkFFT(app);
+			return VKFFT_ERROR_FAILED_TO_SET_DYNAMIC_SHARED_MEMORY;
+		}*/
+
 		if (axis->specializationConstants.usedSharedMemory > app->configuration.sharedMemorySizeStatic) {
 			result2 = hipFuncSetAttribute(axis->VkFFTKernel, hipFuncAttributeMaxDynamicSharedMemorySize, (int)axis->specializationConstants.usedSharedMemory);
 			//result2 = hipFuncSetCacheConfig(axis->VkFFTKernel, hipFuncCachePreferShared);
@@ -29879,21 +38472,21 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 #elif(VKFFT_BACKEND==3)
 		if (app->configuration.loadApplicationFromString) {
 			char* code;
-			size_t codeSize;
+			uint64_t codeSize;
 			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = strtol(localStrPointer, &localStrPointer, 10);
-			code = (char*)malloc(codeSize - 1);
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
+			size_t codeSize_size_t = (size_t)codeSize;
+			code = (char*)malloc(codeSize);
 			if (!code) {
 				free(code0);
 				code0 = 0;
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize - 2);
-			code[codeSize - 2] = '\0';
-			app->currentApplicationStringPos += codeSize + (uint64_t)(floor(log10((double)codeSize)));
-
-			axis->program = clCreateProgramWithBinary(app->configuration.context[0], 1, app->configuration.device, &codeSize, (const unsigned char**)(&code), 0, &res);
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
+			const unsigned char* temp_code = (const unsigned char*)code;
+			axis->program = clCreateProgramWithBinary(app->configuration.context[0], 1, app->configuration.device, &codeSize_size_t, (const unsigned char**)(&temp_code), 0, &res);
 			if (res != CL_SUCCESS) {
 				free(code0);
 				code0 = 0;
@@ -29905,7 +38498,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		}
 		else {
 			size_t codelen = strlen(code0);
-			axis->program = clCreateProgramWithSource(app->configuration.context[0], 1, (const char**)&code0, &codelen, &res);
+			const char* temp_code = (const char*)code0;
+			axis->program = clCreateProgramWithSource(app->configuration.context[0], 1, (const char**)&temp_code, &codelen, &res);
 			if (res != CL_SUCCESS) {
 				free(code0);
 				code0 = 0;
@@ -29945,7 +38539,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				deleteVkFFT(app);
 				return VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM;
 			}
-			axis->binarySize = codeSize;
+			axis->binarySize = (uint64_t)codeSize;
 			axis->binary = (char*)malloc(axis->binarySize);
 			if (!axis->binary) {
 				free(code0);
@@ -29953,7 +38547,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			res = clGetProgramInfo(axis->program, CL_PROGRAM_BINARIES, axis->binarySize, &axis->binary, NULL);
+			res = clGetProgramInfo(axis->program, CL_PROGRAM_BINARIES, codeSize, &axis->binary, NULL);
 			if (res != CL_SUCCESS) {
 				free(axis->binary);
 				axis->binary = 0;
@@ -29976,10 +38570,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 		}
 #elif(VKFFT_BACKEND==4)
 		uint32_t* code;
-		size_t codeSize;
+		uint64_t codeSize;
 		if (app->configuration.loadApplicationFromString) {
-			uint32_t* localStrPointer = (uint32_t*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
-			codeSize = localStrPointer[0];
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
 			code = (uint32_t*)malloc(codeSize);
 			if (!code) {
 				free(code0);
@@ -29987,8 +38581,9 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 				deleteVkFFT(app);
 				return VKFFT_ERROR_MALLOC_FAILED;
 			}
-			memcpy(code, localStrPointer + 1, codeSize);
-			app->currentApplicationStringPos += codeSize / (sizeof(uint32_t)) + 1;
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
+
 			const char* pBuildFlags = (app->configuration.useUint64) ? "-ze-opt-greater-than-4GB-buffer-required" : 0;
 			ze_module_desc_t moduleDesc = {
 				ZE_STRUCTURE_TYPE_MODULE_DESC,
@@ -30124,6 +38719,54 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
 			deleteVkFFT(app);
 			return VKFFT_ERROR_FAILED_TO_CREATE_SHADER_MODULE;
 		}
+#elif(VKFFT_BACKEND==5)
+		NS::Error* error;
+		if (app->configuration.loadApplicationFromString) {
+			char* code;
+			uint64_t codeSize;
+			char* localStrPointer = (char*)app->configuration.loadApplicationString + app->currentApplicationStringPos;
+			memcpy(&codeSize, localStrPointer, sizeof(uint64_t));
+			size_t codeSize_size_t = (size_t)codeSize;
+			code = (char*)malloc(codeSize);
+			if (!code) {
+				free(code0);
+				code0 = 0;
+				deleteVkFFT(app);
+				return VKFFT_ERROR_MALLOC_FAILED;
+			}
+			memcpy(code, localStrPointer + sizeof(uint64_t), codeSize);
+			app->currentApplicationStringPos += codeSize + sizeof(uint64_t);
+			dispatch_data_t data = dispatch_data_create(code, codeSize, 0, 0);
+			axis->library = app->configuration.device->newLibrary(data, &error);
+			if (error)std::cout << error->debugDescription()->cString(NS::ASCIIStringEncoding) << error->localizedDescription()->cString(NS::ASCIIStringEncoding) << std::endl;
+			free(code);
+			code = 0;
+		}
+		else {
+			size_t codelen = strlen(code0);
+			MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc();
+			compileOptions->setFastMathEnabled(true);
+			NS::String* str = NS::String::string(code0, NS::UTF8StringEncoding);
+			axis->library = app->configuration.device->newLibrary(str, compileOptions, &error);
+			if (error) {
+				printf("%s\n%s\n", error->debugDescription()->cString(NS::ASCIIStringEncoding), error->localizedDescription()->cString(NS::ASCIIStringEncoding));
+				free(code0);
+				code0 = 0;
+				deleteVkFFT(app);
+				return VKFFT_ERROR_FAILED_TO_COMPILE_PROGRAM;
+			}
+			compileOptions->release();
+			if (app->configuration.saveApplicationToString) {
+
+			}
+			str->release();
+		}
+		const char function_name[20] = "VkFFT_main";
+		NS::String* str = NS::String::string(function_name, NS::UTF8StringEncoding);
+		MTL::Function* function = axis->library->newFunction(str);
+		axis->pipeline = app->configuration.device->newComputePipelineState(function, &error);
+		function->release();
+		str->release();
 #endif
 		if (!app->configuration.keepShaderCode) {
 			free(code0);
@@ -30153,10 +38796,10 @@ static inline VkFFTResult initializeBluesteinAutoPadding(VkFFTApplication* app)
 			break;
 		default: //have not done a test run for Intel, so everything else uses AMD profile
 			if (app->configuration.doublePrecision) {
-				app->configuration.autoCustomBluesteinPaddingPattern = 27;
+				app->configuration.autoCustomBluesteinPaddingPattern = 54;
 			}
 			else {
-				app->configuration.autoCustomBluesteinPaddingPattern = 30;
+				app->configuration.autoCustomBluesteinPaddingPattern = 29;
 			}
 			break;
 		}
@@ -30246,7 +38889,7 @@ static inline VkFFTResult initializeBluesteinAutoPadding(VkFFTApplication* app)
 				app->configuration.primeSizes[38] = 2185;
 				app->configuration.paddedSizes[38] = 4608;
 				app->configuration.primeSizes[39] = 2305;
-				app->configuration.paddedSizes[39] = 4725;
+				app->configuration.paddedSizes[39] = 4900;
 				app->configuration.primeSizes[40] = 2364;
 				app->configuration.paddedSizes[40] = 4900;
 				app->configuration.primeSizes[41] = 2451;
@@ -30376,92 +39019,146 @@ static inline VkFFTResult initializeBluesteinAutoPadding(VkFFTApplication* app)
 				app->configuration.primeSizes[7] = 43;
 				app->configuration.paddedSizes[7] = 90;
 				app->configuration.primeSizes[8] = 46;
-				app->configuration.paddedSizes[8] = 112;
-				app->configuration.primeSizes[9] = 57;
-				app->configuration.paddedSizes[9] = 125;
-				app->configuration.primeSizes[10] = 67;
-				app->configuration.paddedSizes[10] = 150;
-				app->configuration.primeSizes[11] = 76;
-				app->configuration.paddedSizes[11] = 256;
-				app->configuration.primeSizes[12] = 129;
-				app->configuration.paddedSizes[12] = 270;
-				app->configuration.primeSizes[13] = 136;
-				app->configuration.paddedSizes[13] = 512;
-				app->configuration.primeSizes[14] = 257;
-				app->configuration.paddedSizes[14] = 625;
-				app->configuration.primeSizes[15] = 314;
-				app->configuration.paddedSizes[15] = 750;
-				app->configuration.primeSizes[16] = 376;
-				app->configuration.paddedSizes[16] = 756;
-				app->configuration.primeSizes[17] = 379;
-				app->configuration.paddedSizes[17] = 768;
-				app->configuration.primeSizes[18] = 386;
-				app->configuration.paddedSizes[18] = 875;
-				app->configuration.primeSizes[19] = 439;
-				app->configuration.paddedSizes[19] = 1024;
-				app->configuration.primeSizes[20] = 513;
-				app->configuration.paddedSizes[20] = 1296;
-				app->configuration.primeSizes[21] = 649;
-				app->configuration.paddedSizes[21] = 1300;
-				app->configuration.primeSizes[22] = 651;
-				app->configuration.paddedSizes[22] = 1323;
-				app->configuration.primeSizes[23] = 663;
-				app->configuration.paddedSizes[23] = 1512;
-				app->configuration.primeSizes[24] = 757;
-				app->configuration.paddedSizes[24] = 1792;
-				app->configuration.primeSizes[25] = 897;
-				app->configuration.paddedSizes[25] = 2016;
-				app->configuration.primeSizes[26] = 1009;
-				app->configuration.paddedSizes[26] = 2048;
+				app->configuration.paddedSizes[8] = 125;
+				app->configuration.primeSizes[9] = 67;
+				app->configuration.paddedSizes[9] = 150;
+				app->configuration.primeSizes[10] = 76;
+				app->configuration.paddedSizes[10] = 175;
+				app->configuration.primeSizes[11] = 89;
+				app->configuration.paddedSizes[11] = 189;
+				app->configuration.primeSizes[12] = 97;
+				app->configuration.paddedSizes[12] = 198;
+				app->configuration.primeSizes[13] = 101;
+				app->configuration.paddedSizes[13] = 243;
+				app->configuration.primeSizes[14] = 123;
+				app->configuration.paddedSizes[14] = 256;
+				app->configuration.primeSizes[15] = 129;
+				app->configuration.paddedSizes[15] = 270;
+				app->configuration.primeSizes[16] = 136;
+				app->configuration.paddedSizes[16] = 512;
+				app->configuration.primeSizes[17] = 257;
+				app->configuration.paddedSizes[17] = 625;
+				app->configuration.primeSizes[18] = 314;
+				app->configuration.paddedSizes[18] = 640;
+				app->configuration.primeSizes[19] = 321;
+				app->configuration.paddedSizes[19] = 702;
+				app->configuration.primeSizes[20] = 353;
+				app->configuration.paddedSizes[20] = 750;
+				app->configuration.primeSizes[21] = 376;
+				app->configuration.paddedSizes[21] = 756;
+				app->configuration.primeSizes[22] = 379;
+				app->configuration.paddedSizes[22] = 768;
+				app->configuration.primeSizes[23] = 386;
+				app->configuration.paddedSizes[23] = 875;
+				app->configuration.primeSizes[24] = 439;
+				app->configuration.paddedSizes[24] = 1024;
+				app->configuration.primeSizes[25] = 513;
+				app->configuration.paddedSizes[25] = 1296;
+				app->configuration.primeSizes[26] = 649;
+				app->configuration.paddedSizes[26] = 1300;
+				app->configuration.primeSizes[27] = 651;
+				app->configuration.paddedSizes[27] = 1323;
+				app->configuration.primeSizes[28] = 663;
+				app->configuration.paddedSizes[28] = 1344;
+				app->configuration.primeSizes[29] = 673;
+				app->configuration.paddedSizes[29] = 1512;
+				app->configuration.primeSizes[30] = 757;
+				app->configuration.paddedSizes[30] = 1792;
+				app->configuration.primeSizes[31] = 897;
+				app->configuration.paddedSizes[31] = 2016;
+				app->configuration.primeSizes[32] = 1009;
+				app->configuration.paddedSizes[32] = 2048;
+				app->configuration.primeSizes[33] = 1025;
+				app->configuration.paddedSizes[33] = 2187;
+				app->configuration.primeSizes[34] = 1095;
+				app->configuration.paddedSizes[34] = 3136;
+				app->configuration.primeSizes[35] = 1569;
+				app->configuration.paddedSizes[35] = 3159;
+				app->configuration.primeSizes[36] = 1581;
+				app->configuration.paddedSizes[36] = 3430;
+				app->configuration.primeSizes[37] = 1717;
+				app->configuration.paddedSizes[37] = 3584;
+				app->configuration.primeSizes[38] = 1793;
+				app->configuration.paddedSizes[38] = 4096;
+				app->configuration.primeSizes[39] = 2049;
+				app->configuration.paddedSizes[39] = 4224;
+				app->configuration.primeSizes[40] = 2113;
+				app->configuration.paddedSizes[40] = 4375;
+				app->configuration.primeSizes[41] = 2189;
+				app->configuration.paddedSizes[41] = 4480;
+				app->configuration.primeSizes[42] = 2241;
+				app->configuration.paddedSizes[42] = 4704;
+				app->configuration.primeSizes[43] = 2353;
+				app->configuration.paddedSizes[43] = 4928;
+				app->configuration.primeSizes[44] = 2465;
+				app->configuration.paddedSizes[44] = 4992;
+				app->configuration.primeSizes[45] = 2497;
+				app->configuration.paddedSizes[45] = 5005;
+				app->configuration.primeSizes[46] = 2504;
+				app->configuration.paddedSizes[46] = 5103;
+				app->configuration.primeSizes[47] = 2553;
+				app->configuration.paddedSizes[47] = 5376;
+				app->configuration.primeSizes[48] = 2689;
+				app->configuration.paddedSizes[48] = 5632;
+				app->configuration.primeSizes[49] = 2817;
+				app->configuration.paddedSizes[49] = 5824;
+				app->configuration.primeSizes[50] = 2913;
+				app->configuration.paddedSizes[50] = 6048;
+				app->configuration.primeSizes[51] = 3026;
+				app->configuration.paddedSizes[51] = 6144;
+				app->configuration.primeSizes[52] = 3073;
+				app->configuration.paddedSizes[52] = 6875;
+				app->configuration.primeSizes[53] = 3439;
+				app->configuration.paddedSizes[53] = 8192;
 			}
 			else {
 				app->configuration.primeSizes[0] = 17;
-				app->configuration.paddedSizes[0] = 35;
+				app->configuration.paddedSizes[0] = 36;
 				app->configuration.primeSizes[1] = 19;
 				app->configuration.paddedSizes[1] = 42;
 				app->configuration.primeSizes[2] = 23;
 				app->configuration.paddedSizes[2] = 64;
 				app->configuration.primeSizes[3] = 34;
-				app->configuration.paddedSizes[3] = 70;
-				app->configuration.primeSizes[4] = 37;
-				app->configuration.paddedSizes[4] = 84;
-				app->configuration.primeSizes[5] = 43;
-				app->configuration.paddedSizes[5] = 88;
-				app->configuration.primeSizes[6] = 46;
-				app->configuration.paddedSizes[6] = 128;
-				app->configuration.primeSizes[7] = 67;
-				app->configuration.paddedSizes[7] = 150;
-				app->configuration.primeSizes[8] = 76;
-				app->configuration.paddedSizes[8] = 162;
-				app->configuration.primeSizes[9] = 82;
-				app->configuration.paddedSizes[9] = 176;
-				app->configuration.primeSizes[10] = 89;
-				app->configuration.paddedSizes[10] = 256;
-				app->configuration.primeSizes[11] = 129;
-				app->configuration.paddedSizes[11] = 512;
-				app->configuration.primeSizes[12] = 257;
-				app->configuration.paddedSizes[12] = 625;
-				app->configuration.primeSizes[13] = 314;
-				app->configuration.paddedSizes[13] = 768;
-				app->configuration.primeSizes[14] = 386;
-				app->configuration.paddedSizes[14] = 1024;
-				app->configuration.primeSizes[15] = 513;
-				app->configuration.paddedSizes[15] = 1296;
-				app->configuration.primeSizes[16] = 649;
-				app->configuration.paddedSizes[16] = 2048;
-				app->configuration.primeSizes[17] = 1025;
-				app->configuration.paddedSizes[17] = 2187;
-				app->configuration.primeSizes[18] = 1095;
-				app->configuration.paddedSizes[18] = 2304;
-				app->configuration.primeSizes[19] = 1153;
-				app->configuration.paddedSizes[19] = 2500;
-				app->configuration.primeSizes[20] = 1251;
-				app->configuration.paddedSizes[20] = 2592;
-				app->configuration.primeSizes[21] = 1297;
-				app->configuration.paddedSizes[21] = 3072;
-				app->configuration.primeSizes[22] = 1537;
-				app->configuration.paddedSizes[22] = 3125;
-				app->configuration.primeSizes[23] = 1564;
+				app->configuration.paddedSizes[3] = 81;
+				app->configuration.primeSizes[4] = 43;
+				app->configuration.paddedSizes[4] = 88;
+				app->configuration.primeSizes[5] = 46;
+				app->configuration.paddedSizes[5] = 125;
+				app->configuration.primeSizes[6] = 67;
+				app->configuration.paddedSizes[6] = 150;
+				app->configuration.primeSizes[7] = 76;
+				app->configuration.paddedSizes[7] = 162;
+				app->configuration.primeSizes[8] = 82;
+				app->configuration.paddedSizes[8] = 175;
+				app->configuration.primeSizes[9] = 89;
+				app->configuration.paddedSizes[9] = 256;
+				app->configuration.primeSizes[10] = 129;
+				app->configuration.paddedSizes[10] = 512;
+				app->configuration.primeSizes[11] = 257;
+				app->configuration.paddedSizes[11] = 625;
+				app->configuration.primeSizes[12] = 314;
+				app->configuration.paddedSizes[12] = 768;
+				app->configuration.primeSizes[13] = 386;
+				app->configuration.paddedSizes[13] = 1024;
+				app->configuration.primeSizes[14] = 513;
+				app->configuration.paddedSizes[14] = 1296;
+				app->configuration.primeSizes[15] = 649;
+				app->configuration.paddedSizes[15] = 2048;
+				app->configuration.primeSizes[16] = 1025;
+				app->configuration.paddedSizes[16] = 2187;
+				app->configuration.primeSizes[17] = 1095;
+				app->configuration.paddedSizes[17] = 2304;
+				app->configuration.primeSizes[18] = 1153;
+				app->configuration.paddedSizes[18] = 2500;
+				app->configuration.primeSizes[19] = 1251;
+				app->configuration.paddedSizes[19] = 2592;
+				app->configuration.primeSizes[20] = 1297;
+				app->configuration.paddedSizes[20] = 3072;
+				app->configuration.primeSizes[21] = 1537;
+				app->configuration.paddedSizes[21] = 3125;
+				app->configuration.primeSizes[22] = 1564;
+				app->configuration.paddedSizes[22] = 3136;
+				app->configuration.primeSizes[23] = 1569;
 				app->configuration.paddedSizes[23] = 4096;
 				app->configuration.primeSizes[24] = 2049;
 				app->configuration.paddedSizes[24] = 4375;
@@ -30470,11 +39167,9 @@ static inline VkFFTResult initializeBluesteinAutoPadding(VkFFTApplication* app)
 				app->configuration.primeSizes[26] = 2305;
 				app->configuration.paddedSizes[26] = 5184;
 				app->configuration.primeSizes[27] = 2593;
-				app->configuration.paddedSizes[27] = 5632;
-				app->configuration.primeSizes[28] = 2817;
-				app->configuration.paddedSizes[28] = 6561;
-				app->configuration.primeSizes[29] = 3282;
-				app->configuration.paddedSizes[29] = 8192;
+				app->configuration.paddedSizes[27] = 6561;
+				app->configuration.primeSizes[28] = 3282;
+				app->configuration.paddedSizes[28] = 8192;
 			}
 			break;
 		}
@@ -30491,9 +39186,9 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	if (inputLaunchConfiguration.useCustomBluesteinPaddingPattern != 0) {
 		app->configuration.useCustomBluesteinPaddingPattern = inputLaunchConfiguration.useCustomBluesteinPaddingPattern;
 		app->configuration.primeSizes = inputLaunchConfiguration.primeSizes;
-		if (!app->configuration.primeSizes) return VKFFT_ERROR_EMPRY_useCustomBluesteinPaddingPattern_arrays;
+		if (!app->configuration.primeSizes) return VKFFT_ERROR_EMPTY_useCustomBluesteinPaddingPattern_arrays;
 		app->configuration.paddedSizes = inputLaunchConfiguration.paddedSizes;
-		if (!app->configuration.paddedSizes) return VKFFT_ERROR_EMPRY_useCustomBluesteinPaddingPattern_arrays;
+		if (!app->configuration.paddedSizes) return VKFFT_ERROR_EMPTY_useCustomBluesteinPaddingPattern_arrays;
 	}
 	//set device parameters
 #if(VKFFT_BACKEND==0)
@@ -30544,15 +39239,17 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	app->configuration.sharedMemorySize = physicalDeviceProperties.limits.maxComputeSharedMemorySize;
 	app->configuration.sharedMemorySizePow2 = (uint64_t)pow(2, (uint64_t)log2(physicalDeviceProperties.limits.maxComputeSharedMemorySize));
 	app->configuration.vendorID = physicalDeviceProperties.vendorID;
+	if (inputLaunchConfiguration.pipelineCache != 0)	app->configuration.pipelineCache = inputLaunchConfiguration.pipelineCache;
+	app->configuration.useRaderUintLUT = 1;
 	switch (physicalDeviceProperties.vendorID) {
 	case 0x10DE://NVIDIA
 		app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;//the coalesced memory is equal to 32 bytes between L2 and VRAM.
-		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
 		app->configuration.warpSize = 32;
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = 4;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = 0;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 4194305 : 4194305;
 		break;
 	case 0x8086://INTEL
 		app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 128 : 64;
@@ -30561,25 +39258,25 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = (physicalDeviceProperties.limits.maxComputeSharedMemorySize >= 65536) ? 1 : 2;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = 0;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
 		break;
 	case 0x1002://AMD
 		app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;
-		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
 		app->configuration.warpSize = 64;
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = (physicalDeviceProperties.limits.maxComputeSharedMemorySize >= 65536) ? 2 : 4;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 20 : 21;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
 		break;
 	default:
 		app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 128 : 64;
-		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
 		app->configuration.warpSize = 32;
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = 1;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = 0;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
 		break;
 	}
 #elif(VKFFT_BACKEND==1)
@@ -30594,12 +39291,27 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	if (inputLaunchConfiguration.stream != 0)	app->configuration.stream = inputLaunchConfiguration.stream;
 	app->configuration.streamID = 0;
 	int value = 0;
+	res = cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, app->configuration.device[0]);
+	if (res != CUDA_SUCCESS) {
+		deleteVkFFT(app);
+		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
+	}
+	app->configuration.computeCapabilityMajor = value;
+
+	res = cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, app->configuration.device[0]);
+	if (res != CUDA_SUCCESS) {
+		deleteVkFFT(app);
+		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
+	}
+	app->configuration.computeCapabilityMinor = value;
+
 	res = cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, app->configuration.device[0]);
 	if (res != CUDA_SUCCESS) {
 		deleteVkFFT(app);
 		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
 	}
 	app->configuration.maxThreadsNum = value;
+
 	res = cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, app->configuration.device[0]);
 	if (res != CUDA_SUCCESS) {
 		deleteVkFFT(app);
@@ -30654,8 +39366,15 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
 	}
 	app->configuration.warpSize = value;
+	res = cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, app->configuration.device[0]);
+	if (res != CUDA_SUCCESS) {
+		deleteVkFFT(app);
+		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
+	}
+	app->configuration.useLUT_4step = (value <= 4) ? -1 : 1;
 	//we don't need this in CUDA
 	app->configuration.sharedMemorySizePow2 = (uint64_t)pow(2, (uint64_t)log2(app->configuration.sharedMemorySize));
+	app->configuration.useRaderUintLUT = 0;
 	if (app->configuration.num_streams > 1) {
 		app->configuration.stream_event = (cudaEvent_t*)malloc(app->configuration.num_streams * sizeof(cudaEvent_t));
 		if (!app->configuration.stream_event) {
@@ -30664,7 +39383,7 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		}
 		for (uint64_t i = 0; i < app->configuration.num_streams; i++) {
 			res_t = cudaEventCreate(&app->configuration.stream_event[i]);
-			if (res != CUDA_SUCCESS) {
+			if (res_t != cudaSuccess) {
 				deleteVkFFT(app);
 				return VKFFT_ERROR_FAILED_TO_CREATE_EVENT;
 			}
@@ -30672,11 +39391,11 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	}
 
 	app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;//the coalesced memory is equal to 32 bytes between L2 and VRAM.
-	app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+	app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
 	app->configuration.registerBoostNonPow2 = 0;
 	app->configuration.registerBoost = 1;
 	app->configuration.registerBoost4Step = 1;
-	app->configuration.swapTo3Stage4Step = 0;
+	app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 4194305 : 4194305;
 	app->configuration.vendorID = 0x10DE;
 #elif(VKFFT_BACKEND==2)
 	hipError_t res = hipSuccess;
@@ -30689,12 +39408,27 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	if (inputLaunchConfiguration.stream != 0)	app->configuration.stream = inputLaunchConfiguration.stream;
 	app->configuration.streamID = 0;
 	int value = 0;
+	res = hipDeviceGetAttribute(&value, hipDeviceAttributeComputeCapabilityMajor, app->configuration.device[0]);
+	if (res != hipSuccess) {
+		deleteVkFFT(app);
+		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
+	}
+	app->configuration.computeCapabilityMajor = value;
+
+	res = hipDeviceGetAttribute(&value, hipDeviceAttributeComputeCapabilityMinor, app->configuration.device[0]);
+	if (res != hipSuccess) {
+		deleteVkFFT(app);
+		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
+	}
+	app->configuration.computeCapabilityMinor = value;
+
 	res = hipDeviceGetAttribute(&value, hipDeviceAttributeMaxThreadsPerBlock, app->configuration.device[0]);
 	if (res != hipSuccess) {
 		deleteVkFFT(app);
 		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
 	}
 	app->configuration.maxThreadsNum = value;
+
 	res = hipDeviceGetAttribute(&value, hipDeviceAttributeMaxGridDimX, app->configuration.device[0]);
 	if (res != hipSuccess) {
 		deleteVkFFT(app);
@@ -30746,6 +39480,7 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	}
 	app->configuration.warpSize = value;
 	app->configuration.sharedMemorySizePow2 = (uint64_t)pow(2, (uint64_t)log2(app->configuration.sharedMemorySize));
+	app->configuration.useRaderUintLUT = 0;
 	if (app->configuration.num_streams > 1) {
 		app->configuration.stream_event = (hipEvent_t*)malloc(app->configuration.num_streams * sizeof(hipEvent_t));
 		if (!app->configuration.stream_event) {
@@ -30761,11 +39496,12 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		}
 	}
 	app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;
-	app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+	app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
+	app->configuration.useLUT_4step = -1;
 	app->configuration.registerBoostNonPow2 = 0;
 	app->configuration.registerBoost = 1;
 	app->configuration.registerBoost4Step = 1;
-	app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 20 : 21;
+	app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 1048576 : 2097152;
 	app->configuration.vendorID = 0x1002;
 #elif(VKFFT_BACKEND==3)
 	cl_int res = 0;
@@ -30793,6 +39529,7 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		return VKFFT_ERROR_FAILED_TO_GET_ATTRIBUTE;
 	}
 	app->configuration.maxThreadsNum = value_int64;
+
 	res = clGetDeviceInfo(app->configuration.device[0], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &value_cl_uint, 0);
 	if (res != 0) {
 		deleteVkFFT(app);
@@ -30828,15 +39565,16 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	app->configuration.sharedMemorySize = sharedMemorySize;
 	app->configuration.sharedMemorySizePow2 = (uint64_t)pow(2, (uint64_t)log2(sharedMemorySize));
 	app->configuration.vendorID = vendorID;
+	app->configuration.useRaderUintLUT = 1;
 	switch (vendorID) {
 	case 0x10DE://NVIDIA
 		app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;//the coalesced memory is equal to 32 bytes between L2 and VRAM.
-		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
 		app->configuration.warpSize = 32;
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = 4;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = 0;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 4194305 : 4194305;
 		app->configuration.sharedMemorySize -= 0x10;//reserved by system
 		app->configuration.sharedMemorySizePow2 = (uint64_t)pow(2, (uint64_t)log2(app->configuration.sharedMemorySize));
 		break;
@@ -30847,25 +39585,25 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = (sharedMemorySize >= 65536) ? 1 : 2;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = 0;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
 		break;
 	case 0x1002://AMD
 		app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 64 : 32;
-		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
 		app->configuration.warpSize = 64;
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = (sharedMemorySize >= 65536) ? 2 : 4;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = 0;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
 		break;
 	default:
 		app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 128 : 64;
-		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : 0;
+		app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
 		app->configuration.warpSize = 32;
 		app->configuration.registerBoostNonPow2 = 0;
 		app->configuration.registerBoost = 1;
 		app->configuration.registerBoost4Step = 1;
-		app->configuration.swapTo3Stage4Step = 0;
+		app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
 		break;
 	}
 #elif(VKFFT_BACKEND==4)
@@ -30911,8 +39649,75 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	app->configuration.registerBoostNonPow2 = 0;
 	app->configuration.registerBoost = (app->configuration.sharedMemorySize >= 65536) ? 1 : 2;
 	app->configuration.registerBoost4Step = 1;
-	app->configuration.swapTo3Stage4Step = 0;
+	app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
 	app->configuration.vendorID = 0x8086;
+	app->configuration.useRaderUintLUT = 1;
+#elif(VKFFT_BACKEND==5)
+	if (inputLaunchConfiguration.device == 0) {
+		deleteVkFFT(app);
+		return VKFFT_ERROR_INVALID_DEVICE;
+	}
+	app->configuration.device = inputLaunchConfiguration.device;
+
+	if (inputLaunchConfiguration.queue == 0) {
+		deleteVkFFT(app);
+		return VKFFT_ERROR_INVALID_QUEUE;
+	}
+	app->configuration.queue = inputLaunchConfiguration.queue;
+
+	const char dummy_kernel[50] = "kernel void VkFFT_dummy (){}";
+	const char function_name[20] = "VkFFT_dummy";
+
+	NS::Error* error;
+	MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc();
+	NS::String* str_code = NS::String::string(dummy_kernel, NS::UTF8StringEncoding);
+	MTL::Library* dummy_library = app->configuration.device->newLibrary(str_code, compileOptions, &error);
+	NS::String* str_name = NS::String::string(function_name, NS::UTF8StringEncoding);
+	MTL::Function* function = dummy_library->newFunction(str_name);
+	MTL::ComputePipelineState* dummy_state = app->configuration.device->newComputePipelineState(function, &error);
+
+	MTL::Size size = app->configuration.device->maxThreadsPerThreadgroup();
+	app->configuration.maxThreadsNum = dummy_state->maxTotalThreadsPerThreadgroup();
+
+	app->configuration.maxComputeWorkGroupSize[0] = size.width;
+	app->configuration.maxComputeWorkGroupSize[1] = size.height;
+	app->configuration.maxComputeWorkGroupSize[2] = size.depth;
+
+	if (app->configuration.maxThreadsNum > 256) {
+		app->configuration.maxThreadsNum = 256;
+
+		app->configuration.maxComputeWorkGroupSize[0] = 256;
+		app->configuration.maxComputeWorkGroupSize[1] = 256;
+		app->configuration.maxComputeWorkGroupSize[2] = 256;
+		//The dummy kernel approach (above) does not work for some DCT-IV kernels (like 256x256x256). They refuse to have more than 256 threads. I will just force OpenCL thread limits for now.
+	}
+
+	app->configuration.maxComputeWorkGroupCount[0] = -1;
+	app->configuration.maxComputeWorkGroupCount[1] = -1;
+	app->configuration.maxComputeWorkGroupCount[2] = -1;
+
+	app->configuration.sharedMemorySizeStatic = app->configuration.device->maxThreadgroupMemoryLength();
+	app->configuration.sharedMemorySize = app->configuration.device->maxThreadgroupMemoryLength();
+
+	app->configuration.warpSize = dummy_state->threadExecutionWidth();
+
+	app->configuration.sharedMemorySizePow2 = (uint64_t)pow(2, (uint64_t)log2(app->configuration.sharedMemorySize));
+	app->configuration.useRaderUintLUT = 1;
+
+	app->configuration.coalescedMemory = (app->configuration.halfPrecision) ? 128 : 64;//the coalesced memory is equal to 64 bytes between L2 and VRAM.
+	app->configuration.useLUT = (app->configuration.doublePrecision || app->configuration.doublePrecisionFloatMemory) ? 1 : -1;
+	app->configuration.registerBoostNonPow2 = 0;
+	app->configuration.registerBoost = 1;
+	app->configuration.registerBoost4Step = 1;
+	app->configuration.swapTo3Stage4Step = (app->configuration.doublePrecision) ? 262144 : 524288;
+	app->configuration.vendorID = 0x1027f00;
+
+	dummy_state->release();
+	function->release();
+	str_name->release();
+	dummy_library->release();
+	str_code->release();
+	compileOptions->release();
 #endif
 
 	resFFT = initializeBluesteinAutoPadding(app);
@@ -31130,12 +39935,19 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 			if (app->configuration.doublePrecision) checkBufferSizeFor64BitAddressing *= 2;
 		}
 	}
+#if(VKFFT_BACKEND==2)
+	app->configuration.useStrict32BitAddress = 0;
+	if (checkBufferSizeFor64BitAddressing >= (uint64_t)pow((uint64_t)2, (uint64_t)32)) app->configuration.useStrict32BitAddress = -1;
+#endif
 	if (checkBufferSizeFor64BitAddressing >= (uint64_t)pow((uint64_t)2, (uint64_t)34)) app->configuration.useUint64 = 1;
 	checkBufferSizeFor64BitAddressing = 0;
 	for (uint64_t i = 0; i < app->configuration.inputBufferNum; i++) {
 		if (app->configuration.inputBufferSize)
 			checkBufferSizeFor64BitAddressing += app->configuration.inputBufferSize[i];
 	}
+#if(VKFFT_BACKEND==2)
+	if (checkBufferSizeFor64BitAddressing >= (uint64_t)pow((uint64_t)2, (uint64_t)32)) app->configuration.useStrict32BitAddress = -1;
+#endif
 	if (checkBufferSizeFor64BitAddressing >= (uint64_t)pow((uint64_t)2, (uint64_t)34)) app->configuration.useUint64 = 1;
 
 	checkBufferSizeFor64BitAddressing = 0;
@@ -31150,9 +39962,16 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		if (app->configuration.kernelSize)
 			checkBufferSizeFor64BitAddressing += app->configuration.kernelSize[i];
 	}
+#if(VKFFT_BACKEND==2)
+	if (checkBufferSizeFor64BitAddressing >= (uint64_t)pow((uint64_t)2, (uint64_t)32)) app->configuration.useStrict32BitAddress = -1;
+	// No reason was found to disable strict 32 bit addressing, so enable it
+	if (app->configuration.useStrict32BitAddress == 0) app->configuration.useStrict32BitAddress = 1;
+#endif
 	if (checkBufferSizeFor64BitAddressing >= (uint64_t)pow((uint64_t)2, (uint64_t)34)) app->configuration.useUint64 = 1;
 	if (inputLaunchConfiguration.useUint64 != 0)	app->configuration.useUint64 = inputLaunchConfiguration.useUint64;
-
+#if(VKFFT_BACKEND==2)
+	if (inputLaunchConfiguration.useStrict32BitAddress != 0) app->configuration.useStrict32BitAddress = inputLaunchConfiguration.useStrict32BitAddress;
+#endif
 	if (inputLaunchConfiguration.coalescedMemory != 0)	app->configuration.coalescedMemory = inputLaunchConfiguration.coalescedMemory;
 	app->configuration.aimThreads = 128;
 	if (inputLaunchConfiguration.aimThreads != 0)	app->configuration.aimThreads = inputLaunchConfiguration.aimThreads;
@@ -31161,8 +39980,48 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	if (inputLaunchConfiguration.inverseReturnToInputBuffer != 0)	app->configuration.inverseReturnToInputBuffer = inputLaunchConfiguration.inverseReturnToInputBuffer;
 
 	if (inputLaunchConfiguration.useLUT != 0)	app->configuration.useLUT = inputLaunchConfiguration.useLUT;
+	if (inputLaunchConfiguration.useLUT_4step != 0) {
+		if (inputLaunchConfiguration.useLUT_4step > 0)
+			app->configuration.useLUT = 1;
+		app->configuration.useLUT_4step = inputLaunchConfiguration.useLUT_4step;
+	}
+	else {
+		if (app->configuration.useLUT_4step == 0)
+			app->configuration.useLUT_4step = app->configuration.useLUT;
+	}
+
+	if (app->configuration.useLUT == -1)	app->configuration.useLUT_4step = -1;
+
 	if (inputLaunchConfiguration.fixMaxRadixBluestein != 0) app->configuration.fixMaxRadixBluestein = inputLaunchConfiguration.fixMaxRadixBluestein;
 	if (inputLaunchConfiguration.forceBluesteinSequenceSize != 0) app->configuration.forceBluesteinSequenceSize = inputLaunchConfiguration.forceBluesteinSequenceSize;
+
+	app->configuration.fixMinRaderPrimeMult = 17;
+	switch (app->configuration.vendorID) {
+	case 0x10DE://NVIDIA
+		app->configuration.fixMaxRaderPrimeMult = 89;
+		break;
+	case 0x1002://AMD profile
+		app->configuration.fixMaxRaderPrimeMult = 89;
+		break;
+	default:
+		app->configuration.fixMaxRaderPrimeMult = 17;
+		break;
+	}
+	if (inputLaunchConfiguration.fixMinRaderPrimeMult != 0) app->configuration.fixMinRaderPrimeMult = inputLaunchConfiguration.fixMinRaderPrimeMult;
+	if (inputLaunchConfiguration.fixMaxRaderPrimeMult != 0) app->configuration.fixMaxRaderPrimeMult = inputLaunchConfiguration.fixMaxRaderPrimeMult;
+
+	switch (app->configuration.vendorID) {
+	case 0x1002://AMD profile
+		app->configuration.fixMinRaderPrimeFFT = 29;
+		break;
+	default:
+		app->configuration.fixMinRaderPrimeFFT = 17;
+		break;
+	}
+	app->configuration.fixMaxRaderPrimeFFT = 16384;
+	if (inputLaunchConfiguration.fixMinRaderPrimeFFT != 0) app->configuration.fixMinRaderPrimeFFT = inputLaunchConfiguration.fixMinRaderPrimeFFT;
+	if (inputLaunchConfiguration.fixMaxRaderPrimeFFT != 0) app->configuration.fixMaxRaderPrimeFFT = inputLaunchConfiguration.fixMaxRaderPrimeFFT;
+
 	if (inputLaunchConfiguration.performR2C != 0) {
 		app->configuration.performR2C = inputLaunchConfiguration.performR2C;
 	}
@@ -31179,7 +40038,10 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	if (inputLaunchConfiguration.makeInversePlanOnly != 0)	app->configuration.makeInversePlanOnly = inputLaunchConfiguration.makeInversePlanOnly;
 
 	app->configuration.reorderFourStep = 1;
-	if (inputLaunchConfiguration.disableReorderFourStep != 0) app->configuration.reorderFourStep = 0;
+	if (inputLaunchConfiguration.disableReorderFourStep != 0) {
+		app->configuration.reorderFourStep = 0;
+		if (app->configuration.swapTo3Stage4Step < 1048576) app->configuration.swapTo3Stage4Step = 1048576;
+	}
 	if (inputLaunchConfiguration.frequencyZeroPadding != 0) app->configuration.frequencyZeroPadding = inputLaunchConfiguration.frequencyZeroPadding;
 	for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 		if (inputLaunchConfiguration.performZeropadding[i] != 0) {
@@ -31269,6 +40131,7 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	app->configuration.maxTempLength = 5000;
 	if (inputLaunchConfiguration.maxTempLength != 0) app->configuration.maxTempLength = inputLaunchConfiguration.maxTempLength;
 
+	if (inputLaunchConfiguration.useRaderUintLUT != 0)	app->configuration.useRaderUintLUT = inputLaunchConfiguration.useRaderUintLUT;
 	if (inputLaunchConfiguration.halfThreads != 0)	app->configuration.halfThreads = inputLaunchConfiguration.halfThreads;
 	if (inputLaunchConfiguration.swapTo3Stage4Step != 0)	app->configuration.swapTo3Stage4Step = inputLaunchConfiguration.swapTo3Stage4Step;
 	if (app->configuration.performDCT > 0) app->configuration.performBandwidthBoost = -1;
@@ -31278,9 +40141,12 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	if (inputLaunchConfiguration.keepShaderCode != 0)	app->configuration.keepShaderCode = inputLaunchConfiguration.keepShaderCode;
 	if (inputLaunchConfiguration.printMemoryLayout != 0)	app->configuration.printMemoryLayout = inputLaunchConfiguration.printMemoryLayout;
 	if (inputLaunchConfiguration.considerAllAxesStrided != 0)	app->configuration.considerAllAxesStrided = inputLaunchConfiguration.considerAllAxesStrided;
-
+#if(VKFFT_BACKEND!=5)
 	if (inputLaunchConfiguration.loadApplicationString != 0)	app->configuration.loadApplicationString = inputLaunchConfiguration.loadApplicationString;
 	if (inputLaunchConfiguration.saveApplicationToString != 0)	app->configuration.saveApplicationToString = inputLaunchConfiguration.saveApplicationToString;
+#endif
+	if (inputLaunchConfiguration.disableSetLocale != 0)	app->configuration.disableSetLocale = inputLaunchConfiguration.disableSetLocale;
+
 	if (inputLaunchConfiguration.loadApplicationFromString != 0) {
 		app->configuration.loadApplicationFromString = inputLaunchConfiguration.loadApplicationFromString;
 		if (app->configuration.saveApplicationToString != 0) {
@@ -31291,7 +40157,9 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 			deleteVkFFT(app);
 			return VKFFT_ERROR_EMPTY_applicationString;
 		}
-		app->currentApplicationStringPos = 0;
+		memcpy(&app->applicationStringSize, app->configuration.loadApplicationString, sizeof(uint64_t));
+		memcpy(&app->applicationStringOffsetRader, (char*)app->configuration.loadApplicationString + 2 * sizeof(uint64_t), sizeof(uint64_t));
+		app->currentApplicationStringPos = 5 * sizeof(uint64_t);
 	}
 	//temporary set:
 	app->configuration.registerBoost4Step = 1;
@@ -31304,7 +40172,17 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		if (app->localFFTPlan_inverse) {
 			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 				//app->configuration.sharedMemorySize = ((app->configuration.size[i] & (app->configuration.size[i] - 1)) == 0) ? app->configuration.sharedMemorySizePow2 : initSharedMemory;
-				resFFT = VkFFTScheduler(app, app->localFFTPlan_inverse, i, 0);
+				resFFT = VkFFTScheduler(app, app->localFFTPlan_inverse, i);
+				if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH) {
+					//try again with Rader disabled - sequences like 89^4 can still be done with Bluestein FFT
+					uint64_t temp_fixMaxRaderPrimeFFT = app->configuration.fixMaxRaderPrimeFFT;
+					app->configuration.fixMaxRaderPrimeFFT = app->configuration.fixMinRaderPrimeFFT;
+					uint64_t temp_fixMaxRaderPrimeMult = app->configuration.fixMaxRaderPrimeMult;
+					app->configuration.fixMaxRaderPrimeMult = app->configuration.fixMinRaderPrimeMult;
+					resFFT = VkFFTScheduler(app, app->localFFTPlan_inverse, i);
+					app->configuration.fixMaxRaderPrimeFFT = temp_fixMaxRaderPrimeFFT;
+					app->configuration.fixMaxRaderPrimeMult = temp_fixMaxRaderPrimeMult;
+				}
 				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					return resFFT;
@@ -31352,7 +40230,17 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 		if (app->localFFTPlan) {
 			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 				//app->configuration.sharedMemorySize = ((app->configuration.size[i] & (app->configuration.size[i] - 1)) == 0) ? app->configuration.sharedMemorySizePow2 : initSharedMemory;
-				resFFT = VkFFTScheduler(app, app->localFFTPlan, i, 0);
+				resFFT = VkFFTScheduler(app, app->localFFTPlan, i);
+				if (resFFT == VKFFT_ERROR_UNSUPPORTED_FFT_LENGTH) {
+					//try again with Rader disabled - sequences like 89^4 can still be done with Bluestein FFT
+					uint64_t temp_fixMaxRaderPrimeFFT = app->configuration.fixMaxRaderPrimeFFT;
+					app->configuration.fixMaxRaderPrimeFFT = app->configuration.fixMinRaderPrimeFFT;
+					uint64_t temp_fixMaxRaderPrimeMult = app->configuration.fixMaxRaderPrimeMult;
+					app->configuration.fixMaxRaderPrimeMult = app->configuration.fixMinRaderPrimeMult;
+					resFFT = VkFFTScheduler(app, app->localFFTPlan, i);
+					app->configuration.fixMaxRaderPrimeFFT = temp_fixMaxRaderPrimeFFT;
+					app->configuration.fixMaxRaderPrimeMult = temp_fixMaxRaderPrimeMult;
+				}
 				if (resFFT != VKFFT_SUCCESS) {
 					deleteVkFFT(app);
 					return resFFT;
@@ -31398,9 +40286,9 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 		if (app->useBluesteinFFT[i]) {
 			if (!app->configuration.makeInversePlanOnly)
-				resFFT = VkFFTGeneratePhaseVectors(app, app->localFFTPlan, i, 0);
+				resFFT = VkFFTGeneratePhaseVectors(app, app->localFFTPlan, i);
 			else
-				resFFT = VkFFTGeneratePhaseVectors(app, app->localFFTPlan_inverse, i, 0);
+				resFFT = VkFFTGeneratePhaseVectors(app, app->localFFTPlan_inverse, i);
 			if (resFFT != VKFFT_SUCCESS) {
 				deleteVkFFT(app);
 				return resFFT;
@@ -31409,35 +40297,34 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 	}
 
 	if (inputLaunchConfiguration.saveApplicationToString != 0) {
-#if((VKFFT_BACKEND==0)||(VKFFT_BACKEND==2)||(VKFFT_BACKEND==4))
-		uint64_t totalBinarySize = 0;
+		uint64_t totalBinarySize = 5 * sizeof(uint64_t);
 		if (!app->configuration.makeForwardPlanOnly) {
 			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 				for (uint64_t j = 0; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-					totalBinarySize += app->localFFTPlan_inverse->axes[i][j].binarySize + sizeof(uint32_t);
+					totalBinarySize += app->localFFTPlan_inverse->axes[i][j].binarySize + sizeof(uint64_t);
 				}
 				if (app->useBluesteinFFT[i] && (app->localFFTPlan_inverse->numAxisUploads[i] > 1)) {
 					for (uint64_t j = 1; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-						totalBinarySize += app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize + sizeof(uint32_t);
+						totalBinarySize += app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize + sizeof(uint64_t);
 					}
 				}
 				if ((app->localFFTPlan_inverse->multiUploadR2C) && (i == 0)) {
-					totalBinarySize += app->localFFTPlan_inverse->R2Cdecomposition.binarySize + sizeof(uint32_t);
+					totalBinarySize += app->localFFTPlan_inverse->R2Cdecomposition.binarySize + sizeof(uint64_t);
 				}
 			}
 		}
 		if (!app->configuration.makeInversePlanOnly) {
 			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 				for (uint64_t j = 0; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-					totalBinarySize += app->localFFTPlan->axes[i][j].binarySize + sizeof(uint32_t);
+					totalBinarySize += app->localFFTPlan->axes[i][j].binarySize + sizeof(uint64_t);
 				}
 				if (app->useBluesteinFFT[i] && (app->localFFTPlan->numAxisUploads[i] > 1)) {
 					for (uint64_t j = 1; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-						totalBinarySize += app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize + sizeof(uint32_t);
+						totalBinarySize += app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize + sizeof(uint64_t);
 					}
 				}
 				if ((app->localFFTPlan->multiUploadR2C) && (i == 0)) {
-					totalBinarySize += app->localFFTPlan->R2Cdecomposition.binarySize + sizeof(uint32_t);
+					totalBinarySize += app->localFFTPlan->R2Cdecomposition.binarySize + sizeof(uint64_t);
 				}
 			}
 		}
@@ -31446,158 +40333,80 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 				totalBinarySize += app->applicationBluesteinStringSize[i];
 			}
 		}
+		if (app->numRaderFFTPrimes > 0) {
+			app->applicationStringOffsetRader = totalBinarySize;
+			for (uint64_t i = 0; i < app->numRaderFFTPrimes; i++) {
+				totalBinarySize += app->rader_buffer_size[i];
+			}
+		}
 		app->saveApplicationString = calloc(totalBinarySize, 1);
 		if (!app->saveApplicationString) {
 			deleteVkFFT(app);
 			return VKFFT_ERROR_MALLOC_FAILED;
 		}
 		app->applicationStringSize = totalBinarySize;
-		uint64_t currentPos = 0;
-		uint32_t* localApplicationStringCast = (uint32_t*)app->saveApplicationString;
+		char* localApplicationStringCast = (char*)app->saveApplicationString;
+		memcpy(localApplicationStringCast, &totalBinarySize, sizeof(uint64_t));//header slot 0: total size of the saved string in bytes
+		memcpy(localApplicationStringCast + 2 * sizeof(uint64_t), &app->applicationStringOffsetRader, sizeof(uint64_t));//header slot 2: Rader data offset. The pointer is char*, so the slot index must be scaled to bytes - an unscaled +2 would overwrite the size field above and would not match the offset the load path reads from
+		uint64_t currentPos = 5 * sizeof(uint64_t);//payload starts right after the header of five uint64_t slots
 		if (!app->configuration.makeForwardPlanOnly) {
 			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 				for (uint64_t j = 0; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-					localApplicationStringCast[currentPos] = (uint32_t)app->localFFTPlan_inverse->axes[i][j].binarySize;
-					currentPos++;
+					memcpy(localApplicationStringCast + currentPos, &app->localFFTPlan_inverse->axes[i][j].binarySize, sizeof(uint64_t));
+					currentPos += sizeof(uint64_t);
 					memcpy(localApplicationStringCast + currentPos, app->localFFTPlan_inverse->axes[i][j].binary, app->localFFTPlan_inverse->axes[i][j].binarySize);
-					currentPos += app->localFFTPlan_inverse->axes[i][j].binarySize / sizeof(uint32_t);
+					currentPos += app->localFFTPlan_inverse->axes[i][j].binarySize;
 				}
 				if (app->useBluesteinFFT[i] && (app->localFFTPlan_inverse->numAxisUploads[i] > 1)) {
 					for (uint64_t j = 1; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-						localApplicationStringCast[currentPos] = (uint32_t)app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize;
-						currentPos++;
+						memcpy(localApplicationStringCast + currentPos, &app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize, sizeof(uint64_t));
+						currentPos += sizeof(uint64_t);
 						memcpy(localApplicationStringCast + currentPos, app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binary, app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize);
-						currentPos += app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize / sizeof(uint32_t);
+						currentPos += app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize;
 					}
 				}
 				if ((app->localFFTPlan_inverse->multiUploadR2C) && (i == 0)) {
-					localApplicationStringCast[currentPos] = (uint32_t)app->localFFTPlan_inverse->R2Cdecomposition.binarySize;
-					currentPos++;
+					memcpy(localApplicationStringCast + currentPos, &app->localFFTPlan_inverse->R2Cdecomposition.binarySize, sizeof(uint64_t));
+					currentPos += sizeof(uint64_t);
 					memcpy(localApplicationStringCast + currentPos, app->localFFTPlan_inverse->R2Cdecomposition.binary, app->localFFTPlan_inverse->R2Cdecomposition.binarySize);
-					currentPos += app->localFFTPlan_inverse->R2Cdecomposition.binarySize / sizeof(uint32_t);
+					currentPos += app->localFFTPlan_inverse->R2Cdecomposition.binarySize;
 				}
 			}
 		}
 		if (!app->configuration.makeInversePlanOnly) {
 			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 				for (uint64_t j = 0; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-					localApplicationStringCast[currentPos] = (uint32_t)app->localFFTPlan->axes[i][j].binarySize;
-					currentPos++;
+					memcpy(localApplicationStringCast + currentPos, &app->localFFTPlan->axes[i][j].binarySize, sizeof(uint64_t));
+					currentPos += sizeof(uint64_t);
 					memcpy(localApplicationStringCast + currentPos, app->localFFTPlan->axes[i][j].binary, app->localFFTPlan->axes[i][j].binarySize);
-					currentPos += app->localFFTPlan->axes[i][j].binarySize / sizeof(uint32_t);
+					currentPos += app->localFFTPlan->axes[i][j].binarySize;
 				}
 				if (app->useBluesteinFFT[i] && (app->localFFTPlan->numAxisUploads[i] > 1)) {
 					for (uint64_t j = 1; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-						localApplicationStringCast[currentPos] = (uint32_t)app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize;
-						currentPos++;
+						memcpy(localApplicationStringCast + currentPos, &app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize, sizeof(uint64_t));
+						currentPos += sizeof(uint64_t);
 						memcpy(localApplicationStringCast + currentPos, app->localFFTPlan->inverseBluesteinAxes[i][j].binary, app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize);
-						currentPos += app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize / sizeof(uint32_t);
+						currentPos += app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize;
 					}
 				}
 				if ((app->localFFTPlan->multiUploadR2C) && (i == 0)) {
-					localApplicationStringCast[currentPos] = (uint32_t)app->localFFTPlan->R2Cdecomposition.binarySize;
-					currentPos++;
+					memcpy(localApplicationStringCast + currentPos, &app->localFFTPlan->R2Cdecomposition.binarySize, sizeof(uint64_t));
+					currentPos += sizeof(uint64_t);
 					memcpy(localApplicationStringCast + currentPos, app->localFFTPlan->R2Cdecomposition.binary, app->localFFTPlan->R2Cdecomposition.binarySize);
-					currentPos += app->localFFTPlan->R2Cdecomposition.binarySize / sizeof(uint32_t);
+					currentPos += app->localFFTPlan->R2Cdecomposition.binarySize;
 				}
 			}
 		}
 		for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
 			if (app->useBluesteinFFT[i]) {
 				memcpy(localApplicationStringCast + currentPos, app->applicationBluesteinString[i], app->applicationBluesteinStringSize[i]);
-				currentPos += app->applicationBluesteinStringSize[i] / sizeof(uint32_t);
-			}
-		}
-		for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
-			if (app->applicationBluesteinString[i] != 0) {
-				free(app->applicationBluesteinString[i]);
-				app->applicationBluesteinString[i] = 0;
-			}
-		}
-#else
-		uint64_t totalBinarySize = 1;
-		if (!app->configuration.makeForwardPlanOnly) {
-			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
-				for (uint64_t j = 0; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-					totalBinarySize += app->localFFTPlan_inverse->axes[i][j].binarySize + (uint64_t)(floor(log10((double)app->localFFTPlan_inverse->axes[i][j].binarySize))) + 1;
-				}
-				if (app->useBluesteinFFT[i] && (app->localFFTPlan_inverse->numAxisUploads[i] > 1)) {
-					for (uint64_t j = 1; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-						totalBinarySize += app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize + (uint64_t)(floor(log10((double)app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize))) + 1;
-					}
-				}
-				if ((app->localFFTPlan_inverse->multiUploadR2C) && (i == 0)) {
-					totalBinarySize += app->localFFTPlan_inverse->R2Cdecomposition.binarySize + (uint64_t)(floor(log10((double)app->localFFTPlan_inverse->R2Cdecomposition.binarySize))) + 1;
-				}
-			}
-		}
-		if (!app->configuration.makeInversePlanOnly) {
-			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
-				for (uint64_t j = 0; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-					totalBinarySize += app->localFFTPlan->axes[i][j].binarySize + (uint64_t)(floor(log10((double)app->localFFTPlan->axes[i][j].binarySize))) + 1;
-				}
-				if (app->useBluesteinFFT[i] && (app->localFFTPlan->numAxisUploads[i] > 1)) {
-					for (uint64_t j = 1; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-						totalBinarySize += app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize + (uint64_t)(floor(log10((double)app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize))) + 1;
-					}
-				}
-				if ((app->localFFTPlan->multiUploadR2C) && (i == 0)) {
-					totalBinarySize += app->localFFTPlan->R2Cdecomposition.binarySize + (uint64_t)(floor(log10((double)app->localFFTPlan->R2Cdecomposition.binarySize))) + 1;
-				}
-			}
-		}
-		for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
-			if (app->useBluesteinFFT[i]) {
-				totalBinarySize += app->applicationBluesteinStringSize[i] - 1;
-			}
-		}
-		app->saveApplicationString = (char*)calloc(totalBinarySize, 1);
-		if (!app->saveApplicationString) {
-			deleteVkFFT(app);
-			return VKFFT_ERROR_MALLOC_FAILED;
-		}
-		app->applicationStringSize = totalBinarySize;
-		uint64_t currentPos = 0;
-		char* localApplicationStringCast = (char*)app->saveApplicationString;
-		if (!app->configuration.makeForwardPlanOnly) {
-			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
-				for (uint64_t j = 0; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%" PRIu64 "\n", app->localFFTPlan_inverse->axes[i][j].binarySize);
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%s", (char*)app->localFFTPlan_inverse->axes[i][j].binary);
-				}
-				if (app->useBluesteinFFT[i] && (app->localFFTPlan_inverse->numAxisUploads[i] > 1)) {
-					for (uint64_t j = 1; j < app->localFFTPlan_inverse->numAxisUploads[i]; j++) {
-						currentPos += sprintf(localApplicationStringCast + currentPos, "%" PRIu64 "\n", app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binarySize);
-						currentPos += sprintf(localApplicationStringCast + currentPos, "%s", (char*)app->localFFTPlan_inverse->inverseBluesteinAxes[i][j].binary);
-					}
-				}
-				if ((app->localFFTPlan_inverse->multiUploadR2C) && (i == 0)) {
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%" PRIu64 "\n", app->localFFTPlan_inverse->R2Cdecomposition.binarySize);
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%s", (char*)app->localFFTPlan_inverse->R2Cdecomposition.binary);
-				}
-			}
-		}
-		if (!app->configuration.makeInversePlanOnly) {
-			for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
-				for (uint64_t j = 0; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%" PRIu64 "\n", app->localFFTPlan->axes[i][j].binarySize);
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%s", (char*)app->localFFTPlan->axes[i][j].binary);
-				}
-				if (app->useBluesteinFFT[i] && (app->localFFTPlan->numAxisUploads[i] > 1)) {
-					for (uint64_t j = 1; j < app->localFFTPlan->numAxisUploads[i]; j++) {
-						currentPos += sprintf(localApplicationStringCast + currentPos, "%" PRIu64 "\n", app->localFFTPlan->inverseBluesteinAxes[i][j].binarySize);
-						currentPos += sprintf(localApplicationStringCast + currentPos, "%s", (char*)app->localFFTPlan->inverseBluesteinAxes[i][j].binary);
-					}
-				}
-				if ((app->localFFTPlan->multiUploadR2C) && (i == 0)) {
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%" PRIu64 "\n", app->localFFTPlan->R2Cdecomposition.binarySize);
-					currentPos += sprintf(localApplicationStringCast + currentPos, "%s", (char*)app->localFFTPlan->R2Cdecomposition.binary);
-				}
+				currentPos += app->applicationBluesteinStringSize[i];
 			}
 		}
-		for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
-			if (app->useBluesteinFFT[i]) {
-				currentPos += sprintf(localApplicationStringCast + currentPos, "%s", (char*)app->applicationBluesteinString[i]);
+		if (app->numRaderFFTPrimes > 0) {
+			for (uint64_t i = 0; i < app->numRaderFFTPrimes; i++) {
+				memcpy(localApplicationStringCast + currentPos, app->raderFFTkernel[i], app->rader_buffer_size[i]);
+				currentPos += app->rader_buffer_size[i];
 			}
 		}
 		for (uint64_t i = 0; i < app->configuration.FFTdim; i++) {
@@ -31606,7 +40415,6 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 				app->applicationBluesteinString[i] = 0;
 			}
 		}
-#endif
 	}
 #if(VKFFT_BACKEND==0)
 	if (app->configuration.isCompilerInitialized) {
@@ -31618,37 +40426,31 @@ static inline VkFFTResult initializeVkFFT(VkFFTApplication* app, VkFFTConfigurat
 }
 static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axis, uint64_t* dispatchBlock) {
 	VkFFTResult resFFT = VKFFT_SUCCESS;
-	uint64_t maxBlockSize[3] = { (uint64_t)pow(2,(uint64_t)log2(app->configuration.maxComputeWorkGroupCount[0])),(uint64_t)pow(2,(uint64_t)log2(app->configuration.maxComputeWorkGroupCount[1])),(uint64_t)pow(2,(uint64_t)log2(app->configuration.maxComputeWorkGroupCount[2])) };
-	uint64_t blockNumber[3] = { (uint64_t)ceil(dispatchBlock[0] / (double)maxBlockSize[0]),(uint64_t)ceil(dispatchBlock[1] / (double)maxBlockSize[1]),(uint64_t)ceil(dispatchBlock[2] / (double)maxBlockSize[2]) };
+	if (axis->specializationConstants.swapComputeWorkGroupID == 1) {
+		uint64_t temp = dispatchBlock[0];
+		dispatchBlock[0] = dispatchBlock[1];
+		dispatchBlock[1] = temp;
+	}
+	if (axis->specializationConstants.swapComputeWorkGroupID == 2) {
+		uint64_t temp = dispatchBlock[0];
+		dispatchBlock[0] = dispatchBlock[2];
+		dispatchBlock[2] = temp;
+	}
+	uint64_t blockNumber[3] = { (uint64_t)ceil(dispatchBlock[0] / (double)app->configuration.maxComputeWorkGroupCount[0]),(uint64_t)ceil(dispatchBlock[1] / (double)app->configuration.maxComputeWorkGroupCount[1]),(uint64_t)ceil(dispatchBlock[2] / (double)app->configuration.maxComputeWorkGroupCount[2]) };
+	uint64_t blockSize[3] = { (uint64_t)ceil(dispatchBlock[0] / (double)blockNumber[0]), (uint64_t)ceil(dispatchBlock[1] / (double)blockNumber[1]), (uint64_t)ceil(dispatchBlock[2] / (double)blockNumber[2]) };
+	uint64_t lastBlockSize[3] = { blockSize[0],blockSize[1],blockSize[2] };
+	uint64_t dispatchSize[3] = { 1,1,1 };
 	if (blockNumber[0] == 0) blockNumber[0] = 1;
 	if (blockNumber[1] == 0) blockNumber[1] = 1;
 	if (blockNumber[2] == 0) blockNumber[2] = 1;
-	if ((blockNumber[0] > 1) && (blockNumber[0] * maxBlockSize[0] != dispatchBlock[0])) {
-		for (uint64_t i = app->configuration.maxComputeWorkGroupCount[0]; i > 0; i--) {
-			if (dispatchBlock[0] % i == 0) {
-				maxBlockSize[0] = i;
-				blockNumber[0] = dispatchBlock[0] / i;
-				i = 1;
-			}
-		}
+	if ((blockNumber[0] > 1) && (blockNumber[0] * blockSize[0] != dispatchBlock[0])) {
+		lastBlockSize[0] = dispatchBlock[0] % blockSize[0];
 	}
-	if ((blockNumber[1] > 1) && (blockNumber[1] * maxBlockSize[1] != dispatchBlock[1])) {
-		for (uint64_t i = app->configuration.maxComputeWorkGroupCount[1]; i > 0; i--) {
-			if (dispatchBlock[1] % i == 0) {
-				maxBlockSize[1] = i;
-				blockNumber[1] = dispatchBlock[1] / i;
-				i = 1;
-			}
-		}
+	if ((blockNumber[1] > 1) && (blockNumber[1] * blockSize[1] != dispatchBlock[1])) {
+		lastBlockSize[1] = dispatchBlock[1] % blockSize[1];
 	}
-	if ((blockNumber[2] > 1) && (blockNumber[2] * maxBlockSize[2] != dispatchBlock[2])) {
-		for (uint64_t i = app->configuration.maxComputeWorkGroupCount[2]; i > 0; i--) {
-			if (dispatchBlock[2] % i == 0) {
-				maxBlockSize[2] = i;
-				blockNumber[2] = dispatchBlock[2] / i;
-				i = 1;
-			}
-		}
+	if ((blockNumber[2] > 1) && (blockNumber[2] * blockSize[2] != dispatchBlock[2])) {
+		lastBlockSize[2] = dispatchBlock[2] % blockSize[2];
 	}
 	if (app->configuration.specifyOffsetsAtLaunch) {
 		axis->updatePushConstants = 1;
@@ -31656,20 +40458,20 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 	//printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", dispatchBlock[0], dispatchBlock[1], dispatchBlock[2]);
 	//printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", blockNumber[0], blockNumber[1], blockNumber[2]);
 	for (uint64_t i = 0; i < 3; i++)
-		if (blockNumber[i] == 1) maxBlockSize[i] = dispatchBlock[i];
+		if (blockNumber[i] == 1) blockSize[i] = dispatchBlock[i];
 	for (uint64_t i = 0; i < blockNumber[0]; i++) {
 		for (uint64_t j = 0; j < blockNumber[1]; j++) {
 			for (uint64_t k = 0; k < blockNumber[2]; k++) {
-				if (axis->pushConstants.workGroupShift[0] != i * maxBlockSize[0]) {
-					axis->pushConstants.workGroupShift[0] = i * maxBlockSize[0];
+				if (axis->pushConstants.workGroupShift[0] != i * blockSize[0]) {
+					axis->pushConstants.workGroupShift[0] = i * blockSize[0];
 					axis->updatePushConstants = 1;
 				}
-				if (axis->pushConstants.workGroupShift[1] != j * maxBlockSize[1]) {
-					axis->pushConstants.workGroupShift[1] = j * maxBlockSize[1];
+				if (axis->pushConstants.workGroupShift[1] != j * blockSize[1]) {
+					axis->pushConstants.workGroupShift[1] = j * blockSize[1];
 					axis->updatePushConstants = 1;
 				}
-				if (axis->pushConstants.workGroupShift[2] != k * maxBlockSize[2]) {
-					axis->pushConstants.workGroupShift[2] = k * maxBlockSize[2];
+				if (axis->pushConstants.workGroupShift[2] != k * blockSize[2]) {
+					axis->pushConstants.workGroupShift[2] = k * blockSize[2];
 					axis->updatePushConstants = 1;
 				}
 				if (axis->updatePushConstants) {
@@ -31734,6 +40536,9 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 						}
 					}
 				}
+				dispatchSize[0] = (i == blockNumber[0] - 1) ? lastBlockSize[0] : blockSize[0];
+				dispatchSize[1] = (j == blockNumber[1] - 1) ? lastBlockSize[1] : blockSize[1];
+				dispatchSize[2] = (k == blockNumber[2] - 1) ? lastBlockSize[2] : blockSize[2];
 #if(VKFFT_BACKEND==0)
 				if (axis->pushConstants.structSize > 0) {
 					if (app->configuration.useUint64) {
@@ -31743,9 +40548,9 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 						vkCmdPushConstants(app->configuration.commandBuffer[0], axis->pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, (uint32_t)axis->pushConstants.structSize, axis->pushConstants.dataUint32);
 					}
 				}
-				vkCmdDispatch(app->configuration.commandBuffer[0], (uint32_t)maxBlockSize[0], (uint32_t)maxBlockSize[1], (uint32_t)maxBlockSize[2]);
+				vkCmdDispatch(app->configuration.commandBuffer[0], (uint32_t)dispatchSize[0], (uint32_t)dispatchSize[1], (uint32_t)dispatchSize[2]);
 #elif(VKFFT_BACKEND==1)
-				void* args[6];
+				void* args[10];
 				CUresult result = CUDA_SUCCESS;
 				args[0] = axis->inputBuffer;
 				args[1] = axis->outputBuffer;
@@ -31758,6 +40563,10 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 					args[args_id] = &axis->bufferLUT;
 					args_id++;
 				}
+				if (axis->specializationConstants.raderUintLUT) {
+					args[args_id] = &axis->bufferRaderUintLUT;
+					args_id++;
+				}
 				if (axis->specializationConstants.useBluesteinFFT && axis->specializationConstants.BluesteinConvolutionStep) {
 					if (axis->specializationConstants.inverseBluestein)
 						args[args_id] = &app->bufferBluesteinIFFT[axis->specializationConstants.axis_id];
@@ -31787,20 +40596,20 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 				}
 				if (app->configuration.num_streams >= 1) {
 					result = cuLaunchKernel(axis->VkFFTKernel,
-						(unsigned int)maxBlockSize[0], (unsigned int)maxBlockSize[1], (unsigned int)maxBlockSize[2],     // grid dim
+						(unsigned int)dispatchSize[0], (unsigned int)dispatchSize[1], (unsigned int)dispatchSize[2],     // grid dim
 						(unsigned int)axis->specializationConstants.localSize[0], (unsigned int)axis->specializationConstants.localSize[1], (unsigned int)axis->specializationConstants.localSize[2],   // block dim
 						(unsigned int)axis->specializationConstants.usedSharedMemory, app->configuration.stream[app->configuration.streamID],             // shared mem and stream
 						args, 0);
 				}
 				else {
 					result = cuLaunchKernel(axis->VkFFTKernel,
-						(unsigned int)maxBlockSize[0], (unsigned int)maxBlockSize[1], (unsigned int)maxBlockSize[2],     // grid dim
+						(unsigned int)dispatchSize[0], (unsigned int)dispatchSize[1], (unsigned int)dispatchSize[2],     // grid dim
 						(unsigned int)axis->specializationConstants.localSize[0], (unsigned int)axis->specializationConstants.localSize[1], (unsigned int)axis->specializationConstants.localSize[2],   // block dim
 						(unsigned int)axis->specializationConstants.usedSharedMemory, 0,             // shared mem and stream
 						args, 0);
 				}
 				if (result != CUDA_SUCCESS) {
-					printf("cuLaunchKernel error: %d, %" PRIu64 " %" PRIu64 " %" PRIu64 " - %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", result, maxBlockSize[0], maxBlockSize[1], maxBlockSize[2], axis->specializationConstants.localSize[0], axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2]);
+					printf("cuLaunchKernel error: %d, %" PRIu64 " %" PRIu64 " %" PRIu64 " - %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", result, dispatchSize[0], dispatchSize[1], dispatchSize[2], axis->specializationConstants.localSize[0], axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2]);
 					return VKFFT_ERROR_FAILED_TO_LAUNCH_KERNEL;
 				}
 				if (app->configuration.num_streams > 1) {
@@ -31813,7 +40622,7 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 				}
 #elif(VKFFT_BACKEND==2)
 				hipError_t result = hipSuccess;
-				void* args[6];
+				void* args[10];
 				args[0] = axis->inputBuffer;
 				args[1] = axis->outputBuffer;
 				uint64_t args_id = 2;
@@ -31825,6 +40634,10 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 					args[args_id] = &axis->bufferLUT;
 					args_id++;
 				}
+				if (axis->specializationConstants.raderUintLUT) {
+					args[args_id] = &axis->bufferRaderUintLUT;
+					args_id++;
+				}
 				if (axis->specializationConstants.useBluesteinFFT && axis->specializationConstants.BluesteinConvolutionStep) {
 					if (axis->specializationConstants.inverseBluestein)
 						args[args_id] = &app->bufferBluesteinIFFT[axis->specializationConstants.axis_id];
@@ -31855,20 +40668,20 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 				//printf("%" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",maxBlockSize[0], maxBlockSize[1], maxBlockSize[2], axis->specializationConstants.localSize[0], axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2]);
 				if (app->configuration.num_streams >= 1) {
 					result = hipModuleLaunchKernel(axis->VkFFTKernel,
-						(unsigned int)maxBlockSize[0], (unsigned int)maxBlockSize[1], (unsigned int)maxBlockSize[2],     // grid dim
+						(unsigned int)dispatchSize[0], (unsigned int)dispatchSize[1], (unsigned int)dispatchSize[2],     // grid dim
 						(unsigned int)axis->specializationConstants.localSize[0], (unsigned int)axis->specializationConstants.localSize[1], (unsigned int)axis->specializationConstants.localSize[2],   // block dim
 						(unsigned int)axis->specializationConstants.usedSharedMemory, app->configuration.stream[app->configuration.streamID],             // shared mem and stream
 						args, 0);
 				}
 				else {
 					result = hipModuleLaunchKernel(axis->VkFFTKernel,
-						(unsigned int)maxBlockSize[0], (unsigned int)maxBlockSize[1], (unsigned int)maxBlockSize[2],     // grid dim
+						(unsigned int)dispatchSize[0], (unsigned int)dispatchSize[1], (unsigned int)dispatchSize[2],     // grid dim
 						(unsigned int)axis->specializationConstants.localSize[0], (unsigned int)axis->specializationConstants.localSize[1], (unsigned int)axis->specializationConstants.localSize[2],   // block dim
 						(unsigned int)axis->specializationConstants.usedSharedMemory, 0,             // shared mem and stream
 						args, 0);
 				}
 				if (result != hipSuccess) {
-					printf("hipModuleLaunchKernel error: %d, %" PRIu64 " %" PRIu64 " %" PRIu64 " - %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", result, maxBlockSize[0], maxBlockSize[1], maxBlockSize[2], axis->specializationConstants.localSize[0], axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2]);
+					printf("hipModuleLaunchKernel error: %d, %" PRIu64 " %" PRIu64 " %" PRIu64 " - %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", result, dispatchSize[0], dispatchSize[1], dispatchSize[2], axis->specializationConstants.localSize[0], axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2]);
 					return VKFFT_ERROR_FAILED_TO_LAUNCH_KERNEL;
 				}
 				if (app->configuration.num_streams > 1) {
@@ -31881,7 +40694,7 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 				}
 #elif(VKFFT_BACKEND==3)
 				cl_int result = CL_SUCCESS;
-				void* args[6];
+				void* args[10];
 				args[0] = axis->inputBuffer;
 				result = clSetKernelArg(axis->kernel, 0, sizeof(cl_mem), args[0]);
 				if (result != CL_SUCCESS) {
@@ -31909,6 +40722,14 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 					}
 					args_id++;
 				}
+				if (axis->specializationConstants.raderUintLUT) {
+					args[args_id] = &axis->bufferRaderUintLUT;
+					result = clSetKernelArg(axis->kernel, (cl_uint)args_id, sizeof(cl_mem), args[args_id]);
+					if (result != CL_SUCCESS) {
+						return VKFFT_ERROR_FAILED_TO_SET_KERNEL_ARG;
+					}
+					args_id++;
+				}
 				if (axis->specializationConstants.useBluesteinFFT && axis->specializationConstants.BluesteinConvolutionStep) {
 					if (axis->specializationConstants.inverseBluestein)
 						args[args_id] = &app->bufferBluesteinIFFT[axis->specializationConstants.axis_id];
@@ -31942,7 +40763,7 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 					args_id++;
 				}
 				size_t local_work_size[3] = { (size_t)axis->specializationConstants.localSize[0], (size_t)axis->specializationConstants.localSize[1],(size_t)axis->specializationConstants.localSize[2] };
-				size_t global_work_size[3] = { (size_t)maxBlockSize[0] * local_work_size[0] , (size_t)maxBlockSize[1] * local_work_size[1] ,(size_t)maxBlockSize[2] * local_work_size[2] };
+				size_t global_work_size[3] = { (size_t)dispatchSize[0] * local_work_size[0] , (size_t)dispatchSize[1] * local_work_size[1] ,(size_t)dispatchSize[2] * local_work_size[2] };
 				result = clEnqueueNDRangeKernel(app->configuration.commandQueue[0], axis->kernel, 3, 0, global_work_size, local_work_size, 0, 0, 0);
 				//printf("%" PRIu64 " %" PRIu64 " %" PRIu64 " - %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", maxBlockSize[0], maxBlockSize[1], maxBlockSize[2], axis->specializationConstants.localSize[0], axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2]);
 
@@ -31951,7 +40772,7 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 				}
 #elif(VKFFT_BACKEND==4)
 				ze_result_t result = ZE_RESULT_SUCCESS;
-				void* args[6];
+				void* args[10];
 				args[0] = axis->inputBuffer;
 				result = zeKernelSetArgumentValue(axis->VkFFTKernel, 0, sizeof(void*), args[0]);
 				if (result != ZE_RESULT_SUCCESS) {
@@ -31979,6 +40800,14 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 					}
 					args_id++;
 				}
+				if (axis->specializationConstants.raderUintLUT) {
+					args[args_id] = &axis->bufferRaderUintLUT;
+					result = zeKernelSetArgumentValue(axis->VkFFTKernel, (uint32_t)args_id, sizeof(void*), args[args_id]);
+					if (result != ZE_RESULT_SUCCESS) {
+						return VKFFT_ERROR_FAILED_TO_SET_KERNEL_ARG;
+					}
+					args_id++;
+				}
 				if (axis->specializationConstants.useBluesteinFFT && axis->specializationConstants.BluesteinConvolutionStep) {
 					if (axis->specializationConstants.inverseBluestein)
 						args[args_id] = &app->bufferBluesteinIFFT[axis->specializationConstants.axis_id];
@@ -32012,13 +40841,77 @@ static inline VkFFTResult dispatchEnhanced(VkFFTApplication* app, VkFFTAxis* axi
 					args_id++;
 				}
 				size_t local_work_size[3] = { (size_t)axis->specializationConstants.localSize[0], (size_t)axis->specializationConstants.localSize[1],(size_t)axis->specializationConstants.localSize[2] };
-				ze_group_count_t launchArgs = { (uint32_t)maxBlockSize[0], (uint32_t)maxBlockSize[1],(uint32_t)maxBlockSize[2] };
+				ze_group_count_t launchArgs = { (uint32_t)dispatchSize[0], (uint32_t)dispatchSize[1],(uint32_t)dispatchSize[2] };
 				result = zeCommandListAppendLaunchKernel(app->configuration.commandList[0], axis->VkFFTKernel, &launchArgs, 0, 0, 0);
 				//printf("%" PRIu64 " %" PRIu64 " %" PRIu64 " - %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", maxBlockSize[0], maxBlockSize[1], maxBlockSize[2], axis->specializationConstants.localSize[0], axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2]);
 
 				if (result != ZE_RESULT_SUCCESS) {
 					return VKFFT_ERROR_FAILED_TO_LAUNCH_KERNEL;
 				}
+#elif(VKFFT_BACKEND==5)
+				app->configuration.commandEncoder->setComputePipelineState(axis->pipeline);
+				void* args[10];
+				app->configuration.commandEncoder->setBuffer(axis->inputBuffer[0], 0, 0);
+				app->configuration.commandEncoder->setBuffer(axis->outputBuffer[0], 0, 1);
+				app->configuration.commandEncoder->setThreadgroupMemoryLength((uint64_t)ceil(axis->specializationConstants.usedSharedMemory / 16.0) * 16, 0);
+
+				uint64_t args_id = 2;
+				if (axis->specializationConstants.convolutionStep) {
+					app->configuration.commandEncoder->setBuffer(app->configuration.kernel[0], 0, args_id);
+					args_id++;
+				}
+				if (axis->specializationConstants.LUT) {
+					app->configuration.commandEncoder->setBuffer(axis->bufferLUT, 0, args_id);
+					args_id++;
+				}
+				if (axis->specializationConstants.raderUintLUT) {
+					app->configuration.commandEncoder->setBuffer(axis->bufferRaderUintLUT, 0, args_id);
+					args_id++;
+				}
+				if (axis->specializationConstants.useBluesteinFFT && axis->specializationConstants.BluesteinConvolutionStep) {
+					if (axis->specializationConstants.inverseBluestein)
+						app->configuration.commandEncoder->setBuffer(app->bufferBluesteinIFFT[axis->specializationConstants.axis_id], 0, args_id);
+					else
+						app->configuration.commandEncoder->setBuffer(app->bufferBluesteinFFT[axis->specializationConstants.axis_id], 0, args_id);
+					args_id++;
+				}
+				if (axis->specializationConstants.useBluesteinFFT && (axis->specializationConstants.BluesteinPreMultiplication || axis->specializationConstants.BluesteinPostMultiplication)) {
+					app->configuration.commandEncoder->setBuffer(app->bufferBluestein[axis->specializationConstants.axis_id], 0, args_id);
+					args_id++;
+				}
+				//args[args_id] = &axis->pushConstants;
+				if (axis->pushConstants.structSize > 0) {
+					if (app->configuration.useUint64) {
+						if (!axis->pushConstants.dataUintBuffer) {
+							axis->pushConstants.dataUintBuffer = app->configuration.device->newBuffer(axis->pushConstants.structSize, MTL::ResourceStorageModeShared);
+							memcpy(axis->pushConstants.dataUintBuffer->contents(), axis->pushConstants.dataUint64, axis->pushConstants.structSize);
+							axis->updatePushConstants = 0;
+						}
+						else if (axis->updatePushConstants) {
+							memcpy(axis->pushConstants.dataUintBuffer->contents(), axis->pushConstants.dataUint64, axis->pushConstants.structSize);
+							axis->updatePushConstants = 0;
+						}
+						app->configuration.commandEncoder->setBuffer(axis->pushConstants.dataUintBuffer, 0, args_id);
+					}
+					else {
+						if (!axis->pushConstants.dataUintBuffer) {
+							axis->pushConstants.dataUintBuffer = app->configuration.device->newBuffer(axis->pushConstants.structSize, MTL::ResourceStorageModeShared);
+							memcpy(axis->pushConstants.dataUintBuffer->contents(), axis->pushConstants.dataUint32, axis->pushConstants.structSize);
+							axis->updatePushConstants = 0;
+						}
+						else if (axis->updatePushConstants) {
+							memcpy(axis->pushConstants.dataUintBuffer->contents(), axis->pushConstants.dataUint32, axis->pushConstants.structSize);
+							axis->updatePushConstants = 0;
+						}
+						app->configuration.commandEncoder->setBuffer(axis->pushConstants.dataUintBuffer, 0, args_id);
+					}
+					args_id++;
+				}
+				MTL::Size threadsPerGrid = { dispatchSize[0] * axis->specializationConstants.localSize[0], dispatchSize[1] * axis->specializationConstants.localSize[1],dispatchSize[2] * axis->specializationConstants.localSize[2] };
+				MTL::Size threadsPerThreadgroup = { axis->specializationConstants.localSize[0],axis->specializationConstants.localSize[1], axis->specializationConstants.localSize[2] };
+
+				app->configuration.commandEncoder->dispatchThreads(threadsPerGrid, threadsPerThreadgroup);
+
 #endif
 			}
 		}
@@ -32051,6 +40944,7 @@ static inline VkFFTResult VkFFTSync(VkFFTApplication* app) {
 	ze_result_t res = ZE_RESULT_SUCCESS;
 	res = zeCommandListAppendBarrier(app->configuration.commandList[0], nullptr, 0, nullptr);
 	if (res != ZE_RESULT_SUCCESS) return VKFFT_ERROR_FAILED_TO_SUBMIT_BARRIER;
+#elif(VKFFT_BACKEND==5)
 #endif
 	return VKFFT_SUCCESS;
 }
@@ -32094,6 +40988,9 @@ static inline VkFFTResult VkFFTAppend(VkFFTApplication* app, int inverse, VkFFTL
 	app->configuration.commandQueue = launchParams->commandQueue;
 #elif(VKFFT_BACKEND==4)
 	app->configuration.commandList = launchParams->commandList;
+#elif(VKFFT_BACKEND==5)
+	app->configuration.commandBuffer = launchParams->commandBuffer;
+	app->configuration.commandEncoder = launchParams->commandEncoder;
 #endif
 	uint64_t localSize0[3];
 	if ((inverse != 1) && (app->configuration.makeInversePlanOnly)) return VKFFT_ERROR_ONLY_INVERSE_FFT_INITIALIZED;
@@ -32764,6 +41661,6 @@ static inline VkFFTResult VkFFTAppend(VkFFTApplication* app, int inverse, VkFFTL
 	return resFFT;
 }
 static inline int VkFFTGetVersion() {
-	return 10226; //X.XX.XX format
+	return 10231; //X.XX.XX format
 }
 #endif
\ No newline at end of file

More details

Full run details