New Upstream Snapshot - erlang-cl

Ready changes

Summary

Merged new upstream version: 1.2.4+git20200516.2.dd7caaa (was: 1.2.4+git20200516.1.dd7caaa).

Resulting package

Built on 2023-01-21T22:20 (took 5m52s)

The resulting binary packages can be installed (if you have the apt repository enabled) by running one of:

apt install -t fresh-snapshots erlang-cl-dbgsymapt install -t fresh-snapshots erlang-cl

Lintian Result

Diff

diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index fa1a4fc..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,21 +0,0 @@
-# git-ls-files --others --exclude-from=.git/info/exclude
-# Lines that start with '#' are comments.
-# For a project mostly in C, the following would be a good set of
-# exclude patterns (uncomment them if you want to use them):
-# *.[oa]
-*~
-.DS_Store
-
-*.beam
-# Emacs Tag files
-TAGS
-
-# c_src
-/c_src/*.o
-/c_src/*.exp
-/c_src/*.lib
-/c_src/*.pdb
-# Derivates
-/_build/*
-/priv/*
-rebar.lock
\ No newline at end of file
diff --git a/c_src/Makefile b/c_src/Makefile
index 0cdf469..224fd5f 100644
--- a/c_src/Makefile
+++ b/c_src/Makefile
@@ -45,6 +45,9 @@ endif
 
 WORDSIZE = $(shell $(ERL) -noshell -eval "io:format([126,119,126,110],[erlang:system_info(wordsize)*8])" -s erlang halt)
 
+# uncomment to enable use of dirty scheduler
+# CFLAGS += -DUSE_DIRTY_SCHEDULER
+
 ifeq ($(OSNAME)$(WSLcross), Linux)
 LINUX = Yes
 CFLAGS += -I/usr/include/nvidia-current
@@ -111,7 +114,6 @@ endif
 ifeq ($(WIN32_CL), Yes)
 ## Use Microsoft CL
   CC=cl.exe
-  MS2C = MSYS2_ARG_CONV_EXCL=*
   OUT_C = /Fo
   ifeq ($(OPENCL_DIR), )
     OPENCL_DIR = c:/msys64/opt/local/
diff --git a/c_src/cl_nif.c b/c_src/cl_nif.c
index 6c3144c..42abfff 100644
--- a/c_src/cl_nif.c
+++ b/c_src/cl_nif.c
@@ -33,6 +33,7 @@
 #endif
 
 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS 1
+#define CL_TARGET_OPENCL_VERSION 210
 
 #ifdef DARWIN
 #include <OpenCL/opencl.h>
@@ -301,6 +302,7 @@ typedef struct _ecl_env_t {
     ErlNifRWLock* context_list_lock;
     ecl_context_t*  context_list;
     cl_int icd_version;
+    int dirty_scheduler_support;
 } ecl_env_t;
 
 typedef struct _ecl_func_t {
@@ -413,7 +415,8 @@ typedef struct _ecl_func_t {
 	ECL_FUNC(clEnqueueWaitForEvents,10),			\
 	ECL_FUNC(clEnqueueBarrier,10),				\
 	ECL_FUNC(clUnloadCompiler,10),				\
-	ECL_FUNC(clGetExtensionFunctionAddress,10)
+	ECL_FUNC(clGetExtensionFunctionAddress,10),		\
+	ECL_FUNC(clCreateProgramWithIL,21)
 
 #include "ecl_types.h"
 
@@ -452,6 +455,8 @@ static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,
 
 static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc, 
 			    const ERL_NIF_TERM argv[]);
+static ERL_NIF_TERM ecl_noop_(ErlNifEnv* env, int argc, 
+			      const ERL_NIF_TERM argv[]);
 
 static ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc, 
 					 const ERL_NIF_TERM argv[]);
@@ -520,6 +525,11 @@ static ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc,
 						   const ERL_NIF_TERM argv[]);
 static ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc, 
 						   const ERL_NIF_TERM argv[]);
+#if CL_VERSION_2_1 == 1
+static ERL_NIF_TERM ecl_create_program_with_il(ErlNifEnv* env, int argc, 
+					       const ERL_NIF_TERM argv[]);
+#endif
+
 #if CL_VERSION_1_2 == 1
 static ERL_NIF_TERM ecl_create_program_with_builtin_kernels(
     ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
@@ -658,125 +668,160 @@ static ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc,
 static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc, 
 				       const ERL_NIF_TERM argv[]);
 
+static ERL_NIF_TERM ecl_get_event_profiling_info(ErlNifEnv* env, int argc, 
+						 const ERL_NIF_TERM argv[]);
+
+#if CL_VERSION_2_0 == 1
+
+static ERL_NIF_TERM ecl_create_pipe(ErlNifEnv* env, int argc,
+				    const ERL_NIF_TERM argv[]);
+#endif
+
+
+// Dirty optional since 2.7 and mandatory since 2.12
+#if (ERL_NIF_MAJOR_VERSION > 2) || ((ERL_NIF_MAJOR_VERSION == 2) && (ERL_NIF_MINOR_VERSION >= 7))
+#ifdef USE_DIRTY_SCHEDULER
+#define NIF_FUNC(name,arity,fptr) {(name),(arity),(fptr),(ERL_NIF_DIRTY_JOB_CPU_BOUND)}
+#define NIF_DIRTY_FUNC(name,arity,fptr) {(name),(arity),(fptr),(ERL_NIF_DIRTY_JOB_CPU_BOUND)}
+#else
+#define NIF_FUNC(name,arity,fptr) {(name),(arity),(fptr),(0)}
+#define NIF_DIRTY_FUNC(name,arity,fptr) {(name),(arity),(fptr),(ERL_NIF_DIRTY_JOB_CPU_BOUND)}
+#endif
+#else
+#define NIF_FUNC(name,arity,fptr) {(name),(arity),(fptr)}
+#define NIF_DIRTY_FUNC(name,arity,fptr) {(name),(arity),(fptr)}
+#endif
 
 ErlNifFunc ecl_funcs[] =
 {
-    { "noop",                        0, ecl_noop },
-    { "versions",                    0, ecl_versions },
+    NIF_FUNC( "noop",                        0, ecl_noop ),
+    NIF_FUNC( "noop_",                       0, ecl_noop_ ),
+    NIF_DIRTY_FUNC( "dirty_noop",            0, ecl_noop ),
+    NIF_FUNC( "versions",                    0, ecl_versions ),
     
     // Platform
-    { "get_platform_ids",           0, ecl_get_platform_ids },
-    { "get_platform_info",          2, ecl_get_platform_info },
+    NIF_FUNC( "get_platform_ids",           0, ecl_get_platform_ids ),
+    NIF_FUNC( "get_platform_info",          2, ecl_get_platform_info ),
 
     // Devices
-    { "get_device_ids",             2, ecl_get_device_ids },
+    NIF_FUNC( "get_device_ids",             2, ecl_get_device_ids ),
 #if CL_VERSION_1_2 == 1
-    { "create_sub_devices",         2, ecl_create_sub_devices },
+    NIF_FUNC( "create_sub_devices",         2, ecl_create_sub_devices ),
 #endif
-    { "get_device_info",            2, ecl_get_device_info },
+    NIF_FUNC( "get_device_info",            2, ecl_get_device_info ),
 
     // Context
-    { "create_context",             1, ecl_create_context },
-    { "get_context_info",           2, ecl_get_context_info },
+    NIF_FUNC( "create_context",             1, ecl_create_context ),
+    NIF_FUNC( "get_context_info",           2, ecl_get_context_info ),
 
     // Command queue
-    { "create_queue",               3, ecl_create_queue },
-    { "get_queue_info",             2, ecl_get_queue_info },
+    NIF_FUNC( "create_queue",               3, ecl_create_queue ),
+    NIF_FUNC( "get_queue_info",             2, ecl_get_queue_info ),
 
     // Memory object
-    { "create_buffer",              4, ecl_create_buffer },
+    NIF_FUNC( "create_buffer",              4, ecl_create_buffer ),
 #if CL_VERSION_1_1 == 1
-    { "create_sub_buffer",          4, ecl_create_sub_buffer },
+    NIF_FUNC( "create_sub_buffer",          4, ecl_create_sub_buffer ),
 #endif
 
-    { "get_mem_object_info",        2, ecl_get_mem_object_info },
-    { "get_image_info",             2, ecl_get_image_info },
+    NIF_FUNC( "get_mem_object_info",        2, ecl_get_mem_object_info ),
+    NIF_FUNC( "get_image_info",             2, ecl_get_image_info ),
 
-    { "create_image2d",            7, ecl_create_image2d },
-    { "create_image3d",            9, ecl_create_image3d },
+    NIF_FUNC( "create_image2d",            7, ecl_create_image2d ),
+    NIF_FUNC( "create_image3d",            9, ecl_create_image3d ),
 #if CL_VERSION_1_2 == 1
-    { "create_image",              5, ecl_create_image },
+    NIF_FUNC( "create_image",              5, ecl_create_image ),
 #endif
-    { "get_supported_image_formats",3, ecl_get_supported_image_formats },
+    NIF_FUNC( "get_supported_image_formats",3, ecl_get_supported_image_formats ),
 
     // Sampler 
-    { "create_sampler",             4, ecl_create_sampler },
-    { "get_sampler_info",           2, ecl_get_sampler_info },
+    NIF_FUNC( "create_sampler",             4, ecl_create_sampler ),
+    NIF_FUNC( "get_sampler_info",           2, ecl_get_sampler_info ),
 
     // Program
-    { "create_program_with_source", 2, ecl_create_program_with_source },
-    { "create_program_with_binary", 3, ecl_create_program_with_binary },
+    NIF_FUNC( "create_program_with_source", 2, ecl_create_program_with_source ),
+    NIF_FUNC( "create_program_with_binary", 3, ecl_create_program_with_binary ),
 #if CL_VERSION_1_2 == 1
-    { "create_program_with_builtin_kernels", 3, 
-      ecl_create_program_with_builtin_kernels },
-#endif
-    { "async_build_program",        3, ecl_async_build_program },
+    NIF_FUNC( "create_program_with_builtin_kernels", 3, 
+	      ecl_create_program_with_builtin_kernels ),
+#endif    
+    NIF_FUNC( "async_build_program",        3, ecl_async_build_program ),
 #if CL_VERSION_1_2 == 1
-    { "unload_platform_compiler",   1, ecl_unload_platform_compiler },
+    NIF_FUNC( "unload_platform_compiler",   1, ecl_unload_platform_compiler ),
 #endif
 #if CL_VERSION_1_2 == 1
-    { "async_compile_program",      5,   ecl_async_compile_program },
+    NIF_FUNC( "async_compile_program",      5,   ecl_async_compile_program ),
 #endif
 #if CL_VERSION_1_2 == 1
-    { "async_link_program",         4,   ecl_async_link_program },
+    NIF_FUNC( "async_link_program",         4,   ecl_async_link_program ),
 #endif
-    { "unload_compiler",            0, ecl_unload_compiler },
-    { "get_program_info",           2, ecl_get_program_info },
-    { "get_program_build_info",     3, ecl_get_program_build_info },
+    NIF_FUNC( "unload_compiler",            0, ecl_unload_compiler ),
+    NIF_FUNC( "get_program_info",           2, ecl_get_program_info ),
+    NIF_FUNC( "get_program_build_info",     3, ecl_get_program_build_info ),
 
     // Kernel
-    { "create_kernel",              2, ecl_create_kernel },
-    { "create_kernels_in_program",  1, ecl_create_kernels_in_program },
-    { "set_kernel_arg",             3, ecl_set_kernel_arg },
-    { "set_kernel_arg_size",        3, ecl_set_kernel_arg_size },
-    { "get_kernel_info",            2, ecl_get_kernel_info },
-    { "get_kernel_workgroup_info",  3, ecl_get_kernel_workgroup_info },
+    NIF_FUNC( "create_kernel",              2, ecl_create_kernel ),
+    NIF_FUNC( "create_kernels_in_program",  1, ecl_create_kernels_in_program ),
+    NIF_FUNC( "set_kernel_arg",             3, ecl_set_kernel_arg ),
+    NIF_FUNC( "set_kernel_arg_size",        3, ecl_set_kernel_arg_size ),
+    NIF_FUNC( "get_kernel_info",            2, ecl_get_kernel_info ),
+    NIF_FUNC( "get_kernel_workgroup_info",  3, ecl_get_kernel_workgroup_info ),
 #if CL_VERSION_1_2 == 1
-    { "get_kernel_arg_info",        3, ecl_get_kernel_arg_info },
+    NIF_FUNC( "get_kernel_arg_info",        3, ecl_get_kernel_arg_info ),
 #endif
     // Events
-    { "enqueue_task",               4, ecl_enqueue_task },
-    { "enqueue_nd_range_kernel",    6, ecl_enqueue_nd_range_kernel },
-    { "enqueue_marker",             1, ecl_enqueue_marker },
-    { "enqueue_barrier",            1, ecl_enqueue_barrier },
+    NIF_FUNC( "enqueue_task",               4, ecl_enqueue_task ),
+    NIF_FUNC( "enqueue_nd_range_kernel",    6, ecl_enqueue_nd_range_kernel ),
+    NIF_FUNC( "enqueue_marker",             1, ecl_enqueue_marker ),
+    NIF_FUNC( "enqueue_barrier",            1, ecl_enqueue_barrier ),
 #if CL_VERSION_1_2 == 1
-    { "enqueue_barrier_with_wait_list", 2, ecl_enqueue_barrier_with_wait_list },
-    { "enqueue_marker_with_wait_list",  2, ecl_enqueue_marker_with_wait_list },
+    NIF_FUNC( "enqueue_barrier_with_wait_list", 2, ecl_enqueue_barrier_with_wait_list ),
+    NIF_FUNC( "enqueue_marker_with_wait_list",  2, ecl_enqueue_marker_with_wait_list ),
 #endif
-    { "enqueue_wait_for_events",    2, ecl_enqueue_wait_for_events },
-    { "enqueue_read_buffer",        5, ecl_enqueue_read_buffer },
+    NIF_FUNC( "enqueue_wait_for_events",    2, ecl_enqueue_wait_for_events ),
+    NIF_FUNC( "enqueue_read_buffer",        5, ecl_enqueue_read_buffer ),
 #if CL_VERSION_1_1 == 1
-    { "enqueue_read_buffer_rect",   10, ecl_enqueue_read_buffer_rect },
+    NIF_FUNC( "enqueue_read_buffer_rect",   10, ecl_enqueue_read_buffer_rect ),
 #endif
-    { "enqueue_write_buffer",       7, ecl_enqueue_write_buffer },
+    NIF_FUNC( "enqueue_write_buffer",       7, ecl_enqueue_write_buffer ),
 #if CL_VERSION_1_1 == 1
-    { "enqueue_write_buffer_rect",  11, ecl_enqueue_write_buffer_rect },
+    NIF_FUNC( "enqueue_write_buffer_rect",  11, ecl_enqueue_write_buffer_rect ),
 #endif
 #if CL_VERSION_1_2 == 1
-    { "enqueue_fill_buffer",         6, ecl_enqueue_fill_buffer },
+    NIF_FUNC( "enqueue_fill_buffer",         6, ecl_enqueue_fill_buffer ),
 #endif
-    { "enqueue_read_image",         7, ecl_enqueue_read_image },
-    { "enqueue_write_image",        9, ecl_enqueue_write_image },
-    { "enqueue_copy_buffer",        7, ecl_enqueue_copy_buffer },
+    NIF_FUNC( "enqueue_read_image",         7, ecl_enqueue_read_image ),
+    NIF_FUNC( "enqueue_write_image",        9, ecl_enqueue_write_image ),
+    NIF_FUNC( "enqueue_copy_buffer",        7, ecl_enqueue_copy_buffer ),
 #if CL_VERSION_1_1 == 1
-    { "enqueue_copy_buffer_rect",  11, ecl_enqueue_copy_buffer_rect },
+    NIF_FUNC( "enqueue_copy_buffer_rect",  11, ecl_enqueue_copy_buffer_rect ),
 #endif
-    { "enqueue_copy_image",         6, ecl_enqueue_copy_image },
+    NIF_FUNC( "enqueue_copy_image",         7, ecl_enqueue_copy_image ),
 #if CL_VERSION_1_2 == 1
-    { "enqueue_fill_image",         6, ecl_enqueue_fill_image },
+    NIF_FUNC( "enqueue_fill_image",         6, ecl_enqueue_fill_image ),
 #endif
-    { "enqueue_copy_image_to_buffer", 7, ecl_enqueue_copy_image_to_buffer },
-    { "enqueue_copy_buffer_to_image", 7, ecl_enqueue_copy_buffer_to_image },
-    { "enqueue_map_buffer",           6, ecl_enqueue_map_buffer },
-    { "enqueue_map_image",            6, ecl_enqueue_map_image },
-    { "enqueue_unmap_mem_object",     3, ecl_enqueue_unmap_mem_object },
+    NIF_FUNC( "enqueue_copy_image_to_buffer", 7, ecl_enqueue_copy_image_to_buffer ),
+    NIF_FUNC( "enqueue_copy_buffer_to_image", 7, ecl_enqueue_copy_buffer_to_image ),
+    NIF_FUNC( "enqueue_map_buffer",           6, ecl_enqueue_map_buffer ),
+    NIF_FUNC( "enqueue_map_image",            6, ecl_enqueue_map_image ),
+    NIF_FUNC( "enqueue_unmap_mem_object",     3, ecl_enqueue_unmap_mem_object ),
 #if CL_VERSION_1_2 == 1
-    { "enqueue_migrate_mem_objects",  4, ecl_enqueue_migrate_mem_objects },
+    NIF_FUNC( "enqueue_migrate_mem_objects",  4, ecl_enqueue_migrate_mem_objects ),
+#endif
+    NIF_FUNC( "async_flush",                  1, ecl_async_flush ),
+    NIF_FUNC( "async_finish",                 1, ecl_async_finish ),
+    NIF_FUNC( "async_wait_for_event",         1, ecl_async_wait_for_event ),
+    NIF_FUNC( "get_event_info",               2, ecl_get_event_info ),
+    NIF_FUNC( "get_event_profiling_info",     2, ecl_get_event_profiling_info ),
+
+#if CL_VERSION_2_0 == 1
+    NIF_FUNC( "create_pipe",                  4,  ecl_create_pipe ),
 #endif
-    { "async_flush",                  1, ecl_async_flush },
-    { "async_finish",                 1, ecl_async_finish },
-    { "async_wait_for_event",         1, ecl_async_wait_for_event },
-    { "get_event_info",               2, ecl_get_event_info }
+
+#if CL_VERSION_2_1 == 1
+    NIF_FUNC( "create_program_with_il",       2, ecl_create_program_with_il ),
+#endif
+
 };
 
 static ecl_resource_t platform_r;
@@ -951,6 +996,13 @@ DECL_ATOM(command_type);
 // DECL_ATOM(reference_count);
 DECL_ATOM(execution_status);
 
+// Event Profile Info
+DECL_ATOM(command_queued);
+DECL_ATOM(command_submit);
+DECL_ATOM(command_start);
+DECL_ATOM(command_end);
+DECL_ATOM(command_complete);
+
 // Workgroup info
 DECL_ATOM(work_group_size);
 DECL_ATOM(compile_work_group_size);
@@ -1006,6 +1058,8 @@ DECL_ATOM(invalid_gl_object);
 DECL_ATOM(invalid_buffer_size);
 DECL_ATOM(invalid_mip_level);
 DECL_ATOM(invalid_global_work_size);
+DECL_ATOM(device_partition_failed);
+DECL_ATOM(invalid_device_partition_count);
 
 // cl_device_type
 DECL_ATOM(all);
@@ -1063,6 +1117,8 @@ DECL_ATOM(image2d_array);
 DECL_ATOM(image1d);
 DECL_ATOM(image1d_array);
 DECL_ATOM(image1d_buffer);
+// version2.0
+DECL_ATOM(pipe);
 
 // addressing_mode
 // DECL_ATOM(none);
@@ -1282,6 +1338,9 @@ ecl_kv_t kv_mem_object_type[] = { // enum
     { &ATOM(image1d), CL_MEM_OBJECT_IMAGE1D },
     { &ATOM(image1d_array), CL_MEM_OBJECT_IMAGE1D_ARRAY },
     { &ATOM(image1d_buffer), CL_MEM_OBJECT_IMAGE1D_BUFFER },
+#endif
+#if CL_VERSION_2_0 == 1
+    { &ATOM(pipe), CL_MEM_OBJECT_PIPE },
 #endif
     { 0, 0 }
 };
@@ -1402,6 +1461,12 @@ ecl_kv_t kv_execution_status[] = { // enum
     { &ATOM(invalid_buffer_size), CL_INVALID_BUFFER_SIZE },
     { &ATOM(invalid_mip_level), CL_INVALID_MIP_LEVEL },
     { &ATOM(invalid_global_work_size), CL_INVALID_GLOBAL_WORK_SIZE },
+#ifdef CL_DEVICE_PARTITION_FAILED
+    { &ATOM(device_partition_failed), CL_DEVICE_PARTITION_FAILED },
+#endif
+#ifdef CL_INVALID_DEVICE_PARTITION_COUNT
+    { &ATOM(invalid_device_partition_count), CL_INVALID_DEVICE_PARTITION_COUNT },
+#endif
     { 0, 0 }
 };
 
@@ -1552,6 +1617,8 @@ DECL_ATOM(max_clock_frequency);
 DECL_ATOM(address_bits);
 DECL_ATOM(max_read_image_args);
 DECL_ATOM(max_write_image_args);
+DECL_ATOM(max_read_write_image_args);
+DECL_ATOM(il_version);    
 DECL_ATOM(max_mem_alloc_size);
 DECL_ATOM(image2d_max_width);
 DECL_ATOM(image2d_max_height);
@@ -1745,6 +1812,14 @@ ecl_info_t device_info[] =
     { &ATOM(device_integrated_memory_nv),CL_DEVICE_INTEGRATED_MEMORY_NV, false, OCL_BOOL, 0, 0},
 #endif
 
+#ifdef CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS
+    { &ATOM(max_read_write_image_args), CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, false, OCL_UINT, 0, 0 },
+#endif
+
+#ifdef CL_DEVICE_IL_VERSION
+    { &ATOM(il_version), CL_DEVICE_IL_VERSION, false, OCL_STRING, 0, 0 },
+#endif
+
 };
 
 // Map device info index 0...N => cl_device_info x Data type
@@ -1851,6 +1926,16 @@ ecl_info_t event_info[] = {
     { &ATOM(execution_status), CL_EVENT_COMMAND_EXECUTION_STATUS, false, OCL_ENUM, kv_execution_status, 0 }
 };
 
+ecl_info_t event_profile_info[] = {
+    { &ATOM(command_queued),  CL_PROFILING_COMMAND_QUEUED, false, OCL_ULONG, 0, 0 },
+    { &ATOM(command_submit),  CL_PROFILING_COMMAND_SUBMIT, false, OCL_ULONG, 0, 0 },
+    { &ATOM(command_start),   CL_PROFILING_COMMAND_START, false, OCL_ULONG, 0, 0 },
+    { &ATOM(command_end),     CL_PROFILING_COMMAND_END, false, OCL_ULONG, 0, 0 },
+#if CL_VERSION_2_0 == 1
+    { &ATOM(command_complete), CL_PROFILING_COMMAND_COMPLETE, false, OCL_ULONG, 0, 0 },
+#endif
+};
+
 // clGetKernelArgInfo 1.2
 #if CL_VERSION_1_2 == 1
 
@@ -1879,11 +1964,11 @@ ecl_kv_t kv_type_qualifier[] = {
 };
 
 ecl_info_t arg_info[] = {
-    { &ATOM(address_qualifier), CL_KERNEL_ARG_ADDRESS_QUALIFIER, false, OCL_ENUM, kv_address_qualifier },
-    { &ATOM(access_qualifier), CL_KERNEL_ARG_ACCESS_QUALIFIER, false, OCL_ENUM, kv_access_qualifier },
-    { &ATOM(type_name), CL_KERNEL_ARG_TYPE_NAME, false, OCL_STRING, 0 },
-    { &ATOM(type_qualifier), CL_KERNEL_ARG_TYPE_QUALIFIER, false, OCL_ENUM, kv_type_qualifier },
-    { &ATOM(name),  CL_KERNEL_ARG_NAME, false, OCL_STRING, 0 },
+    { &ATOM(address_qualifier), CL_KERNEL_ARG_ADDRESS_QUALIFIER, false, OCL_ENUM, kv_address_qualifier, 0 },
+    { &ATOM(access_qualifier), CL_KERNEL_ARG_ACCESS_QUALIFIER, false, OCL_ENUM, kv_access_qualifier, 0 },
+    { &ATOM(type_name), CL_KERNEL_ARG_TYPE_NAME, false, OCL_STRING, 0, 0 },
+    { &ATOM(type_qualifier), CL_KERNEL_ARG_TYPE_QUALIFIER, false, OCL_ENUM, kv_type_qualifier, 0 },
+    { &ATOM(name),  CL_KERNEL_ARG_NAME, false, OCL_STRING, 0, 0 },
 };
 #endif
 
@@ -1983,6 +2068,14 @@ ERL_NIF_TERM ecl_error(cl_int err)
 	return ATOM(invalid_mip_level);
     case CL_INVALID_GLOBAL_WORK_SIZE:
 	return ATOM(invalid_global_work_size);
+#ifdef CL_DEVICE_PARTITION_FAILED
+    case CL_DEVICE_PARTITION_FAILED:
+	return ATOM(device_partition_failed);
+#endif
+#ifdef CL_INVALID_DEVICE_PARTITION_COUNT
+    case CL_INVALID_DEVICE_PARTITION_COUNT:
+	return ATOM(invalid_device_partition_count);
+#endif
     default: 
 	return ATOM(unknown);
     }
@@ -2378,11 +2471,11 @@ static void unref_kernel_arg(int type, void* val)
     switch(type) {
     case KERNEL_ARG_MEM:
 	if (val)
-	    clReleaseMemObject((cl_mem) val);
+	    ECL_CALL(clReleaseMemObject)((cl_mem) val);
 	break;
     case KERNEL_ARG_SAMPLER:
 	if (val)
-	    clReleaseSampler((cl_sampler) val);
+	    ECL_CALL(clReleaseSampler)((cl_sampler) val);
 	break;
     case KERNEL_ARG_OTHER:
     default:
@@ -2395,11 +2488,11 @@ static void ref_kernel_arg(int type, void* val)
     switch(type) {
     case KERNEL_ARG_MEM:
 	if (val)
-	    clRetainMemObject((cl_mem) val);
+	    ECL_CALL(clRetainMemObject)((cl_mem) val);
 	break;
     case KERNEL_ARG_SAMPLER:
 	if (val)
-	    clRetainSampler((cl_sampler) val);
+	    ECL_CALL(clRetainSampler)((cl_sampler) val);
 	break;
     case KERNEL_ARG_OTHER:
     default:
@@ -2449,7 +2542,7 @@ static void ecl_queue_dtor(ErlNifEnv* env, ecl_object_t* obj)
 {
     UNUSED(env);
     DBG("ecl_queue_dtor: %p", obj);
-    clReleaseCommandQueue(obj->queue);
+    ECL_CALL(clReleaseCommandQueue)(obj->queue);
     object_erase(obj);
     if (obj->parent) enif_release_resource(obj->parent);
 }
@@ -2458,7 +2551,7 @@ static void ecl_mem_dtor(ErlNifEnv* env, ecl_object_t* obj)
 {
     UNUSED(env);
     DBG("ecl_mem_dtor: %p", obj);
-    clReleaseMemObject(obj->mem);
+    ECL_CALL(clReleaseMemObject)(obj->mem);
     object_erase(obj);
     if (obj->parent) enif_release_resource(obj->parent);
 }
@@ -2467,7 +2560,7 @@ static void ecl_sampler_dtor(ErlNifEnv* env, ecl_object_t* obj)
 {
     UNUSED(env);
     DBG("ecl_sampler_dtor: %p", obj);
-    clReleaseSampler(obj->sampler);
+    ECL_CALL(clReleaseSampler)(obj->sampler);
     object_erase(obj);
     if (obj->parent) enif_release_resource(obj->parent);
 }
@@ -2476,7 +2569,7 @@ static void ecl_program_dtor(ErlNifEnv* env, ecl_object_t* obj)
 {
     UNUSED(env);
     DBG("ecl_program_dtor: %p", obj);
-    clReleaseProgram(obj->program);
+    ECL_CALL(clReleaseProgram)(obj->program);
     object_erase(obj);
     if (obj->parent) enif_release_resource(obj->parent);
 }
@@ -2490,7 +2583,7 @@ static void ecl_kernel_dtor(ErlNifEnv* env, ecl_object_t* obj)
     for (i = 0; i < kern->num_args; i++)
 	unref_kernel_arg(kern->arg[i].type, kern->arg[i].value);
     enif_free(kern->arg);
-    clReleaseKernel(kern->obj.kernel);
+    ECL_CALL(clReleaseKernel)(kern->obj.kernel);
     object_erase(obj);
     if (obj->parent) enif_release_resource(obj->parent);
 }
@@ -2500,7 +2593,7 @@ static void ecl_event_dtor(ErlNifEnv* env, ecl_object_t* obj)
     ecl_event_t* evt = (ecl_event_t*) obj;
     UNUSED(env);
     DBG("ecl_event_dtor: %p", evt);
-    clReleaseEvent(evt->obj.event);
+    ECL_CALL(clReleaseEvent)(evt->obj.event);
     object_erase(obj);
     if (evt->bin) {
 	if (!evt->rl)
@@ -2529,7 +2622,7 @@ static void ecl_context_dtor(ErlNifEnv* env, ecl_object_t* obj)
     *pp = ctx->next;
     enif_rwlock_rwunlock(ecl->context_list_lock);
 
-    clReleaseContext(ctx->obj.context);
+    ECL_CALL(clReleaseContext)(ctx->obj.context);
     object_erase(obj);
     // parent is always = 0
     // kill the event thread
@@ -2880,7 +2973,8 @@ static ERL_NIF_TERM ecl_make_kernel(ErlNifEnv* env, cl_kernel kernel,
     size_t sz;
 
     // Get number of arguments, FIXME: check error return
-    clGetKernelInfo(kernel,CL_KERNEL_NUM_ARGS,sizeof(num_args),&num_args,0);
+    ECL_CALL(clGetKernelInfo)(kernel,CL_KERNEL_NUM_ARGS,
+			      sizeof(num_args),&num_args,0);
     sz = num_args*sizeof(ecl_kernel_arg_t);
 
     kern->arg = (ecl_kernel_arg_t*) enif_alloc(sz);
@@ -3305,9 +3399,9 @@ static void* ecl_context_main(void* arg)
 		    cl_int status;
 		    // read status COMPLETE | ERROR
 		    // FIXME: check error
-		    clGetEventInfo(m.event->obj.event,
-				   CL_EVENT_COMMAND_EXECUTION_STATUS,
-				   sizeof(status), &status, 0);
+		    ECL_CALL(clGetEventInfo)(m.event->obj.event,
+					     CL_EVENT_COMMAND_EXECUTION_STATUS,
+					     sizeof(status), &status, 0);
 		    switch(status) {
 		    case CL_COMPLETE:
 			DBG("ecl_context_main: wait_for_event complete");
@@ -3360,6 +3454,18 @@ static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc,
     return ATOM(ok);
 }
 
+static ERL_NIF_TERM ecl_noop_(ErlNifEnv* env, int argc,
+				   const ERL_NIF_TERM argv[])
+{
+    ecl_env_t* ecl = enif_priv_data(env);
+
+    if (ecl->dirty_scheduler_support)
+	return enif_schedule_nif(env, "noop", ERL_NIF_DIRTY_JOB_CPU_BOUND,
+				 ecl_noop, argc, argv);
+    else
+	return ecl_noop(env, argc, argv);
+}
+
 // version - return list of API versions supported
 static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,
 				 const ERL_NIF_TERM argv[])
@@ -3382,6 +3488,17 @@ static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,
     vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 2));
     list = enif_make_list_cell(env, vsn, list);
 #endif
+
+#if CL_VERSION_2_0 == 1
+    vsn = enif_make_tuple2(env, enif_make_int(env, 2), enif_make_int(env, 0));
+    list = enif_make_list_cell(env, vsn, list);
+#endif
+
+#if CL_VERSION_2_1 == 1
+    vsn = enif_make_tuple2(env, enif_make_int(env, 2), enif_make_int(env, 1));
+    list = enif_make_list_cell(env, vsn, list);
+#endif    
+
     return list;
 }
 
@@ -3570,7 +3687,7 @@ static ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc,
     if (!get_ecl_object(env, argv[0], &device_r, false, &o_device))
 	return enif_make_badarg(env);	
     return make_object_info(env, argv[1], o_device, 
-			    (info_fn_t*) clGetDeviceInfo, 
+			    (info_fn_t*) ECL_FUNC_PTR(clGetDeviceInfo), 
 			    device_info, 
 			    sizeof_array(device_info));
 }
@@ -4370,6 +4487,38 @@ static ERL_NIF_TERM ecl_create_program_with_builtin_kernels(
 }
 #endif
 
+#if CL_VERSION_2_1 == 1
+//
+// cl:create_program_with_il(Context::cl_context(), IL::iodata()) ->
+//   {'ok', cl_program()} | {'error', cl_error()}
+//
+static ERL_NIF_TERM ecl_create_program_with_il(ErlNifEnv* env, int argc, 
+					       const ERL_NIF_TERM argv[])
+{
+    ecl_object_t* o_context;
+    cl_program program;
+    ErlNifBinary il;
+    cl_int err;
+    UNUSED(argc);
+
+    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
+	return enif_make_badarg(env);
+    if (!enif_inspect_iolist_as_binary(env, argv[1], &il))
+	return enif_make_badarg(env);
+    program = ECL_CALL(clCreateProgramWithIL)(o_context->context,
+					      1,
+					      (const void*) il.data,
+					      il.size,
+					      &err);
+    if (!err) {
+	ERL_NIF_TERM t;
+	t = ecl_make_object(env, &program_r,(void*) program, o_context);
+	return enif_make_tuple2(env, ATOM(ok), t);
+    }
+    return ecl_make_error(env, err);
+}
+#endif
+
 //
 // @spec async_build_program(Program::cl_program(),
 //                     DeviceList::[cl_device_id()],
@@ -5591,7 +5740,8 @@ static ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc,
 	return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?
 
     // calculate the read size of the image, FIXME: check error return
-    clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0);
+    ECL_CALL(clGetImageInfo)(buffer, CL_IMAGE_ELEMENT_SIZE,
+			     sizeof(psize), &psize, 0);
     size = region[0]*region[1]*region[2]*psize;
     if (!enif_alloc_binary(size, bin)) {
 	enif_free(bin);
@@ -5916,7 +6066,8 @@ static ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc,
     }
 
     // calculate the read size of the image FIXME: check error return
-    clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0);
+    ECL_CALL(clGetImageInfo)(buffer, CL_IMAGE_ELEMENT_SIZE,
+			     sizeof(psize), &psize, 0);
     size = region[0]*region[1]*region[2]*psize;
     if (bin.size < size) {
 	return enif_make_badarg(env);
@@ -6334,16 +6485,16 @@ static ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc,
 			 (void**) wait_list, &num_events))
 	return enif_make_badarg(env);
 
-    ptr = clEnqueueMapBuffer(o_queue->queue,
-			     buffer,
-			     CL_FALSE,
-			     map_flags,
-			     offset,
-			     size,
-			     num_events,
-			     num_events ? wait_list : 0,
-			     &event,
-			     &err);
+    ptr = ECL_CALL(clEnqueueMapBuffer)(o_queue->queue,
+				       buffer,
+				       CL_FALSE,
+				       map_flags,
+				       offset,
+				       size,
+				       num_events,
+				       num_events ? wait_list : 0,
+				       &event,
+				       &err);
     if (!err) {
 	ERL_NIF_TERM t;
 	// FIXME: how should we handle ptr????
@@ -6392,18 +6543,18 @@ static ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc,
 			 (void**) wait_list, &num_events))
 	return enif_make_badarg(env);
 
-    ptr = clEnqueueMapImage(o_queue->queue,
-			    image,
-			    CL_FALSE,
-			    map_flags,
-			    origin,
-			    region,
-			    &row_pitch,
-			    &slice_pitch,
-			    num_events,
-			    num_events ? wait_list : 0,
-			    &event,
-			    &err);
+    ptr = ECL_CALL(clEnqueueMapImage)(o_queue->queue,
+				      image,
+				      CL_FALSE,
+				      map_flags,
+				      origin,
+				      region,
+				      &row_pitch,
+				      &slice_pitch,
+				      num_events,
+				      num_events ? wait_list : 0,
+				      &event,
+				      &err);
     if (!err) {
 	ERL_NIF_TERM t;
 	// FIXME: send binary+event to event thread
@@ -6698,22 +6849,79 @@ static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc,
     if (!get_ecl_object(env, argv[0], &event_r, false, &o_event))
 	return enif_make_badarg(env);
     return make_object_info(env, argv[1], o_event,
-			    (info_fn_t*) clGetEventInfo,
+			    (info_fn_t*) ECL_FUNC_PTR(clGetEventInfo),
 			    event_info,
 			    sizeof_array(event_info));
 }
 
+// return event profiling info
+static ERL_NIF_TERM ecl_get_event_profiling_info(ErlNifEnv* env, int argc, 
+						 const ERL_NIF_TERM argv[])
+{
+    ecl_object_t* o_event;
+    UNUSED(argc);
+    
+    if (!get_ecl_object(env, argv[0], &event_r, false, &o_event))
+	return enif_make_badarg(env);
+    return make_object_info(env, argv[1], o_event,
+			    (info_fn_t*) ECL_FUNC_PTR(clGetEventProfilingInfo),
+			    event_profile_info,
+			    sizeof_array(event_profile_info));    
+}
+
+
+#if CL_VERSION_2_0 == 1
+
+static ERL_NIF_TERM ecl_create_pipe(ErlNifEnv* env, int argc,
+				    const ERL_NIF_TERM argv[])
+{
+    ecl_object_t* o_context;
+    cl_mem_flags flags;
+    cl_mem mem;
+    cl_uint pipe_packet_size;
+    cl_uint pipe_max_packets;
+    cl_int err;
+    UNUSED(argc);
+
+    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))
+	return enif_make_badarg(env);
+    if (!get_bitfields(env, argv[1], &flags, kv_mem_flags))
+	return enif_make_badarg(env);
+    if (!enif_get_uint(env, argv[2], &pipe_packet_size))
+	return enif_make_badarg(env);
+    if (!enif_get_uint(env, argv[3], &pipe_max_packets))
+	return enif_make_badarg(env);
+
+    DBG("context version: %d", o_context->version);
+    if (o_context->version < 20)
+	err = CL_INVALID_CONTEXT;
+    else
+	mem = ECL_CALL(clCreatePipe)(o_context->context,
+				     flags,
+				     pipe_packet_size,
+				     pipe_max_packets,
+				     NULL, &err);
+    if (!err) {
+	ERL_NIF_TERM t;
+	t = ecl_make_object(env, &mem_r,(void*) mem, o_context);
+	return enif_make_tuple2(env, ATOM(ok), t);
+    }
+    return ecl_make_error(env, err);
+}
+
+#endif
+
 static cl_uint get_version(char *version)
 {
     cl_uint ver = 0;
     version += 7;
-    if(*version >= 48 && *version <= 57)
-	ver += (*version-48)*10;
+    if(*version >= '0' && *version <= '9')
+	ver += (*version-'0')*10;
     version++;
-    if(*version == 46) {
+    if(*version == '.') {
 	version++;
-	if(*version >= 48 && *version <= 57)
-	    ver += (*version-48);
+	if(*version >= '0' && *version <= '9')
+	    ver += (*version-'0');
     }
     /* fprintf(stderr, "V3 %s %d\r\n", version, ver); */
     return ver;
@@ -6726,6 +6934,7 @@ static cl_uint get_version(char *version)
 static int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr)
 {
     cl_platform_id   platform_id[MAX_PLATFORMS];
+    cl_int           platform_ver[MAX_PLATFORMS];
     cl_uint          num_platforms;
     cl_uint          i;
     cl_int           err;
@@ -6740,6 +6949,18 @@ static int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr)
     ecl->nplatforms = num_platforms;
     ecl->icd_version = 11;
 
+    // first calculate the icd_version (as max of platform versions)
+    for (i = 0; i < num_platforms; i++) {
+	char             version[128];
+	if(CL_SUCCESS == ECL_CALL(clGetPlatformInfo)
+	   (platform_id[i], CL_PLATFORM_VERSION, 64, version, NULL)) {
+	    platform_ver[i] = get_version(version);
+	    
+	    if (platform_ver[i] >  ecl->icd_version)
+		ecl->icd_version = platform_ver[i];
+	}
+    }
+
     for (i = 0; i < num_platforms; i++) {
 	ecl_object_t* obj;
 	cl_device_id     device_id[MAX_DEVICES];
@@ -6748,12 +6969,7 @@ static int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr)
 	char             version[128];
 	cl_int           ver = -1;
 
-	if(CL_SUCCESS == ECL_CALL(clGetPlatformInfo)
-	   (platform_id[i], CL_PLATFORM_VERSION, 64, version, NULL)) {
-	    if((ver = get_version(version)) > ecl->icd_version)
-		ecl->icd_version = ver;
-	}
-	obj = ecl_new(env, &platform_r,platform_id[i],0,ver);
+	obj = ecl_new(env, &platform_r,platform_id[i],0,platform_ver[i]);
 	ecl->platform[i].o_platform = obj;
 
 	if ((err = ECL_CALL(clGetDeviceIDs)
@@ -6762,19 +6978,21 @@ static int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr)
 	    *rerr = err;
 	    return -1;
 	}
+	DBG("platform: %d, ver=%d", i, platform_ver[i]);
 	ecl->platform[i].o_device=enif_alloc(num_devices*sizeof(ecl_object_t));
 	ecl->platform[i].ndevices = num_devices;
 	for (j = 0; j < num_devices; j++) {
-	    ver = ecl->icd_version;
+	    ver = ecl->icd_version; // assumed version
 	    if(CL_SUCCESS == ECL_CALL(clGetDeviceInfo)
 	       (device_id[j], CL_DEVICE_VERSION, 64, version, NULL)) {
 		ver = get_version(version);
 	    }
 	    obj = ecl_new(env, &device_r, device_id[j],0, ver);
 	    ecl->platform[i].o_device[j] = obj;
+	    DBG("  device:%d, ver=%d", j, ver);
 	}
     }
-
+    DBG("icd: ver=%d", ecl->icd_version);
     return 0;
 }
 
@@ -6784,6 +7002,7 @@ static int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
     ecl_env_t* ecl;
     cl_int err;
     lhash_func_t func = { ref_hash, ref_cmp, ref_release, 0 };
+    ErlNifSysInfo sys_info;
     UNUSED(env);
     UNUSED(load_info);
 
@@ -6805,6 +7024,14 @@ static int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
     DBG("ecl_load: ecl=%p", ecl);
     DBG("ecl_load: ecl->context_list_lock=%p", ecl->context_list_lock);
 
+#if (ERL_NIF_MAJOR_VERSION > 2) || ((ERL_NIF_MAJOR_VERSION == 2) && (ERL_NIF_MINOR_VERSION >= 7))    
+    enif_system_info(&sys_info, sizeof(sys_info));
+    ecl->dirty_scheduler_support = sys_info.dirty_scheduler_support;
+#else
+    ecl->dirty_scheduler_support = 0;
+#endif
+    DBG("dirty_scheduler_support = %d", ecl->dirty_scheduler_support);
+    
     // load OpenCL functions
     if (ecl_load_dynfunctions(ecl) < 0)
 	return -1;
@@ -6967,6 +7194,8 @@ static int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
     LOAD_ATOM(address_bits);
     LOAD_ATOM(max_read_image_args);
     LOAD_ATOM(max_write_image_args);
+    LOAD_ATOM(max_read_write_image_args);
+    LOAD_ATOM(il_version);    
     LOAD_ATOM(max_mem_alloc_size);
     LOAD_ATOM(image2d_max_width);
     LOAD_ATOM(image2d_max_height);
@@ -7109,6 +7338,13 @@ static int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
     LOAD_ATOM(reference_count);
     LOAD_ATOM(execution_status);
 
+    // Event Profile Info
+    LOAD_ATOM(command_queued);
+    LOAD_ATOM(command_submit);
+    LOAD_ATOM(command_start);
+    LOAD_ATOM(command_end);
+    LOAD_ATOM(command_complete);
+
     // Workgroup info
     LOAD_ATOM(work_group_size);
     LOAD_ATOM(compile_work_group_size);
@@ -7164,6 +7400,8 @@ static int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
     LOAD_ATOM(invalid_buffer_size);
     LOAD_ATOM(invalid_mip_level);
     LOAD_ATOM(invalid_global_work_size);
+    LOAD_ATOM(device_partition_failed);
+    LOAD_ATOM(invalid_device_partition_count);
 
     // cl_device_type
     LOAD_ATOM(all);
@@ -7220,6 +7458,7 @@ static int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
     LOAD_ATOM(image1d);
     LOAD_ATOM(image1d_array);
     LOAD_ATOM(image1d_buffer);
+    LOAD_ATOM(pipe);
 
     // addressing_mode
     LOAD_ATOM(none);
diff --git a/c_src/ecl_types.h b/c_src/ecl_types.h
index 436c1e9..57ae843 100644
--- a/c_src/ecl_types.h
+++ b/c_src/ecl_types.h
@@ -93,6 +93,8 @@ typedef cl_program (CL_CALLBACK * t_clCreateProgramWithBinary)(cl_context,cl_uin
 
 typedef cl_program (CL_CALLBACK * t_clCreateProgramWithBuiltInKernels)(cl_context,cl_uint,const cl_device_id *,const char *,cl_int *);
 
+typedef cl_program (CL_CALLBACK * t_clCreateProgramWithIL)(cl_context,cl_uint,const void *,const size_t,cl_int *);
+
 typedef cl_int (CL_CALLBACK * t_clRetainProgram)(cl_program );
 
 typedef cl_int (CL_CALLBACK * t_clReleaseProgram)(cl_program );
@@ -205,4 +207,5 @@ typedef cl_int (CL_CALLBACK * t_clEnqueueBarrier)(cl_command_queue );
 typedef cl_int (CL_CALLBACK * t_clUnloadCompiler)(void);
 typedef void * (CL_CALLBACK * t_clGetExtensionFunctionAddress)(const char *);
 
+typedef cl_mem (CL_CALLBACK * t_clCreatePipe)(cl_context,cl_mem_flags,cl_uint,cl_uint, void*, cl_int*);
 #endif
diff --git a/debian/changelog b/debian/changelog
index 72e11c2..a1e9af2 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+erlang-cl (1.2.4+git20200516.2.dd7caaa-1) UNRELEASED; urgency=low
+
+  * New upstream snapshot.
+  * New upstream snapshot.
+
+ -- Debian Janitor <janitor@jelmer.uk>  Sat, 21 Jan 2023 22:17:54 -0000
+
 erlang-cl (1.2.4-2) unstable; urgency=medium
 
   * Fix the debian/watch uscan control file.
diff --git a/debian/patches/non-linux.patch b/debian/patches/non-linux.patch
index 48d6fd4..25ad121 100644
--- a/debian/patches/non-linux.patch
+++ b/debian/patches/non-linux.patch
@@ -1,6 +1,8 @@
---- a/c_src/Makefile
-+++ b/c_src/Makefile
-@@ -25,7 +25,7 @@
+Index: erlang-cl.git/c_src/Makefile
+===================================================================
+--- erlang-cl.git.orig/c_src/Makefile
++++ erlang-cl.git/c_src/Makefile
+@@ -25,7 +25,7 @@ OBJ = o
  MAC_OS_X  = No
  WIN32_GCC = No
  WIN32_CL  = No
@@ -9,8 +11,8 @@
  
  
  ERL       = erl
-@@ -46,14 +46,23 @@
- WORDSIZE = $(shell $(ERL) -noshell -eval "io:format([126,119,126,110],[erlang:system_info(wordsize)*8])" -s erlang halt)
+@@ -49,14 +49,23 @@ WORDSIZE = $(shell $(ERL) -noshell -eval
+ # CFLAGS += -DUSE_DIRTY_SCHEDULER
  
  ifeq ($(OSNAME)$(WSLcross), Linux)
 -LINUX = Yes
@@ -36,8 +38,10 @@
  endif
  LD_SHARED	:= $(CC) -shared
  LDFLAGS	        += -lOpenCL
---- a/rebar.config
-+++ b/rebar.config
+Index: erlang-cl.git/rebar.config
+===================================================================
+--- erlang-cl.git.orig/rebar.config
++++ erlang-cl.git/rebar.config
 @@ -6,8 +6,8 @@
  {provider_hooks, [{post, [{ct, edoc}, {ct, dialyzer}]}]}.
  
diff --git a/src/cl.erl b/src/cl.erl
index 66e6ef6..8ef74b6 100644
--- a/src/cl.erl
+++ b/src/cl.erl
@@ -71,7 +71,7 @@
 -on_load(init/0).
 
 -export([start/0, start/1, stop/0]).
--export([noop/0]).
+-export([noop/0, noop_/0, dirty_noop/0]).
 -export([versions/0]).
 %% Platform
 -export([get_platform_ids/0]).
@@ -114,7 +114,8 @@
 -export([create_image/5]).
 -export([create_image2d/7]).
 -export([create_image3d/9]).
-
+%% pipe
+-export([create_pipe/4]).
 %% Sampler 
 -export([create_sampler/4]).
 -export([release_sampler/1]).
@@ -125,6 +126,7 @@
 -export([create_program_with_source/2]).
 -export([create_program_with_binary/3]).
 -export([create_program_with_builtin_kernels/3]).
+-export([create_program_with_il/2]). %% 2.1!
 -export([release_program/1]).
 -export([retain_program/1]).
 -export([build_program/3, async_build_program/3]).
@@ -173,7 +175,7 @@
 -export([nowait_enqueue_write_image/8]).
 -export([enqueue_copy_buffer/7]).
 -export([enqueue_copy_buffer_rect/11]).
--export([enqueue_copy_image/6]).
+-export([enqueue_copy_image/7]).
 -export([enqueue_fill_image/6]).
 -export([enqueue_copy_image_to_buffer/7]).
 -export([enqueue_copy_buffer_to_image/7]).
@@ -185,6 +187,8 @@
 -export([retain_event/1]).
 -export([event_info/0]).
 -export([get_event_info/1, get_event_info/2]).
+-export([event_profiling_info/0]).
+-export([get_event_profiling_info/1, get_event_profiling_info/2]).
 -export([wait/1, wait/2]).
 -export([wait_for_events/1]).
 
@@ -271,6 +275,12 @@ stop()  ->
 noop() ->
     ?nif_stub.
 
+noop_() ->  %% dynamic dirty noop
+    ?nif_stub.
+
+dirty_noop() ->  %% dirty noop
+    ?nif_stub.
+
 %%
 %% @spec versions() -> [{Major::integer(),Minor::integer()}]
 %%
@@ -520,6 +530,7 @@ device_info() ->
       fun({1,2},Acc) -> device_info_12(Acc);
 	 ({1,1},Acc) -> device_info_11(Acc);
 	 ({1,0},Acc) -> device_info_10(Acc);
+	 ({2,1},Acc) -> device_info_21(Acc);
 	 (_, Acc) -> Acc
       end, [], versions()).
 
@@ -611,6 +622,10 @@ device_info_12(L) ->
 %%     image_base_address_alignment
     ].
 
+device_info_21(_L) ->
+    [max_read_write_image_args,
+     il_version].
+
 %%
 %% @spec get_device_info(DevID::cl_device_id(), Info::cl_device_info_key()) ->
 %%   {'ok', term()} | {'error', cl_error()}
@@ -977,9 +992,9 @@ get_context_info(Context) when ?is_context(Context) ->
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %% Command Queue (Queue)
 %% @type cl_queue_property() = { 'out_of_order_exec_mode_enable' | 
-%%			         'profiling_enabled' }
+%%			         'profiling_enable' }
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--type cl_queue_property() :: 'out_of_order_exec_mode_enable' | 'profiling_enabled'.
+-type cl_queue_property() :: 'out_of_order_exec_mode_enable' | 'profiling_enable'.
 %%
 %% @spec create_queue(Context::cl_context(),Device::cl_device_id(),
 %%                    Properties::[cl_queue_property()]) ->
@@ -993,7 +1008,7 @@ get_context_info(Context) when ?is_context(Context) ->
 %% are executed out-of-order. Otherwise, commands are executed
 %% in-order.</dd>
 %% 
-%% <dt>'profiling_enabled'</dt> <dd> Enable or disable profiling of
+%% <dt>'profiling_enable'</dt> <dd> Enable or disable profiling of
 %% commands in the command-queue. If set, the profiling of commands is
 %% enabled. Otherwise profiling of commands is disabled. See
 %% clGetEventProfilingInfo for more information.
@@ -1434,6 +1449,12 @@ create_program_with_binary(_Context, _DeviceList, _BinaryList) ->
 create_program_with_builtin_kernels(_Context, _DeviceList, _KernelNames) ->
     ?nif_stub.
 
+-spec create_program_with_il(Context::cl_context(), IL::iodata()) ->
+				    {'ok', cl_program()} | {'error', cl_error()}.
+
+create_program_with_il(_Context, _IL) ->
+    ?nif_stub.
+
 %%
 %% @spec retain_program(Program::cl_program()) ->
 %%    'ok' | {'error', cl_error()}
@@ -1844,8 +1865,15 @@ get_kernel_arg_info(Kernel) ->
 	{ok, N} ->
 	    {ok,
 	     lists:map(fun(I) ->
-			       {ok,Info} = get_kernel_arg_info(Kernel, I),
-			       {I,Info}
+			       try get_kernel_arg_info(Kernel, I) of
+				   {ok,Info} ->
+				       {I,Info};
+				   {error,Reason} ->
+				       {I,{error,Reason}}
+			       catch
+				   error:Reason ->
+				       {I,{error,Reason}}
+			       end
 		       end, lists:seq(0, N-1))};
 	Error ->
 	    Error
@@ -2166,7 +2194,7 @@ enqueue_copy_buffer_rect(_Queue, _SrcBuffer, _DstBuffer,
 			 _WaitList) ->
     ?nif_stub.
 
-enqueue_copy_image(_QUeue, _SrcImage, _DstImage, _Origin, _Region, _WaitList) ->
+enqueue_copy_image(_QUeue, _SrcImage, _DstImage, _SourceOrigin, _DestOrigin, _Region, _WaitList) ->
     ?nif_stub.
 
 %%  FillColor = <<R:32/unsigned,G:32/unsigned,B:32/unsigned,A:32/unsigned>>
@@ -2302,13 +2330,40 @@ get_event_info(_Event, _Info) ->
 get_event_info(Event) when ?is_event(Event) ->
     get_info_list(Event, event_info(), fun get_event_info/2).
 
+
+event_profiling_info() ->
+    case lists:member({2,0}, cl:versions()) of
+	true ->
+	    [
+	     command_queued,
+	     command_submit,
+	     command_start,
+	     command_end,
+	     command_complete
+	    ];
+	false ->
+	    [
+	     command_queued,
+	     command_submit,
+	     command_start,
+	     command_end
+	    ]
+    end.
+
+get_event_profiling_info(_Event, _Info) ->
+    ?nif_stub.
+
+get_event_profiling_info(Event) ->
+get_info_list(Event, event_profiling_info(),
+	      fun get_event_profiling_info/2).    
+
 %% IMAGES
 %% @doc return a list of image formats [{Order,Type}]
 
 get_supported_image_formats(_Context, _Flags, _ImageType) ->
     ?nif_stub.
 
--spec create_image2d(Conext::cl_context(), Flags::[cl_mem_flag()],
+-spec create_image2d(Context::cl_context(), Flags::[cl_mem_flag()],
 		     ImageFormat::#cl_image_format{},
 		     Width::non_neg_integer(),
 		     Height::non_neg_integer(),
@@ -2320,7 +2375,7 @@ create_image2d(_Context, _MemFlags, _ImageFormat, _Width, _Height, _Pitch,
 		_Data) ->
     ?nif_stub.
 
--spec create_image3d(Conext::cl_context(), Flags::[cl_mem_flag()],
+-spec create_image3d(Context::cl_context(), Flags::[cl_mem_flag()],
 		     ImageFormat::#cl_image_format{},
 		     Width::non_neg_integer(),
 		     Height::non_neg_integer(),
@@ -2343,6 +2398,14 @@ create_image3d(_Context, _MemFlags, _ImageFormat, _Width, _Height, _Depth,
 create_image(_Context, _MemFlags, _ImageFormat, _ImageDesc, _Data) ->
     ?nif_stub.
 
+-spec create_pipe(Context::cl_context(), Flags::[cl_mem_flag()],
+		  PipePacketSize::non_neg_integer(),
+		  PipeMaxPackets::non_neg_integer()) ->
+			 {'ok', cl_mem()} | {'error', cl_error()}.
+
+create_pipe(_Context, _MemFlags, _PipePacketSize, _PipeMaxPackets) ->
+    ?nif_stub.
+
 %% Wait for all events in EventList to complete
 -spec wait_for_events(EventList::[cl_event]) ->
 			     [{'ok','completed'} |
diff --git a/src/cl10.erl b/src/cl10.erl
index 15a250a..6f12d45 100644
--- a/src/cl10.erl
+++ b/src/cl10.erl
@@ -105,7 +105,7 @@
 -export([enqueue_write_image/8]).
 -export([enqueue_write_image/9]).
 -export([nowait_enqueue_write_image/8]).
--export([enqueue_copy_image/6]).
+-export([enqueue_copy_image/7]).
 -export([enqueue_copy_image_to_buffer/7]).
 -export([enqueue_copy_buffer_to_image/7]).
 -export([enqueue_map_buffer/6]).
@@ -230,8 +230,8 @@ enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9) ->
     cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9).
 nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> 
     cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).
-enqueue_copy_image(A1,A2,A3,A4,A5,A6) ->
-    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6).
+enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7) ->
+    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7).
 enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) ->
     cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7).
 enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) ->
diff --git a/src/cl11.erl b/src/cl11.erl
index 2efbeed..b691b00 100644
--- a/src/cl11.erl
+++ b/src/cl11.erl
@@ -105,7 +105,7 @@
 -export([enqueue_write_image/8]).
 -export([enqueue_write_image/9]).
 -export([nowait_enqueue_write_image/8]).
--export([enqueue_copy_image/6]).
+-export([enqueue_copy_image/7]).
 -export([enqueue_copy_image_to_buffer/7]).
 -export([enqueue_copy_buffer_to_image/7]).
 -export([enqueue_map_buffer/6]).
@@ -230,8 +230,8 @@ enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9) ->
     cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9).
 nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> 
     cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).
-enqueue_copy_image(A1,A2,A3,A4,A5,A6) ->
-    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6).
+enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7) ->
+    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7).
 enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) ->
     cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7).
 enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) ->
diff --git a/src/cl12.erl b/src/cl12.erl
index 73f5654..4eb4d15 100644
--- a/src/cl12.erl
+++ b/src/cl12.erl
@@ -94,7 +94,7 @@
 -export([enqueue_write_image/8]).
 -export([enqueue_write_image/9]).
 -export([nowait_enqueue_write_image/8]).
--export([enqueue_copy_image/6]).
+-export([enqueue_copy_image/7]).
 -export([enqueue_copy_image_to_buffer/7]).
 -export([enqueue_copy_buffer_to_image/7]).
 -export([enqueue_map_buffer/6]).
@@ -217,8 +217,8 @@ enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9) ->
     cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9).
 nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) ->
     cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).
-enqueue_copy_image(A1,A2,A3,A4,A5,A6) ->
-    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6).
+enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7) ->
+    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7).
 enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) ->
     cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7).
 enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) ->
diff --git a/test/cl_basic.erl b/test/cl_basic.erl
index ac50ac4..778c898 100644
--- a/test/cl_basic.erl
+++ b/test/cl_basic.erl
@@ -142,7 +142,7 @@ __kernel void program1(int n, int m) {
 	      build_info(Program, Device)
       end, DeviceList),
 
-    case cl:build_program(Program, DeviceList, "-Dhello=1 -Dtest") of
+    case cl:build_program(Program, DeviceList, "-Dhello=1 -Dtest -cl-kernel-arg-info") of
 	ok ->
 	    foreach(
 	      fun(Device) ->
@@ -169,7 +169,7 @@ __kernel void program1(int n, int m) {
 	      end, Kernels),
 	    foreach(
 	      fun(Device) ->
-		      {ok,Queue} = cl:create_queue(Context,Device,[]),
+		      {ok,Queue} = cl:create_queue(Context,Device,[profiling_enable]),
 		      foreach(
 			fun(Kernel) ->
 				cl:set_kernel_arg(Kernel, 0, 12),
@@ -179,7 +179,10 @@ __kernel void program1(int n, int m) {
 				io:format("EventInfo: ~p\n", [EventInfo]),
 				cl:flush(Queue),
 				io:format("Event Status:=~p\n", 
-					  [cl:wait(Event,1000)])
+					  [cl:wait(Event,1000)]),
+				{ok,ProfileInfo} = cl:get_event_profiling_info(Event),
+				io:format("EventProfilingInfo: ~p\n", [ProfileInfo])
+
 			end, Kernels)
 	      end, DeviceList),
 	    ok;
diff --git a/test/cl_noop.erl b/test/cl_noop.erl
new file mode 100644
index 0000000..553b256
--- /dev/null
+++ b/test/cl_noop.erl
@@ -0,0 +1,42 @@
+%%% @author Tony Rogvall <tony@rogvall.se>
+%%% @copyright (C) 2019, Tony Rogvall
+%%% @doc
+%%%    Test cl nif calling overhead
+%%% @end
+%%% Created : 11 Mar 2019 by Tony Rogvall <tony@rogvall.se>
+
+-module(cl_noop).
+
+-compile(export_all).
+
+
+test() ->
+    T0 = erlang:monotonic_time(),
+    loop_noop(1000000),
+    T1 = erlang:monotonic_time(),
+    Time1 = erlang:convert_time_unit(T1 - T0, native, microsecond),
+    loop_noop_(1000000),
+    T2 = erlang:monotonic_time(),
+    Time2 = erlang:convert_time_unit(T2 - T1, native, microsecond),
+    loop_dirty_noop(1000000),
+    T3 = erlang:monotonic_time(),
+    Time3 = erlang:convert_time_unit(T3 - T2, native, microsecond),
+    {Time1/1000000, Time2/1000000, Time3/1000000}.
+
+loop_noop(0) -> ok;
+loop_noop(I) ->
+    cl:noop(),
+    loop_noop(I-1).
+
+loop_noop_(0) -> ok;
+loop_noop_(I) ->
+    cl:noop_(),
+    loop_noop_(I-1).
+
+loop_dirty_noop(0) -> ok;
+loop_dirty_noop(I) ->
+    cl:dirty_noop(),
+    loop_dirty_noop(I-1).
+
+    
+    

Debdiff

[The following lists of changes regard files as different if they have different names, permissions or owners.]

Files in second set of .debs but not in first

-rw-r--r--  root/root   /usr/lib/debug/.build-id/df/f50f16018db8e25587846f087ad189c4140156.debug

Files in first set of .debs but not in second

-rw-r--r--  root/root   /usr/lib/debug/.build-id/19/aff653dc6d285efe85769b9eb492f58f25dc43.debug

Control files of package erlang-cl: lines which differ (wdiff format)

  • Depends: erlang-base (>= 1:24.3.3+dfsg), 1:25.2.1+dfsg), libc6 (>= 2.4), ocl-icd-libopencl1 | libopencl1 2.34)

Control files of package erlang-cl-dbgsym: lines which differ (wdiff format)

  • Build-Ids: 19aff653dc6d285efe85769b9eb492f58f25dc43 dff50f16018db8e25587846f087ad189c4140156

More details

Full run details