module Devices:sig
..end
type
dim3 = {
|
x : |
|
y : |
|
z : |
type
specificLibrary =
| |
Cuda |
| |
OpenCL |
| |
Both |
type
context
type
generalInfo = {
|
name : |
(* | name of the device | *) |
|
totalGlobalMem : |
(* | the total amount of global memory on the device | *) |
|
localMemSize : |
(* | the total amount of local memory on the device | *) |
|
clockRate : |
(* | the clock rate of the device | *) |
|
totalConstMem : |
(* | the total amount of constant memory on the device | *) |
|
multiProcessorCount : |
(* | the number of multiprocessors on the device | *) |
|
eccEnabled : |
(* | is ECC (Error Correcting Code) enabled on the device | *) |
|
id : |
(* | the id of the device | *) |
|
ctx : |
(* | the context associated with this device | *) |
type
cudaInfo = {
|
major : |
(* | Major compute capability | *) |
|
minor : |
(* | Minor compute capability | *) |
|
regsPerBlock : |
(* | 32-bit registers available per block | *) |
|
warpSize : |
(* | Warp size in threads | *) |
|
memPitch : |
(* | maximum pitch in bytes allowed by memory copies | *) |
|
maxThreadsPerBlock : |
(* | Maximum number of threads per block | *) |
|
maxThreadsDim : |
(* | Maximum size of each dimension of a block | *) |
|
maxGridSize : |
(* | Maximum size of each dimension of a grid | *) |
|
textureAlignment : |
(* | Alignment requirement for textures | *) |
|
deviceOverlap : |
(* | Device can concurrently copy memory and execute a kernel | *) |
|
kernelExecTimeoutEnabled : |
(* | Specifies whether there is a run-time limit on kernels | *) |
|
integrated : |
(* | Device is integrated as opposed to discrete | *) |
|
canMapHostMemory : |
(* | Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer | *) |
|
computeMode : |
(* | Compute mode | *) |
|
concurrentKernels : |
(* | Device can possibly execute multiple kernels concurrently | *) |
|
pciBusID : |
(* | PCI bus ID of the device | *) |
|
pciDeviceID : |
(* | PCI device ID of the device | *) |
|
driverVersion : |
type
platformInfo = {
|
platform_profile : |
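(* | OpenCL profile string. Returns the profile name supported by the implementation. The profile name returned can be one of the following strings: FULL_PROFILE (the implementation supports the full OpenCL specification) or EMBEDDED_PROFILE (the implementation supports the OpenCL embedded profile). | *) |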
|
platform_version : |
(* | OpenCL version string. Returns the OpenCL version supported by the implementation. This version string has the following format: OpenCL<space><major_version.minor_version><space><platform-specific information>. The major_version.minor_version value returned will be 1.1. | *) |
|
platform_name : |
(* | Platform name string | *) |
|
platform_vendor : |
(* | Platform vendor string | *) |
|
platform_extensions : |
(* | Returns a space-separated list of extension names (the extension names themselves do not contain any spaces) supported by the platform. Extensions defined here must be supported by all devices associated with this platform. | *) |
|
num_devices : |
(* | Number of devices associated with this platform | *) |
type
deviceType =
| |
CL_DEVICE_TYPE_CPU |
| |
CL_DEVICE_TYPE_GPU |
| |
CL_DEVICE_TYPE_ACCELERATOR |
| |
CL_DEVICE_TYPE_DEFAULT |
type
clDeviceFPConfig =
| |
CL_FP_DENORM |
| |
CL_FP_INF_NAN |
| |
CL_FP_ROUND_TO_NEAREST |
| |
CL_FP_ROUND_TO_ZERO |
| |
CL_FP_ROUND_TO_INF |
| |
CL_FP_FMA |
| |
CL_FP_NONE |
type
clDeviceQueueProperties =
| |
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
| |
CL_QUEUE_PROFILING_ENABLE |
type
clDeviceGlobalMemCacheType =
| |
CL_READ_WRITE_CACHE |
| |
CL_READ_ONLY_CACHE |
| |
CL_NONE |
type
clDeviceLocalMemType =
| |
CL_LOCAL |
| |
CL_GLOBAL |
type
clDeviceExecutionCapabilities =
| |
CL_EXEC_KERNEL |
| |
CL_EXEC_NATIVE_KERNEL |
type
clDeviceID
type
openCLInfo = {
|
platform_info : |
|||
|
device_type : |
|||
|
profile : |
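(* | OpenCL profile string. Returns the profile name supported by the device (see note). The profile name returned can be one of the following strings: FULL_PROFILE (the device supports the full OpenCL specification) or EMBEDDED_PROFILE (the device supports the OpenCL embedded profile). | *) |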
|
version : |
(* | OpenCL version string. Returns the OpenCL version supported by the device. This version string has the following format: OpenCL<space><major_version.minor_version><space><vendor-specific information>. The major_version.minor_version value returned will be 1.1. | *) |
|
vendor : |
(* | Vendor name string | *) |
|
extensions : |
(* | Returns a space-separated list of extension names (the extension names themselves do not contain any spaces). The list of extension names returned currently can include one or more of the following approved extension names, e.g.: cl_khr_fp64, cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics, cl_khr_fp16, cl_khr_gl_sharing. | *) |
|
vendor_id : |
(* | A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID | *) |
|
max_work_item_dimensions : |
(* | Maximum dimensions that specify the global and local work-item IDs used by the data parallel execution model. (Refer to clEnqueueNDRangeKernel). The minimum value is 3 | *) |
|
address_bits : |
(* | The default compute device address space size specified as an unsigned integer value in bits. Currently supported values are 32 or 64 bits. | *) |
|
max_mem_alloc_size : |
(* | Max size of memory object allocation in bytes. The minimum value is max (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) | *) |
|
image_support : |
(* | Is true if images are supported by the OpenCL device and false otherwise. | *) |
|
max_read_image_args : |
(* | Max number of simultaneous image objects that can be read by a kernel. The minimum value is 128 if image_support is true. | *) |
|
max_write_image_args : |
(* | Max number of simultaneous image objects that can be written to by a kernel. The minimum value is 8 if image_support is true. | *) |
|
max_samplers : |
(* | Maximum number of samplers that can be used in a kernel. The minimum value is 16 if image_support is true. | *) |
|
mem_base_addr_align : |
(* | Describes the alignment in bits of the base address of any allocated memory object. | *) |
|
min_data_type_align_size : |
(* | The smallest alignment in bytes which can be used for any data type. | *) |
|
global_mem_cacheline_size : |
(* | Size of global memory cache line in bytes. | *) |
|
global_mem_cache_size : |
(* | Size of global memory cache in bytes. | *) |
|
max_constant_args : |
(* | Max number of arguments declared with the __constant qualifier in a kernel. The minimum value is 8. | *) |
|
endian_little : |
(* | Is true if the OpenCL device is a little endian device and false otherwise. | *) |
|
available : |
(* | Is true if the device is available from Spoc | *) |
|
compiler_available : |
(* | Is false if the implementation does not have a compiler available to compile the program source. Is true if the compiler is available. This can be false for the embedded platform profile only. | *) |
|
single_fp_config : |
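(* | Describes the single precision floating-point capability of the device. This is a bit-field that describes one or more of the following values: CL_FP_DENORM, CL_FP_INF_NAN, CL_FP_ROUND_TO_NEAREST, CL_FP_ROUND_TO_ZERO, CL_FP_ROUND_TO_INF, CL_FP_FMA. | *) |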
|
global_mem_cache_type : |
(* | Type of global memory cache supported. Valid values are: CL_NONE, CL_READ_ONLY_CACHE, and CL_READ_WRITE_CACHE. | *) |
|
queue_properties : |
(* | Describes the command-queue properties supported by the device. This is a bit-field that describes one or more of the following values: CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE. | *) |
|
local_mem_type : |
(* | Type of local memory supported. This can be set to CL_LOCAL implying dedicated local memory storage such as SRAM, or CL_GLOBAL. | *) |
|
double_fp_config : |
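(* | Describes the OPTIONAL double precision floating-point capability of the OpenCL device. This is a bit-field that describes one or more of the following values: CL_FP_DENORM, CL_FP_INF_NAN, CL_FP_ROUND_TO_NEAREST, CL_FP_ROUND_TO_ZERO, CL_FP_ROUND_TO_INF, CL_FP_FMA. | *) |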
|
max_constant_buffer_size : |
(* | Max size in bytes of a constant buffer allocation. The minimum value is 64 KB. | *) |
|
execution_capabilities : |
(* | Describes the execution capabilities of the device. This is a bit-field that describes one or more of the following values: CL_EXEC_KERNEL, CL_EXEC_NATIVE_KERNEL. | *) |
|
half_fp_config : |
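(* | Describes the OPTIONAL half precision floating-point capability of the OpenCL device. This is a bit-field that describes one or more of the following values: CL_FP_DENORM, CL_FP_INF_NAN, CL_FP_ROUND_TO_NEAREST, CL_FP_ROUND_TO_ZERO, CL_FP_ROUND_TO_INF, CL_FP_FMA. | *) |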
|
max_work_group_size : |
(* | Maximum number of work-items in a work-group executing a kernel using the data parallel execution model. The minimum value is 1. | *) |
|
image2D_max_height : |
(* | Max height of 2D image in pixels. The minimum value is 8192 if image_support is true. | *) |
|
image2D_max_width : |
(* | Max width of 2D image in pixels. The minimum value is 8192 if image_support is true. | *) |
|
image3D_max_depth : |
(* | Max depth of 3D image in pixels. The minimum value is 2048 if image_support is true. | *) |
|
image3D_max_height : |
(* | Max height of 3D image in pixels. The minimum value is 2048 if image_support is true. | *) |
|
image3D_max_width : |
(* | Max width of 3D image in pixels. The minimum value is 2048 if image_support is true. | *) |
|
max_parameter_size : |
(* | Max size in bytes of the arguments that can be passed to a kernel. The minimum value is 1024. For this minimum value, only a maximum of 128 arguments can be passed to a kernel. | *) |
|
max_work_item_size : |
(* | Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel. The minimum value is (1, 1, 1). | *) |
|
prefered_vector_width_char : |
(* | Preferred native vector width size for built-in scalar types that can be put into vectors. The vector width is defined as the number of scalar elements that can be stored in the vector. If the cl_khr_fp16 extension is not supported, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF must return 0. | *) |
|
prefered_vector_width_short : |
(* | see prefered_vector_width_char | *) |
|
prefered_vector_width_int : |
(* | see prefered_vector_width_char | *) |
|
prefered_vector_width_long : |
(* | see prefered_vector_width_char | *) |
|
prefered_vector_width_float : |
(* | see prefered_vector_width_char | *) |
|
prefered_vector_width_double : |
(* | see prefered_vector_width_char | *) |
|
profiling_timer_resolution : |
(* | Describes the resolution of device timer. This is measured in nanoseconds. | *) |
|
driver_version : |
(* | OpenCL software driver version string in the form major_number.minor_number. | *) |
|
device_id : |
(* | Device ID | *) |
val init : ?only:specificLibrary -> unit -> device array
only
: specifies which library to use; by default, Spoc searches for any device on the system
val cuda_devices : unit -> int
val opencl_devices : unit -> int
val gpgpu_devices : unit -> int
val flush : device -> ?queue_id:int -> unit -> unit
queue_id
: restricts the flush to a single, specific command queue
val hasCLExtension : device -> string -> bool
val allowDouble : device -> bool