diff --git a/Ready/BUILD.txt b/BUILD.txt similarity index 100% rename from Ready/BUILD.txt rename to BUILD.txt diff --git a/Brandeisator/CMakeLists.txt b/Brandeisator/CMakeLists.txt deleted file mode 100644 index 882e8ea4f..000000000 --- a/Brandeisator/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(Brandeisator) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(Brandeisator - brandeisator.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(Brandeisator ${OpenCV_LIBS} ) diff --git a/Brandeisator/brandeisator.cpp b/Brandeisator/brandeisator.cpp deleted file mode 100644 index f55fd79a1..000000000 --- a/Brandeisator/brandeisator.cpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float D_a,float D_b,float A,float B, - float speed, - bool parameter_space); - -int main() -{ - // Here we implement the Brandeisator; the Lengyel-Epstein model for the - // chlorite-iodide-malonic acid (CIMA) reaction - // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.34.9043&rep=rep1&type=pdf - - // But I can't get this to work at the moment. Perhaps we need a more sophisticated solver? - // Or the initial conditions need to be right to get interesting behaviour? - - // -- parameters -- - float A = 12.371f; - float B = 16.0f; - float D_a = 0.1f; - float D_b = 1.0f; - float speed = 0.01f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - // start with a uniform field with an approximate circle in the middle - //if(hypot(i%20-10/*-X/2*/,j%20-10/*-Y/2*/)<=frand(2,5)) { - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) - { - a[i][j] = 0.0f; - b[i][j] = 1.0f; - } - else { - a[i][j] = 1; - b[i][j] = 0; - } - a[i][j] = frand(-2.0f,2.0f); - b[i][j] = frand(-2.0f,2.0f); - } - } -} - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float D_a,float D_b,float A,float B,float speed, - bool parameter_space) -{ - const bool toroidal = false; - - int iprev,inext,jprev,jnext; - - // compute change in each cell - for(int i = 0; i < X; i++) { - if(toroidal) { - iprev = (i + X - 1) % X; - inext = (i + 1) % X; - } - else { - iprev = max(0,i-1); - inext = min(X-1,i+1); - } - - for(int j = 0; j < Y; j++) { - if(toroidal) { - jprev = (j + Y - 1) % Y; - jnext = (j + 1) % Y; - } - else { - jprev = max(0,j-1); - jnext = min(Y-1,j+1); - } - - if(parameter_space) { - /*const float kmin=0.03f,kmax=0.07f,fmin=0.00f,fmax=0.06f; - // set f and k for this location (ignore the provided values of f and k) - k = kmin + i*(kmax-kmin)/X; - f = fmin + j*(fmax-fmin)/Y;*/ - } - - float aval = a[i][j]; - float bval = b[i][j]; - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = D_a * dda + A - aval - (aval*bval) / (1+aval*aval); - db[i][j] = D_b * ddb + 4*B*aval - B * (aval*bval) / (1+aval*aval); - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} - diff --git a/Brusselator/CMakeLists.txt b/Brusselator/CMakeLists.txt deleted file mode 100644 index fbb280370..000000000 --- a/Brusselator/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(Brusselator) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(Brusselator - brusselator.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(Brusselator ${OpenCV_LIBS} ) diff --git a/Brusselator/brusselator.cpp b/Brusselator/brusselator.cpp deleted file mode 100644 index fd2618bb7..000000000 --- a/Brusselator/brusselator.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y],float A,float B); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float A,float B,float D1,float D2, - float speed, - bool parameter_space); - -int main() -{ - // -- parameters -- - float A = 3.0f; - float B = 10.0f; - float D1 = 5.0f; - float D2 = 12.0f; - float speed = 0.001f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b,A,B); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float A,float B,float D1,float D2, - float speed, - bool parameter_space) -{ - // compute change in each cell - for(int i = 0; i < X; i++) - { - //int iprev = (i + X - 1) % X; - //int inext = (i + 1) % X; - int iprev = max(0,i-1); - int inext = min(X-1,i+1); - - for(int j = 0; j < Y; j++) - { - //int jprev = (j + Y - 1) % Y; - //int jnext = (j + 1) % Y; - int jprev = max(0,j-1); - int jnext = min(Y-1,j+1); - - float aval = a[i][j]; - float bval = b[i][j]; - - if(parameter_space) - { - const float A1=0.0f,A2=4.0f,B1=0.0f,B2=15.0f; - A = A1+(A2-A1)*i/X; - B = B1+(B2-B1)*j/Y; - } - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = A-(B+1)*aval + aval*aval*bval + D1*dda; - db[i][j] = B*aval - aval*aval*bval + D2*ddb; - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} - diff --git a/CMakeLists.txt b/CMakeLists.txt index 53c03e73e..b5a025f0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,18 +1,459 @@ -cmake_minimum_required(VERSION 2.6) - -project(ReactionDiffusion) - -add_subdirectory(TuringSpots) -add_subdirectory(MeinhardtSpots) -add_subdirectory(MeinhardtStripes) -add_subdirectory(GrayScott) -add_subdirectory(Brusselator) -add_subdirectory(Oregonator) -add_subdirectory(Brandeisator) -add_subdirectory(FitzHughNagumo) -add_subdirectory(FitzHughNagumo3D) -add_subdirectory(ComplexGinzbergLandau) -add_subdirectory(Schlogl) -add_subdirectory(Schnakenberg) -add_subdirectory(EdblomOrbanEpstein) -add_subdirectory(Linear) +# Copyright 2011, 2012, 2013 The Ready Bunch +# +# This file is part of Ready. +# +# Ready is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ready is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ready. If not, see . +# +#-------------------------------------------------------------------------- + +cmake_minimum_required( VERSION 2.6 ) +cmake_policy( SET CMP0003 NEW ) + +project( Ready ) + +set( READY_VERSION 0.6 ) # check matches Help/about.html +add_definitions( -D READY_VERSION=${READY_VERSION} ) + +if( APPLE OR WIN32 ) + # app names are usually capitalized on Mac OS X and Windows + set( APP_NAME Ready ) +else() + # Linux binaries are usually all lowercase + set( APP_NAME ready ) +endif() +set( CMD_NAME rdy ) # command-line version + +#-------------------------------------------source files---------------------------------------------- + +set( BASE_SOURCES # low-level code used in all executables + src/readybase/AbstractRD.hpp src/readybase/AbstractRD.cpp + src/readybase/ImageRD.hpp src/readybase/ImageRD.cpp + src/readybase/GrayScottImageRD.hpp src/readybase/GrayScottImageRD.cpp + src/readybase/OpenCLImageRD.hpp src/readybase/OpenCLImageRD.cpp + src/readybase/FormulaOpenCLImageRD.hpp src/readybase/FormulaOpenCLImageRD.cpp + src/readybase/FullKernelOpenCLImageRD.hpp src/readybase/FullKernelOpenCLImageRD.cpp + src/readybase/MeshRD.hpp src/readybase/MeshRD.cpp + src/readybase/GrayScottMeshRD.hpp src/readybase/GrayScottMeshRD.cpp + src/readybase/OpenCLMeshRD.hpp src/readybase/OpenCLMeshRD.cpp + src/readybase/FormulaOpenCLMeshRD.hpp src/readybase/FormulaOpenCLMeshRD.cpp + src/readybase/FullKernelOpenCLMeshRD.hpp src/readybase/FullKernelOpenCLMeshRD.cpp + src/readybase/OpenCL_MixIn.hpp src/readybase/OpenCL_MixIn.cpp + src/readybase/OpenCL_utils.hpp src/readybase/OpenCL_utils.cpp + src/readybase/IO_XML.hpp src/readybase/IO_XML.cpp + src/readybase/overlays.hpp src/readybase/overlays.cpp + src/readybase/Properties.hpp src/readybase/Properties.cpp + src/readybase/utils.hpp src/readybase/utils.cpp + src/readybase/OpenCL_Dyn_Load.h src/readybase/OpenCL_Dyn_Load.c + src/readybase/MeshGenerators.hpp src/readybase/MeshGenerators.cpp + src/readybase/SystemFactory.hpp src/readybase/SystemFactory.cpp + src/readybase/scene_items.hpp src/readybase/scene_items.cpp +) +include_directories( src/readybase ) + +set( GUI_SOURCES # high-level GUI code used only in Ready + src/gui/IDs.hpp + src/gui/wxutils.hpp src/gui/wxutils.cpp + src/gui/dialogs.hpp src/gui/dialogs.cpp + src/gui/prefs.hpp src/gui/prefs.cpp + src/gui/app.hpp src/gui/app.cpp + src/gui/frame.hpp src/gui/frame.cpp + src/gui/HelpPanel.hpp src/gui/HelpPanel.cpp + src/gui/InfoPanel.hpp src/gui/InfoPanel.cpp + src/gui/PatternsPanel.hpp src/gui/PatternsPanel.cpp + src/gui/vtk_pipeline.hpp src/gui/vtk_pipeline.cpp + src/gui/InteractorStylePainter.hpp src/gui/InteractorStylePainter.cpp + src/gui/wxVTKRenderWindowInteractor.h src/gui/wxVTKRenderWindowInteractor.cxx + src/gui/RecordingDialog.hpp src/gui/RecordingDialog.cpp +) +include_directories( src/gui ) + +set( CMD_SOURCES # code used for the command-line version + src/cmd/main.cpp +) + +set( RESOURCES + resources/ready.rc + resources/appicon.ico + resources/appicon16.ico + resources/appicon32.ico + resources/appicon48.ico + resources/appicon.xpm + resources/Info.plist.in + resources/app.icns + resources/file.icns +) +include_directories( resources ) + +set( PATTERN_FILES + Patterns/Meinhardt1982/stripes.vti Patterns/Meinhardt1982/zebra.vtu + Patterns/Schlogl.vti + Patterns/heat_equation.vti + Patterns/Turing1952/spots.vti Patterns/Turing1952/spots_noisy.vti + Patterns/kernel_test.vti + Patterns/parameter_modulation_demo.vti + Patterns/parameter_modulation_demo2.vti + Patterns/bunny.vtu + Patterns/lion.vtu + Patterns/heat_equation_interpolation.vti + Patterns/Ginzburg-Landau/complex_Ginzburg-Landau.vti + Patterns/Ginzburg-Landau/complex_Ginzburg-Landau_magnitude.vti + Patterns/wave_equation.vti + Patterns/oregonator.vti + Patterns/Brusselator.vti + Patterns/SmoothLife/smoothglider.vti Patterns/SmoothLife/smoothlifeL.vti Patterns/SmoothLife/glider_3D.vti + Patterns/Purwins1999/glider.vti Patterns/Purwins1999/glider_3D.vti Patterns/Purwins1999/multiGlider.vti + Patterns/CPU-only/grayscott_1D.vti + Patterns/CPU-only/grayscott_2D.vti + Patterns/CPU-only/grayscott_3D.vti + Patterns/FitzHugh-Nagumo/tip-splitting.vti + Patterns/FitzHugh-Nagumo/spiral_turbulence.vti + Patterns/FitzHugh-Nagumo/pulsate.vti + Patterns/FitzHugh-Nagumo/squid_axon.vti + Patterns/FitzHugh-Nagumo/Ising_regime.vti + Patterns/Gray-Scott/Lesmes_noisy.vti + Patterns/Gray-Scott/noisy_solitons_mitosis.vti + Patterns/Gray-Scott/parameter-map.vti + Patterns/Gray-Scott/Pearson1993.vti + Patterns/Gray-Scott/self-replicating_spots.vti + Patterns/Gray-Scott/U-Skate/Hutton-and-helix-gliders.vti + Patterns/Gray-Scott/U-Skate/Munafo_glider.vti + Patterns/Gray-Scott/U-Skate/o-ring_2D.vti + Patterns/CellularAutomata/Bays_3D.vti + Patterns/CellularAutomata/Conway_life.vti + Patterns/CellularAutomata/life_torus.vtu + Patterns/CellularAutomata/larger-than-life.vti + Patterns/CellularAutomata/Buss_hex.vtu + Patterns/CellularAutomata/tri_life.vtu + Patterns/CellularAutomata/hex_B2oS2m34_gliders.vtu + Patterns/CellularAutomata/PenroseTilings/life.vtu + Patterns/CellularAutomata/PenroseTilings/life_oscillators.vtu + Patterns/CellularAutomata/PenroseTilings/Goucher_glider.vtu + Patterns/CellularAutomata/PenroseTilings/Imai_glider_B2SC4.vtu + Patterns/CellularAutomata/PenroseTilings/Goucher_loops.vtu + Patterns/CellularAutomata/Salt/salt2D_demo.vti + Patterns/CellularAutomata/Salt/salt3D_circular330.vti + Patterns/Yang2002/Yang_1.vti Patterns/Yang2002/Yang_2b.vti Patterns/Yang2002/Yang_2c.vti + Patterns/Yang2002/Yang_2d.vti Patterns/Yang2002/Yang_3a.vti Patterns/Yang2002/Yang_3b.vti + Patterns/Yang2002/Yang_3c.vti Patterns/Yang2002/Yang_3d.vti Patterns/Yang2002/Yang_4.vti + Patterns/Yang2003/Fig2.vti + Patterns/Yang2003/Fig3a.vti Patterns/Yang2003/Fig3b.vti Patterns/Yang2003/Fig3c.vti + Patterns/McCabe/McCabe.vti Patterns/McCabe/McCabe_simple.vti + Patterns/McCabe/McCabe_additive2b.vti Patterns/McCabe/McCabe_additive2a.vti + Patterns/Kytta2007/Fig5.7a.vti Patterns/Kytta2007/Fig5.7c.vti + Patterns/Kytta2007/Fig5.8c.vti Patterns/Kytta2007/Fig5.8d.vti Patterns/Kytta2007/Fig5.8e.vti + Patterns/Kytta2007/Fig5.8f.vti Patterns/Kytta2007/Fig5.8g.vti + Patterns/Yang2006/jumping.vti Patterns/Yang2006/jumping_cGL.vti + Patterns/Schrodinger1926/packet.vti Patterns/Schrodinger1926/packet_reflect.vti + Patterns/Schrodinger1926/packet_pass.vti Patterns/Schrodinger1926/quantum_tunnelling.vti + Patterns/Schrodinger1926/packet_reflect2D.vti + Patterns/Experiments/mutually-catalytic_spots.vti + Patterns/Experiments/cglrd_ramps_example_djw.vti + Patterns/Experiments/grayscott-historyWave_fuseWorms.vti + Patterns/Experiments/grayscott-historyWave_moreLifelike.vti + Patterns/Experiments/grayscott-historyWaveDC_solitonsAndWorms_init.vti + Patterns/Experiments/orbits_explodey_init.djw.vti + Patterns/Experiments/orbits_sharpWaves-init_djw.vti + Patterns/Experiments/grayscott-historyWave_coralGrow_djw.vti + Patterns/Experiments/gladman_vermiformSolitons.vti +) + +set( HELP_FILES + Help/about.gif Help/about.html + Help/action.html Help/credits.html + Help/file.html Help/help.html + Help/mouse.html Help/quickstart.html + Help/tips.html Help/changes.html + Help/edit.html Help/formats.html + Help/index.html Help/problems.html + Help/view.html Help/introduction.html +) + +set( OTHER_FILES + ./README.txt + ./COPYING.txt + ./TODO.txt + ./BUILD.txt + ./CMakeLists.txt + src/FindOpenCL.cmake + src/Doxyfile.in + resources/logo.png + resources/Icons/22px/icon-pointer.png + resources/Icons/22px/draw-freehand.png + resources/Icons/22px/draw-brush.png + resources/Icons/22px/color-picker.png + resources/Icons/22px/document-new.png + resources/Icons/22px/document-open.png + resources/Icons/22px/document-save.png + resources/Icons/22px/document-revert.png + resources/Icons/22px/media-playback-start_green.png + resources/Icons/22px/media-playback-pause_red.png + resources/Icons/22px/media-seek-forward.png + resources/Icons/22px/media-seek-backward.png + resources/Icons/22px/media-skip-backward_modified.png + resources/Icons/22px/media-record.png + resources/Icons/22px/system-run.png + resources/Icons/22px/list-add_gray.png + resources/Icons/22px/camera-photo.png + resources/Icons/32px/icon-pointer.png + resources/Icons/32px/draw-freehand.png + resources/Icons/32px/draw-brush.png + resources/Icons/32px/color-picker.png + resources/Icons/32px/document-new.png + resources/Icons/32px/document-open.png + resources/Icons/32px/document-save.png + resources/Icons/32px/document-revert.png + resources/Icons/32px/media-playback-start_green.png + resources/Icons/32px/media-playback-pause_red.png + resources/Icons/32px/media-seek-forward.png + resources/Icons/32px/media-seek-backward.png + resources/Icons/32px/media-skip-backward_modified.png + resources/Icons/32px/media-record.png + resources/Icons/32px/system-run.png + resources/Icons/32px/list-add_gray.png + resources/Icons/32px/camera-photo.png + resources/Cursors/pencil-cursor.png + resources/Cursors/brush-cursor.png + resources/Cursors/picker-cursor.png +) + +#-------------------------------------------VTK---------------------------------------------- + +find_package( VTK ) +if( VTK_FOUND ) + include( ${VTK_USE_FILE} ) +else() + message(FATAL_ERROR "Cannot build the executable without VTK. Please set the VTK variables.") +endif() + +#-------------------------------------------wxVTK---------------------------------------------- + +# The following allows you to access wxGLCanvas for GTK +IF(WIN32) + SET(GUI_EXECUTABLE WIN32) +ELSE(WIN32) + IF(APPLE) + SET(GUI_EXECUTABLE MACOSX_BUNDLE) + IF(VTK_USE_COCOA) + SET_SOURCE_FILES_PROPERTIES( + src/gui/wxVTKRenderWindowInteractor.cxx + PROPERTIES COMPILE_FLAGS "-ObjC++") + ENDIF(VTK_USE_COCOA) + ELSE(APPLE) + # Ok X11 for sure, but just check: + IF(NOT VTK_USE_X) + MESSAGE(FATAL_ERROR "You need to have VTK_USE_X") + ENDIF(NOT VTK_USE_X) + # CMake 2.6: + # technically those packages are not required since one can still use the Motif/X11 version and not the gtk one: + FIND_PACKAGE(PkgConfig) + pkg_check_modules (GTK2 gtk+-2.0) + #MESSAGE("${GTK2_INCLUDE_DIRS}") + INCLUDE_DIRECTORIES(${GTK2_INCLUDE_DIRS}) + LINK_LIBRARIES(${GTK2_LIBRARIES}) + # Can I require all my user to have the gl lib on linux, even if they do not really need it... + SET(WXGLCANVASLIBS "gl") + ENDIF(APPLE) +ENDIF(WIN32) + +#-------------------------------------------wxWidgets---------------------------------------------- + +if( APPLE ) + # on Mac OS X it's better to use locally installed wxWidgets headers and libs + # (the pre-installed stuff tends to be out of date; eg. 10.6 has wxMac 2.8.8 and it's a 32-bit debug build) + set( wxWidgets_CONFIG_EXECUTABLE /usr/local/bin/wx-config ) + set( wxWidgets_wxrc_EXECUTABLE /usr/local/bin/wxrc ) # not used, but no harm leaving it in +elseif(UNIX) + # remove -rdynamic from link options on Linux to reduce size by about 1.2MB + set( CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "" ) +endif() + +# wxWidgets is required to build the project +FIND_PACKAGE( wxWidgets COMPONENTS html aui ${WXGLCANVASLIBS} core adv base ) + +IF( wxWidgets_FOUND ) + INCLUDE( ${wxWidgets_USE_FILE} ) +ELSE() + MESSAGE( FATAL_ERROR "Cannot build the executable without wxWidgets. Please set the wxWidgets variables." ) +ENDIF() + +if( WIN32 ) + # prevent link errors with wxMSW 2.9.x + add_definitions( -DwxDEBUG_LEVEL=0 ) +endif() + +#-------------------------------------------OpenCL---------------------------------------------- + +set( CMAKE_MODULE_PATH ${Ready_SOURCE_DIR}/src ) +# (we include our own FindOpenCL.cmake until the time that CMake comes with its own) + +# we need to build against OpenCL +find_package( OpenCL REQUIRED ) +include_directories( ${OPENCL_INCLUDE_DIRS} ) +if( APPLE ) + link_libraries( ${OPENCL_LIBRARIES} ) # on MacOSX we assume that OpenCL is available (might need to rethink for versions before 10.6) +endif() + +#---------------copy installation files to build folder (helps with testing)-------------------- + +foreach( file ${PATTERN_FILES} ${HELP_FILES} ${RESOURCES} ${OTHER_FILES} ) + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${file}" + COMMAND cmake -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${file}" "${CMAKE_CURRENT_BINARY_DIR}/${file}" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${file}" + ) + list( APPEND files_dest "${CMAKE_CURRENT_BINARY_DIR}/${file}" ) +endforeach() + +add_custom_target( CopyFiles ALL DEPENDS ${files_dest} ) + +#-------------------------------- build ------------------------------------------------------ + +# ensure we link the C runtime statically (N.B. still appears as /MD in the CMake gui but ignore this) +# see: http://www.cmake.org/Wiki/CMake_FAQ#Dynamic_Replace +foreach( var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO ) + string( REGEX REPLACE "/MD" "/MT" ${var} "${${var}}" ) +endforeach() + +set( USE_SSE "YES" ) +if( USE_SSE ) + # enable SSE to allow us to set flags to avoid denormals, and relax the floating-point accuracy for speed + if( MSVC ) + if( CMAKE_CL_64 ) + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:fast" ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast" ) + else() # avoid warning on x64, which always comes with SSE2 + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:SSE2 /fp:fast" ) + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2 /fp:fast" ) + endif() + else() + add_definitions( -msse2 -ffast-math ) + endif() + add_definitions( -DUSE_SSE ) +endif() + +if( APPLE ) + # support Mac OS 10.5 or later + add_definitions( -mmacosx-version-min=10.5 ) +endif() +if( APPLE OR UNIX ) + # use same settings as in makefiles + add_definitions( -D_LARGE_FILES ) +endif() + +# create base library used by all executables +add_library( readybase STATIC ${BASE_SOURCES} ) +if( ${VTK_MAJOR_VERSION} GREATER 5 ) + target_link_libraries( + readybase + vtkCommonCore + vtkFiltersModeling + vtkFiltersTexture + vtkInteractionStyle + vtkIOXML + vtkRenderingCore + vtkRenderingAnnotation + vtkRenderingFreeType + vtkRenderingFreeTypeOpenGL + ) +else() + target_link_libraries( readybase vtkCommon vtkGraphics vtkIO vtkRendering vtkHybrid ) +endif() + +# create command-line utility +add_executable( ${CMD_NAME} ${CMD_SOURCES} ) +target_link_libraries( ${CMD_NAME} readybase ) + +# create GUI application +add_executable( ${APP_NAME} ${GUI_EXECUTABLE} ${GUI_SOURCES} ${RESOURCES} ) +target_link_libraries( ${APP_NAME} readybase ${wxWidgets_LIBRARIES} ) + +if( APPLE ) + # create Info.plist (using Info.plist.in) and PkgInfo files inside .app bundle + add_custom_target( app_bundle + COMMAND sed -e "s/VERSION/${READY_VERSION}/" ${CMAKE_SOURCE_DIR}/resources/Info.plist.in >Ready.app/Contents/Info.plist + COMMAND echo -n "APPLReDy" >Ready.app/Contents/PkgInfo + ) + add_dependencies( ${APP_NAME} app_bundle ) + + # copy *.icns files into Resources directory inside .app bundle + set_source_files_properties( ${CMAKE_SOURCE_DIR}/resources/app.icns PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) + set_source_files_properties( ${CMAKE_SOURCE_DIR}/resources/file.icns PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) + + # remove unreachable functions and data, and don't add debug info (reduces app size by about 12MB) + target_link_libraries( ${APP_NAME} -Wl,-dead_strip -Wl,-S ) +endif() + +# (Visual Studio only) put the executable in the root binary folder, not in "Debug" or "Release" +# http://stackoverflow.com/questions/543203/cmake-runtime-output-directory-on-windows +if( MSVC_IDE ) + set_target_properties( ${APP_NAME} PROPERTIES PREFIX "../" ) + set_target_properties( ${CMD_NAME} PROPERTIES PREFIX "../" ) +endif() + +# avoid security warnings +if( MSVC ) + add_definitions( /D_CRT_SECURE_NO_WARNINGS /D_CRT_NONSTDC_NO_WARNINGS ) +endif() + +if( CMAKE_COMPILER_IS_GNUCXX ) + list( APPEND CMAKE_EXE_LINKER_FLAGS_RELEASE "-s" ) # strip release binary, for smaller file size +endif() + +#----------------------------------------doxygen------------------------------------------------ + +find_package( Doxygen ) +if(DOXYGEN_FOUND) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) + add_custom_target(doc + ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) +endif() +# (if doxygen is found then 'make doc' should produce html documentation of the source code) + +#----------------------------------------install------------------------------------------------ + +# put Ready in the root of the installation folder instead of in "bin" +install( TARGETS ${APP_NAME} DESTINATION "." ) # (add ${CMD_NAME} if we want to distribute the command-line version too) + +# install our source files, resource files, pattern files, help files and text files +foreach( source_file ${BASE_SOURCES} ${GUI_SOURCES} ${CMD_SOURCES} ${RESOURCES} ${PATTERN_FILES} ${HELP_FILES} ${OTHER_FILES} ) + get_filename_component( path_name "${source_file}" PATH ) + install( FILES "${source_file}" DESTINATION ${path_name} ) +endforeach() + +#----------------------------------------package---------------------------------------------- + +if( APPLE ) + set( CPACK_SYSTEM_NAME "Mac" ) # nicer than "Darwin" +elseif( UNIX ) + if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) + set( CPACK_SYSTEM_NAME "Linux-64bit" ) + else() + set( CPACK_SYSTEM_NAME "Linux-32bit" ) + endif() +elseif( WIN32) + if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) + set( CPACK_SYSTEM_NAME "Windows-64bit" ) + else() + set( CPACK_SYSTEM_NAME "Windows-32bit" ) + endif() +endif() +if( NOT USE_SSE ) + set( CPACK_SYSTEM_NAME "${CPACK_SYSTEM_NAME}-noSSE" ) +endif() +set( CPACK_GENERATOR "ZIP" ) +set( CPACK_PACKAGE_VERSION "${READY_VERSION}" ) +set( CPACK_SOURCE_GENERATOR "ZIP" ) +include( CPack ) diff --git a/ComplexGinzbergLandau/CMakeLists.txt b/ComplexGinzbergLandau/CMakeLists.txt deleted file mode 100644 index bf6ec5710..000000000 --- a/ComplexGinzbergLandau/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(ComplexGinzbergLandau) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(ComplexGinzbergLandau - complex_ginzberg_landau.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(ComplexGinzbergLandau ${OpenCV_LIBS} ) diff --git a/ComplexGinzbergLandau/complex_ginzberg_landau.cpp b/ComplexGinzbergLandau/complex_ginzberg_landau.cpp deleted file mode 100644 index 46b521c43..000000000 --- a/ComplexGinzbergLandau/complex_ginzberg_landau.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float D_a,float D_b,float alpha,float beta,float gamma,float delta, - float speed); - -int main() -{ - // Here we implement the complex Ginzberg-Landau model: - // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.31.529&rep=rep1&type=pdf - - // -- parameters -- - float alpha = 1.0f/16.0f; - float beta = 1.0f; - float delta = 1.0f; // 2.0f gives unstable spirals - float gamma = delta/16.0f; - float D_a = 0.2f; - float D_b = 0.2f; - float speed = 0.2f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] = frand(-1.0f,1.0f); - b[i][j] = frand(-1.0f,1.0f); - } - } -} - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float D_a,float D_b,float alpha,float beta,float gamma,float delta, - float speed) -{ - // compute change in each cell - for(int i = 0; i < X; i++) - { - int iprev = (i + X - 1) % X; - int inext = (i + 1) % X; - - for(int j = 0; j < Y; j++) - { - int jprev = (j + Y - 1) % Y; - int jnext = (j + 1) % Y; - - float aval = a[i][j]; - float bval = b[i][j]; - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = D_a * dda + alpha*aval - gamma*bval + (-beta*aval + delta*bval)*(aval*aval+bval*bval); - db[i][j] = D_b * ddb + alpha*bval + gamma*aval + (-beta*bval - delta*aval)*(aval*aval+bval*bval); - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} - diff --git a/Display/defs.h b/Display/defs.h deleted file mode 100644 index 62feba110..000000000 --- a/Display/defs.h +++ /dev/null @@ -1,3 +0,0 @@ -// the size of the world: -#define X 256 -#define Y 256 diff --git a/Display/display.cpp b/Display/display.cpp deleted file mode 100644 index 8f410638e..000000000 --- a/Display/display.cpp +++ /dev/null @@ -1,129 +0,0 @@ -// OpenCV: -#include -#include - -// stdlib -#include - -// local: -#include "display.h" - -bool display(float r[X][Y],float g[X][Y],float b[X][Y], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message) -{ - static bool need_init = true; - static bool write_video = false; - - static IplImage *im,*im2,*im3; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(X,Y),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(X*scale,Y*scale),IPL_DEPTH_8U,3); - im3 = cvCreateImage(cvSize(X*scale+border*2,Y*scale+border),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(write_video) - { - video = cvCreateVideoWriter(title,CV_FOURCC('D','I','V','X'),25.0,cvGetSize(im3),1); - border = 20; - } - } - - // convert float arrays to IplImage for OpenCV to display - float val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - if(auto_brighten) - { - for(int i=0;imaxR) maxR=val; - } - if(g) { - val = g[i][j]; - if(valmaxG) maxG=val; - } - if(b) { - val = b[i][j]; - if(valmaxB) maxB=val; - } - } - } - } - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 2] = (uchar)val; - } - if(g) { - float val = g[i][Y-j-1]; - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 1] = (uchar)val; - } - if(b) { - float val = b[i][Y-j-1]; - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - cvCopyMakeBorder(im2,im3,cvPoint(border*2,0),IPL_BORDER_CONSTANT); - - char txt[100]; - if(!write_video) - { - sprintf(txt,"%d",iteration); - cvPutText(im3,txt,cvPoint(20,20),&font,white); - } - - cvPutText(im3,message,cvPoint(20,40),&font,white); - - if(write_video) - cvWriteFrame(video,im3); - - cvShowImage(title,im3); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - if(write_video) - cvReleaseVideoWriter(&video); - return true; - } - return false; -} - diff --git a/Display/display.h b/Display/display.h deleted file mode 100644 index 91de43969..000000000 --- a/Display/display.h +++ /dev/null @@ -1,5 +0,0 @@ -#include "defs.h" - -bool display(float r[X][Y],float g[X][Y],float b[X][Y], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); diff --git a/EdblomOrbanEpstein/CMakeLists.txt b/EdblomOrbanEpstein/CMakeLists.txt deleted file mode 100644 index 489a331ee..000000000 --- a/EdblomOrbanEpstein/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(EdblomOrbanEpstein) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(EdblomOrbanEpstein - edblom_orban_epstein.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(EdblomOrbanEpstein ${OpenCV_LIBS} ) diff --git a/EdblomOrbanEpstein/edblom_orban_epstein.cpp b/EdblomOrbanEpstein/edblom_orban_epstein.cpp deleted file mode 100644 index 378cf95b7..000000000 --- a/EdblomOrbanEpstein/edblom_orban_epstein.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float A,float B,float C,float D,float F, - float speed, - bool parameter_space); - -int main() -{ - // http://www.robinengelhardt.info/speciale/ - - // -- parameters -- - float A=17.00f; - float B=1.0f; - float C=1.0f; - float D=1.39f; - float F=7.65f; - float speed = 0.01f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it=20 && abs(i-X/2)<7) ) // spreading from a vertical strip with a 1 pixel wiggle - //if(abs(i-X/2)>10 && j>Y-2) // spreading from a broken horizontal strip at the top edge - //if(abs(i-X/2)>10 && j>Y-14) // spreading from a broken horizontal strip at the top edge - //if(abs(j-Y/2)<7) // spreading from a horizontal strip in the middle - { - a[i][j]=4.4f; - //b[i][j]=3.27f; - b[i][j]=4.5f; - } - else - { - a[i][j]=5.80f; - b[i][j]=1.85f; - } - } - } -} - -#ifndef max -#define max(a,b) (((a) > (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float A,float B,float C,float D,float F, - float speed, - bool parameter_space) -{ - const bool toroidal = true; - - int iprev,inext,jprev,jnext; - - // compute change in each cell - for(int i = 0; i < X; i++) { - if(toroidal) { - iprev = (i + X - 1) % X; - inext = (i + 1) % X; - } - else { - iprev = max(0,i-1); - inext = min(X-1,i+1); - } - - for(int j = 0; j < Y; j++) { - if(toroidal) { - jprev = (j + Y - 1) % Y; - jnext = (j + 1) % Y; - } - else { - jprev = max(0,j-1); - jnext = min(Y-1,j+1); - } - - if(parameter_space) { - /*const float kmin=0.045f,kmax=0.07f,fmin=0.00f,fmax=0.14f; - // set f and k for this location (ignore the provided values of f and k) - k = kmin + i*(kmax-kmin)/X; - f = fmin + j*(fmax-fmin)/Y;*/ - } - - float aval = a[i][j]; - float bval = b[i][j]; - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = C * ( - aval*bval*bval + A*bval - (1+B)*aval ) + D*dda; - db[i][j] = (1/C) * ( aval*bval*bval - (1+A)*bval + aval + F ) + ddb; - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} - diff --git a/FitzHughNagumo/CMakeLists.txt b/FitzHughNagumo/CMakeLists.txt deleted file mode 100644 index 529e22336..000000000 --- a/FitzHughNagumo/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(FitzHughNagumo) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(FitzHughNagumo - fitz_hugh_nagumo.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(FitzHughNagumo ${OpenCV_LIBS} ) diff --git a/FitzHughNagumo/fitz_hugh_nagumo.cpp b/FitzHughNagumo/fitz_hugh_nagumo.cpp deleted file mode 100644 index b9882ada6..000000000 --- a/FitzHughNagumo/fitz_hugh_nagumo.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float a0,float a1,float epsilon,float delta,float k1,float k2,float k3, - float speed, - bool parameter_map); - -int main() -{ - // -- parameters -- - - // From Hagberg and Meron: - // http://arxiv.org/pdf/patt-sol/9401002 - - // for tip-splitting: - float a0 = -0.1f; - float a1 = 2.0f; - float epsilon = 0.05f; - float delta = 4.0f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f; - bool spiral_waves = false; - - // for spiral turbulence: - /*float a0 = -0.1f; - float a1 = 2.0f; - float epsilon = 0.014f; - float delta = 2.8f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f; - bool spiral_waves = false;*/ - - // for spiral waves: http://thevirtualheart.org/java/2dfhn.html - /*float a0 = 0.0f; - float a1 = 1.0f; - float epsilon = 0.01f; - float delta = 0.0f; - float k1 = -0.1f; - float k2 = -1.1f; - float k3 = 0.5f; - bool spiral_waves = true;*/ - - // from Malevanets and Kapral (can't get these to work) - - // for labyrinth: - /*float a0 = 0.146f; - float a1 = 3.05f; - float epsilon = 0.017f; - float delta = 4.0f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f;*/ - - // for bloch fronts: - /*float a0 = 0.0f; - float a1 = 4.88f; - float epsilon = 0.084f; - float delta = 0.0f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f;*/ - - float speed = 0.05f; - bool parameter_map = false; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it0 && iteration%2000==0)) ) - { - int div = (int)(rand()*Y/(float)RAND_MAX); - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - if(j10 && i<20) // start with a wave on the left edge - { - a[i][j] = 1.0f; - } - if(i>1 && i<17) - { - b[i][j] = 0.1f; - }*/ - if(abs(i-X/2) (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float a0,float a1,float epsilon,float delta,float k1,float k2,float k3, - float speed, - bool parameter_map) -{ - int iprev,inext,jprev,jnext; - bool toroidal = false; - - // compute change in each cell - for(int i = 0; i < X; i++) - { - if(toroidal) { iprev = (i + X - 1) % X; inext = (i + 1) % X; } - else { iprev=max(0,i-1); inext=min(X-1,i+1); } - - for(int j = 0; j < Y; j++) - { - if(toroidal) { jprev = (j + Y - 1) % Y; jnext = (j + 1) % Y; } - else { jprev=max(0,j-1); jnext=min(Y-1,j+1); } - - float aval = a[i][j]; - float bval = b[i][j]; - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - if(parameter_map) - { - const float min_a0=-0.6f,max_a0=0.6f; - const float min_a1=-2.0f,max_a1=10.0f; - const float min_epsilon=0.0f,max_epsilon=0.2f; - const float min_delta=0.0f,max_delta=5.0f; - //a0 = min_a0 + (max_a0-min_a0) * i/X; - //a1 = min_a1 + (max_a1-min_a1) * j/Y; - epsilon = min_epsilon + (max_epsilon-min_epsilon) * i/X; - delta = min_delta + (max_delta-min_delta) * j/Y; - } - - // compute the new rate of change of a and b - da[i][j] = k1*aval - k2*aval*aval - aval*aval*aval - bval + dda; - db[i][j] = epsilon * (k3*aval - a1*bval - a0) + delta*ddb; - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} - diff --git a/FitzHughNagumo/fitz_hugh_nagumo2.cpp b/FitzHughNagumo/fitz_hugh_nagumo2.cpp deleted file mode 100644 index 2f8071997..000000000 --- a/FitzHughNagumo/fitz_hugh_nagumo2.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -Specifically following Wikipedia -http://en.wikipedia.org/wiki/File:Reaction_diffusion_spiral.gif - -*/ - -// Original copyright notice: - -/* - -Make spots and stripes with reaction-diffusion. - -The spot-formation system is described in the article: - - "A Model for Generating Aspects of Zebra and Other Mammailian - Coat Patterns" - Jonathan B. L. Bard - Journal of Theoretical Biology, Vol. 93, No. 2, pp. 363-385 - (November 1981) - -The stripe-formation system is described in the book: - - Models of Biological Pattern Formation - Hans Meinhardt - Academic Press, 1982 - - -Permission is granted to modify and/or distribute this program so long -as the program is distributed free of charge and this header is retained -as part of the program. - -Copyright (c) Greg Turk, 1991 - -*/ - -// stdlib: -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float tau,float du2,float dv2,float kappa,float lambda,float sigma, - float speed, - bool parameter_map); - -int main() -{ - // -- parameters -- - - // hex: - float tau = 0.1f; - float du2 = 0.00024f * 10.0f; - float dv2 = 0.005f * 10.0f; - float kappa = -0.05f; - float lambda = 1.0f; - float sigma = 1.0f; - - // for spiral waves - /*float tau = 4.0f; - float du2 = 0.0015f * 10.0f; - float dv2 = 0.01f * 10.0f; - float kappa = -1.126f; - float lambda = 4.67f; - float sigma = -3.33f;*/ - - float speed = 0.001f; - bool parameter_map = false; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - int iteration = 0; - while(true) - { - // compute: - compute(a,b,da,db,tau,du2,dv2,kappa,lambda,sigma,speed,parameter_map); - - // display: - if(iteration%100==0) - { - if(display(a,b,b,iteration,true,200.0f,1,10,"FitzHughNagumo.avi")) // did user ask to quit? - break; - } - - // to make more interesting patterns we periodically reset part of the grid - /*if(iteration==300 || (iteration>0 && iteration%3000==0)) - { - int div = rand()*Y/(float)RAND_MAX; - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - if(j10 && i<20) // start with a wave on the left edge - { - a[i][j] = 1.0f; - } - if(i>1 && i<17) - { - b[i][j] = 0.1f; - } - // you can start from random conditions too, but it's harder to guarantee - // interesting waves - a[i][j] = frand(-0.1f,0.1f); - b[i][j] = frand(-0.1f,0.1f); - } - } -} - -#ifndef max -#define max(a,b) (((a) > (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float tau,float du2,float dv2,float kappa,float lambda,float sigma, - float speed, - bool parameter_map) -{ - int iprev,inext,jprev,jnext; - bool toroidal = false; - - // compute change in each cell - for(int i = 0; i < X; i++) - { - if(toroidal) { iprev = (i + X - 1) % X; inext = (i + 1) % X; } - else { iprev=max(0,i-1); inext=min(X-1,i+1); } - - for(int j = 0; j < Y; j++) - { - if(toroidal) { jprev = (j + Y - 1) % Y; jnext = (j + 1) % Y; } - else { jprev=max(0,j-1); jnext=min(Y-1,j+1); } - - float aval = a[i][j]; - float bval = b[i][j]; - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - if(parameter_map) - { - } - - // compute the new rate of change of a and b - da[i][j] = lambda*aval - aval*aval*aval - kappa - sigma*bval + du2*dda; - db[i][j] = (aval - bval - dv2*ddb ) / tau; - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} diff --git a/FitzHughNagumo3D/CMakeLists.txt b/FitzHughNagumo3D/CMakeLists.txt deleted file mode 100644 index 62432f33c..000000000 --- a/FitzHughNagumo3D/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -project(FitzHughNagumo3D) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR} ) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(FitzHughNagumo3D - fitz_hugh_nagumo_3d.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(FitzHughNagumo3D - ${OpenCV_LIBS} - ${VTK_LIBS} -) diff --git a/FitzHughNagumo3D/fitz_hugh_nagumo_3d.cpp b/FitzHughNagumo3D/fitz_hugh_nagumo_3d.cpp deleted file mode 100644 index 91202d788..000000000 --- a/FitzHughNagumo3D/fitz_hugh_nagumo_3d.cpp +++ /dev/null @@ -1,390 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// STL: -#include -using namespace std; - -// OpenCV: -#include -#include - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} - -#ifndef max -#define max(a,b) (((a) > (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -bool display(float *r,float *g,float *b,int S, - int iteration,bool auto_brighten,float manual_brighten, - float scale,int delay_ms,const char* window_title) -{ - static bool need_init = true; - - static bool write_video = true; - static bool is_parameter_map = false; - - static IplImage *im,*im2,*im3; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - int k=S/2; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(S,S),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(int(S*scale),int(S*scale)),IPL_DEPTH_8U,3); - - cvNamedWindow(window_title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(is_parameter_map) - { - border = 20; - } - - im3 = cvCreateImage(cvSize(int(S*scale+border*2),int(S*scale+border)),IPL_DEPTH_8U,3); - - if(write_video) - { - float fps=30.0f; - // try to find a codec that works - video = cvCreateVideoWriter("out.avi",CV_FOURCC('D','I','V','X'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('M','J','P','G'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('P','I','M','1'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('P','I','C','1'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('M','P','4','2'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('D','I','V','3'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('V','P','3','1'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('V','P','3','0'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('C','V','I','D'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",CV_FOURCC('f','f','d','s'),fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.mpg",CV_FOURCC_DEFAULT,fps,cvGetSize(im3)); - if(video==NULL) - video = cvCreateVideoWriter("out.avi",-1,fps,cvGetSize(im3)); // ask usr to choose - if(video==NULL) - video = cvCreateVideoWriter("im_00001.png",0,fps,cvGetSize(im3)); // fall back on image output - // to get video output working on linux, you may need to recompile opencv to include ffmpeg, - // as described here: http://opencv.willowgarage.com/wiki/InstallGuide (I did on Ubuntu 10.04) - } - - } - - if(!im) return true; - - // convert float arrays to IplImage for OpenCV to display - float val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - if(auto_brighten) - { - for(int i=0;imaxR) maxR=val; - } - if(g) { - val = g[(i*S+j)*S+k]; - if(valmaxG) maxG=val; - } - if(b) { - val = b[(i*S+j)*S+k]; - if(valmaxB) maxB=val; - } - } - } - } - float rangeR = max(0.001f,maxR-minR); - float rangeG = max(0.001f,maxG-minG); - float rangeB = max(0.001f,maxB-minB); - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 2] = (uchar)val; - } - if(g) { - val = g[(i*S+(S-j-1))*S+k]; - if(auto_brighten) val = 255.0f * (val-minG) / rangeG; - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 1] = (uchar)val; - } - if(b) { - val = b[(i*S+(S-j-1))*S+k]; - if(auto_brighten) val = 255.0f * (val-minB) / rangeB; - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - cvCopyMakeBorder(im2,im3,cvPoint(border*2,0),IPL_BORDER_CONSTANT); - - char txt[100]; - - // show the iteration count - sprintf(txt,"%d",iteration); - cvPutText(im3,txt,cvPoint(20,20),&font,white); - - if(!write_video) - { - if(auto_brighten) - { - // show the range of chemical concentrations - sprintf(txt,"chemical 1 range: %.4f - %.4f",minR,maxR); - cvPutText(im3,txt,cvPoint(20,60),&font,white); - sprintf(txt,"chemical 2 range: %.4f - %.4f",minG,maxG); - cvPutText(im3,txt,cvPoint(20,80),&font,white); - sprintf(txt,"chemical 3 range: %.4f - %.4f",minB,maxB); - cvPutText(im3,txt,cvPoint(20,100),&font,white); - } - } - - if(is_parameter_map) - { - // you'll need to change these labels to your needs - cvPutText(im3,"0.14",cvPoint(5,15),&font,white); - cvPutText(im3,"F",cvPoint(5,im2->height/2),&font,white); - cvPutText(im3,"0.00",cvPoint(5,im2->height),&font,white); - cvPutText(im3,"0.045",cvPoint(border*2-10,im2->height+15),&font,white); - cvPutText(im3,"K",cvPoint(border*2+im2->width/2,im2->height+15),&font,white); - cvPutText(im3,"0.07",cvPoint(im3->width-40,im2->height+15),&font,white); - } - - if(write_video) - cvWriteFrame(video,im3); - - cvShowImage(window_title,im3); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(window_title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - if(write_video) - cvReleaseVideoWriter(&video); - return true; - } - return false; -} - -int main() -{ - // -- parameters -- - - // From Hagberg and Meron: - // http://arxiv.org/pdf/patt-sol/9401002 - - // tip-splitting - float a0 = -0.1f; - float a1 = 2.0f; - float epsilon = 0.05f; - float delta = 4.0f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f; - bool spiral_waves = false; - - // for spiral turbulence: - /*float a0 = -0.1f; - float a1 = 2.0f; - float epsilon = 0.014f; - float delta = 2.8f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f; - bool spiral_waves = false;*/ - - // for spiral waves: http://thevirtualheart.org/java/2dfhn.html - /*float a0 = 0.0f; - float a1 = 1.0f; - float epsilon = 0.01f; - float delta = 0.0f; - float k1 = -0.1f; - float k2 = -1.1f; - float k3 = 0.5f; - bool spiral_waves = true; - */ - - // from Malevanets and Kapral (can't get these to work) - - // for labyrinth: - /*float a0 = 0.146f; - float a1 = 3.05f; - float epsilon = 0.017f; - float delta = 4.0f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f;*/ - - // for bloch fronts: - /*float a0 = 0.0f; - float a1 = 4.88f; - float epsilon = 0.084f; - float delta = 0.0f; - float k1 = 1.0f; - float k2 = 0.0f; - float k3 = 1.0f;*/ - - float speed = 0.02f; - bool parameter_map = false; - // ---------------- - - const int S = 50; - - // these arrays store the chemical concentrations: - float *a = new float[S*S*S],*b=new float[S*S*S]; - // these arrays store the rate of change of those chemicals: - float *da = new float[S*S*S],*db=new float[S*S*S]; - - // put the initial conditions into each cell - srand((unsigned int)time(NULL)); - for(int i = 0; i < S; i++) - { - for(int j = 0; j < S; j++) - { - for(int k = 0; k < S; k++) - { - a[(i*S+j)*S+k] = 0.0f; - b[(i*S+j)*S+k] = 0.0f; - float x=i,y=j,z=k; - // rotate - { - const float PI=3.1415926535f; - float a1=PI/10,a2=PI/9,a3=PI/12; // angles - x = i * cos(a2)*cos(a3) + j * (-cos(a1)*sin(a3)+sin(a1)*sin(a2)*cos(a3)) + k * (sin(a1)*sin(a3)+cos(a1)*sin(a2)*cos(a3)); - y = i * cos(a2)*sin(a3) + j * (cos(a1)*cos(a3)+sin(a1)*sin(a2)*sin(a3)) + k * (-sin(a1)*cos(a3)+cos(a1)*sin(a2)*sin(a3)); - z = i*(-sin(a2)) + j * (sin(a1)*cos(a2)) + k * (cos(a1)*cos(a2)); - } - if( abs(x-S/2) -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float r_a,float r_b,float f,float k, - float speed); - -int main() -{ - // Here we implement the Gray-Scott model, as described here: - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // http://arxiv.org/abs/patt-sol/9304003 - - // -- parameters -- - float r_a = 0.082f; - float r_b = 0.041f; - - // for spots: - float k = 0.064f; - float f = 0.035f; - // for stripes: - //float k = 0.06f; - //float f = 0.035f; - // for long stripes - //float k = 0.065f; - //float f = 0.056f; - // for dots and stripes - //float k = 0.064f; - //float f = 0.04f; - // for spiral waves: - //float k = 0.0475f; - //float f = 0.0118f; - float speed = 1.0f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - - //if(hypot(i%10-5/*-X/2*/,j%10-5/*-Y/2*/)<=2.0f)//frand(2,3)) - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) // start with a uniform field with an approximate circle in the middle - { - a[i][j] = 0.0f; - b[i][j] = 1.0f; - } - else { - a[i][j] = 1; - b[i][j] = 0; - } - //float v = frand(0.0f,1.0f); - //a[i][j] = v; - //b[i][j] = 1.0f-v; - //a[i][j] += frand(-0.01f,0.01f); - //b[i][j] += frand(-0.01f,0.01f); - } - } -} - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float r_a,float r_b,float f,float k,float speed) -{ - //const bool toroidal = false; - - //int iprev,inext,jprev,jnext; - - // compute change in each cell - for(int i = 0; i < X; i++) { - int iprev,inext; - /*if(toroidal) { - iprev = (i + X - 1) % X; - inext = (i + 1) % X; - } - else*/ { - iprev = max(0,i-1); - inext = min(X-1,i+1); - } - - for(int j = 0; j < Y; j++) { - int jprev,jnext; - /*if(toroidal) { - jprev = (j + Y - 1) % Y; - jnext = (j + 1) % Y; - } - else*/ { - jprev = max(0,j-1); - jnext = min(Y-1,j+1); - } - - float aval = a[i][j]; - float bval = b[i][j]; - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = r_a * dda - aval*bval*bval + f*(1-aval); - db[i][j] = r_b * ddb + aval*bval*bval - (f+k)*bval; - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += speed * da[i][j]; - b[i][j] += speed * db[i][j]; - } - } -} - diff --git a/Ready/Help/about.gif b/Help/about.gif similarity index 100% rename from Ready/Help/about.gif rename to Help/about.gif diff --git a/Ready/Help/about.html b/Help/about.html similarity index 100% rename from Ready/Help/about.html rename to Help/about.html diff --git a/Ready/Help/action.html b/Help/action.html similarity index 100% rename from Ready/Help/action.html rename to Help/action.html diff --git a/Ready/Help/changes.html b/Help/changes.html similarity index 100% rename from Ready/Help/changes.html rename to Help/changes.html diff --git a/Ready/Help/credits.html b/Help/credits.html similarity index 100% rename from Ready/Help/credits.html rename to Help/credits.html diff --git a/Ready/Help/edit.html b/Help/edit.html similarity index 100% rename from Ready/Help/edit.html rename to Help/edit.html diff --git a/Ready/Help/file.html b/Help/file.html similarity index 100% rename from Ready/Help/file.html rename to Help/file.html diff --git a/Ready/Help/formats.html b/Help/formats.html similarity index 100% rename from Ready/Help/formats.html rename to Help/formats.html diff --git a/Ready/Help/help.html b/Help/help.html similarity index 100% rename from Ready/Help/help.html rename to Help/help.html diff --git a/Ready/Help/index.html b/Help/index.html similarity index 100% rename from Ready/Help/index.html rename to Help/index.html diff --git a/Ready/Help/introduction.html b/Help/introduction.html similarity index 100% rename from Ready/Help/introduction.html rename to Help/introduction.html diff --git a/Ready/Help/mouse.html b/Help/mouse.html similarity index 100% rename from Ready/Help/mouse.html rename to Help/mouse.html diff --git a/Ready/Help/problems.html b/Help/problems.html similarity index 100% rename from Ready/Help/problems.html rename to Help/problems.html diff --git a/Ready/Help/quickstart.html b/Help/quickstart.html similarity index 100% rename from Ready/Help/quickstart.html rename to Help/quickstart.html diff --git a/Ready/Help/tips.html b/Help/tips.html similarity index 100% rename from Ready/Help/tips.html rename to Help/tips.html diff --git a/Ready/Help/view.html b/Help/view.html similarity index 100% rename from Ready/Help/view.html rename to Help/view.html diff --git a/Linear/CMakeLists.txt b/Linear/CMakeLists.txt deleted file mode 100644 index 09587ceb2..000000000 --- a/Linear/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(Linear) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(Linear - linear.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(Linear ${OpenCV_LIBS} ) diff --git a/Linear/linear.cpp b/Linear/linear.cpp deleted file mode 100644 index c3ce53db3..000000000 --- a/Linear/linear.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y],float da[X][Y],float db[X][Y]); - -int main() -{ - // http://www.nature.com/ncomms/journal/v1/n6/full/ncomms1071.html - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;itsyn_a_max) syn_a=syn_a_max; - if(syn_b<0.0f) syn_b=0.0f; if(syn_b>syn_b_max) syn_b=syn_b_max; - - // compute the new rate of change of a and b - da[i][j] = R*( syn_a - D*aval ) + D_a * dda; - db[i][j] = R*( syn_b - G*bval ) + D_b * ddb; - } - } - - // effect change - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} diff --git a/MeinhardtSpots/CMakeLists.txt b/MeinhardtSpots/CMakeLists.txt deleted file mode 100644 index ba5d928d6..000000000 --- a/MeinhardtSpots/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(MeinhardtSpots) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(MeinhardtSpots - meinhardt_spots.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(MeinhardtSpots ${OpenCV_LIBS} ) diff --git a/MeinhardtSpots/meinhardt_spots.cpp b/MeinhardtSpots/meinhardt_spots.cpp deleted file mode 100644 index a3ece6566..000000000 --- a/MeinhardtSpots/meinhardt_spots.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -NB. Turk's thesis has errors in the formulae for this system, the correct ones are here: -http://www1.cse.wustl.edu/~faanly/materials/Sketching_RD_Texture.pdf - -*/ - -// stdlib: -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y],float beta[X][Y], - float p1,float p2); - -void compute(float a[X][Y],float b[X][Y],float beta[X][Y], - float da[X][Y],float db[X][Y], - float diff1,float diff2,float p1,float p2,float p3,float s, - float speed); - -int main() -{ - // -- parameters -- - float p1 = 0.03f; - float p2 = 0.04f; - float p3 = 0.0f; - float diff1 = 0.01f; - float diff2 = 0.2f; - float s = 0.2f; - float speed = 1.0f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - float beta[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b,beta,p1,p2); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y],float c[X][Y],float d[X][Y],float e[X][Y], - float beta[X][Y], - float k_ab,float k_c,float arand); - -void compute(float a[X][Y],float b[X][Y],float c[X][Y],float d[X][Y],float e[X][Y], - float beta[X][Y], - float da[X][Y],float db[X][Y],float dc[X][Y],float dd[X][Y],float de[X][Y], - float diff1,float diff2,float k_ab,float k_c,float k_de,float speed); - -int main() -{ - // -- parameters -- - float k_ab = 0.04f; // p1 - float k_c = 0.06f; // p2 - float k_de = 0.04f; // p3 - float diff1 = 0.009f; - float diff2 = 0.2f; - float arand = 0.01f; - float speed = 1.0f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y], c[X][Y], d[X][Y], e[X][Y]; - float beta[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y], dc[X][Y], dd[X][Y], de[X][Y]; - - // put the initial conditions into each cell - init(a,b,c,d,e,beta,k_ab,k_c,arand); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y],float c[X][Y]); - -void compute(float a[X][Y],float b[X][Y],float c[X][Y], - float da[X][Y],float db[X][Y],float dc[X][Y], - float Da,float Db,float Dc,float q,float epsilon,float f, - float speed, - bool parameter_space); - -int main() -{ - // Following: - // http://hopf.chem.brandeis.edu/yanglingfa/pattern/oreg/index.html - - // But I can't get this to work at the moment. - - // -- parameters -- - float q = 0.01f; - float epsilon = 0.5f; - float f = 0.80f; // labyrinthine - float Da = 3.0f; // (Dx,Dz,Dr at http://hopf.chem.brandeis.edu/yanglingfa/pattern/oreg/index.html ) - float Db = 100.0f; - float Dc = 0.1f; - float speed = 0.00001f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y], c[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y], dc[X][Y]; - - // put the initial conditions into each cell - init(a,b,c); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void compute(float a[X][Y],float b[X][Y],float c[X][Y], - float da[X][Y],float db[X][Y],float dc[X][Y], - float Da,float Db,float Dc,float q,float epsilon,float f, - float speed, - bool parameter_space) -{ - // compute change in each cell - for(int i = 0; i < X; i++) - { - //int iprev = (i + X - 1) % X; - //int inext = (i + 1) % X; - int iprev = max(0,i-1); - int inext = min(X-1,i+1); - - for(int j = 0; j < Y; j++) - { - //int jprev = (j + Y - 1) % Y; - //int jnext = (j + 1) % Y; - int jprev = max(0,j-1); - int jnext = min(Y-1,j+1); - - float aval = a[i][j]; - float bval = b[i][j]; - float cval = c[i][j]; - - if(parameter_space) - { - /*const float A1=0.0f,A2=4.0f,B1=0.0f,B2=15.0f; - A = A1+(A2-A1)*i/X; - B = B2+(B1-B2)*j/Y;*/ - } - - // compute the Laplacians of a, b and c - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - float ddc = c[i][jprev] + c[i][jnext] + c[iprev][j] + c[inext][j] - 4*cval; - - // compute the new rate of change of a, b and c - da[i][j] = Da*dda + ( aval - aval*aval - f*bval*(aval-q)/(aval+q) - (aval-cval)/2.0f ) / epsilon; - db[i][j] = Db*ddb + aval - bval; - dc[i][j] = Dc*ddc + ( aval - cval ) / ( 2.0f * epsilon ); - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - c[i][j] += (speed * dc[i][j]); - } - } -} - diff --git a/Ready/Patterns/Brusselator.vti b/Patterns/Brusselator.vti similarity index 100% rename from Ready/Patterns/Brusselator.vti rename to Patterns/Brusselator.vti diff --git a/Ready/Patterns/CPU-only/grayscott_1D.vti b/Patterns/CPU-only/grayscott_1D.vti similarity index 100% rename from Ready/Patterns/CPU-only/grayscott_1D.vti rename to Patterns/CPU-only/grayscott_1D.vti diff --git a/Ready/Patterns/CPU-only/grayscott_2D.vti b/Patterns/CPU-only/grayscott_2D.vti similarity index 100% rename from Ready/Patterns/CPU-only/grayscott_2D.vti rename to Patterns/CPU-only/grayscott_2D.vti diff --git a/Ready/Patterns/CPU-only/grayscott_3D.vti b/Patterns/CPU-only/grayscott_3D.vti similarity index 100% rename from Ready/Patterns/CPU-only/grayscott_3D.vti rename to Patterns/CPU-only/grayscott_3D.vti diff --git a/Ready/Patterns/CellularAutomata/Bays_3D.vti b/Patterns/CellularAutomata/Bays_3D.vti similarity index 100% rename from Ready/Patterns/CellularAutomata/Bays_3D.vti rename to Patterns/CellularAutomata/Bays_3D.vti diff --git a/Ready/Patterns/CellularAutomata/Buss_hex.vtu b/Patterns/CellularAutomata/Buss_hex.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/Buss_hex.vtu rename to Patterns/CellularAutomata/Buss_hex.vtu diff --git a/Ready/Patterns/CellularAutomata/Conway_life.vti b/Patterns/CellularAutomata/Conway_life.vti similarity index 100% rename from Ready/Patterns/CellularAutomata/Conway_life.vti rename to Patterns/CellularAutomata/Conway_life.vti diff --git a/Ready/Patterns/CellularAutomata/PenroseTilings/Goucher_glider.vtu b/Patterns/CellularAutomata/PenroseTilings/Goucher_glider.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/PenroseTilings/Goucher_glider.vtu rename to Patterns/CellularAutomata/PenroseTilings/Goucher_glider.vtu diff --git a/Ready/Patterns/CellularAutomata/PenroseTilings/Goucher_loops.vtu b/Patterns/CellularAutomata/PenroseTilings/Goucher_loops.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/PenroseTilings/Goucher_loops.vtu rename to Patterns/CellularAutomata/PenroseTilings/Goucher_loops.vtu diff --git a/Ready/Patterns/CellularAutomata/PenroseTilings/Imai_glider_B2SC4.vtu b/Patterns/CellularAutomata/PenroseTilings/Imai_glider_B2SC4.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/PenroseTilings/Imai_glider_B2SC4.vtu rename to Patterns/CellularAutomata/PenroseTilings/Imai_glider_B2SC4.vtu diff --git a/Ready/Patterns/CellularAutomata/PenroseTilings/life.vtu b/Patterns/CellularAutomata/PenroseTilings/life.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/PenroseTilings/life.vtu rename to Patterns/CellularAutomata/PenroseTilings/life.vtu diff --git a/Ready/Patterns/CellularAutomata/PenroseTilings/life_oscillators.vtu b/Patterns/CellularAutomata/PenroseTilings/life_oscillators.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/PenroseTilings/life_oscillators.vtu rename to Patterns/CellularAutomata/PenroseTilings/life_oscillators.vtu diff --git a/Ready/Patterns/CellularAutomata/Salt/salt2D_demo.vti b/Patterns/CellularAutomata/Salt/salt2D_demo.vti similarity index 100% rename from Ready/Patterns/CellularAutomata/Salt/salt2D_demo.vti rename to Patterns/CellularAutomata/Salt/salt2D_demo.vti diff --git a/Ready/Patterns/CellularAutomata/Salt/salt3D_circular330.vti b/Patterns/CellularAutomata/Salt/salt3D_circular330.vti similarity index 100% rename from Ready/Patterns/CellularAutomata/Salt/salt3D_circular330.vti rename to Patterns/CellularAutomata/Salt/salt3D_circular330.vti diff --git a/Ready/Patterns/CellularAutomata/hex_B2oS2m34_gliders.vtu b/Patterns/CellularAutomata/hex_B2oS2m34_gliders.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/hex_B2oS2m34_gliders.vtu rename to Patterns/CellularAutomata/hex_B2oS2m34_gliders.vtu diff --git a/Ready/Patterns/CellularAutomata/larger-than-life.vti b/Patterns/CellularAutomata/larger-than-life.vti similarity index 100% rename from Ready/Patterns/CellularAutomata/larger-than-life.vti rename to Patterns/CellularAutomata/larger-than-life.vti diff --git a/Ready/Patterns/CellularAutomata/life_torus.vtu b/Patterns/CellularAutomata/life_torus.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/life_torus.vtu rename to Patterns/CellularAutomata/life_torus.vtu diff --git a/Ready/Patterns/CellularAutomata/tri_life.vtu b/Patterns/CellularAutomata/tri_life.vtu similarity index 100% rename from Ready/Patterns/CellularAutomata/tri_life.vtu rename to Patterns/CellularAutomata/tri_life.vtu diff --git a/Ready/Patterns/Experiments/cglrd_ramps_example_djw.vti b/Patterns/Experiments/cglrd_ramps_example_djw.vti similarity index 100% rename from Ready/Patterns/Experiments/cglrd_ramps_example_djw.vti rename to Patterns/Experiments/cglrd_ramps_example_djw.vti diff --git a/Ready/Patterns/Experiments/gladman_vermiformSolitons.vti b/Patterns/Experiments/gladman_vermiformSolitons.vti similarity index 100% rename from Ready/Patterns/Experiments/gladman_vermiformSolitons.vti rename to Patterns/Experiments/gladman_vermiformSolitons.vti diff --git a/Ready/Patterns/Experiments/grayscott-historyWaveDC_solitonsAndWorms_init.vti b/Patterns/Experiments/grayscott-historyWaveDC_solitonsAndWorms_init.vti similarity index 100% rename from Ready/Patterns/Experiments/grayscott-historyWaveDC_solitonsAndWorms_init.vti rename to Patterns/Experiments/grayscott-historyWaveDC_solitonsAndWorms_init.vti diff --git a/Ready/Patterns/Experiments/grayscott-historyWave_coralGrow_djw.vti b/Patterns/Experiments/grayscott-historyWave_coralGrow_djw.vti similarity index 100% rename from Ready/Patterns/Experiments/grayscott-historyWave_coralGrow_djw.vti rename to Patterns/Experiments/grayscott-historyWave_coralGrow_djw.vti diff --git a/Ready/Patterns/Experiments/grayscott-historyWave_fuseWorms.vti b/Patterns/Experiments/grayscott-historyWave_fuseWorms.vti similarity index 100% rename from Ready/Patterns/Experiments/grayscott-historyWave_fuseWorms.vti rename to Patterns/Experiments/grayscott-historyWave_fuseWorms.vti diff --git a/Ready/Patterns/Experiments/grayscott-historyWave_moreLifelike.vti b/Patterns/Experiments/grayscott-historyWave_moreLifelike.vti similarity index 100% rename from Ready/Patterns/Experiments/grayscott-historyWave_moreLifelike.vti rename to Patterns/Experiments/grayscott-historyWave_moreLifelike.vti diff --git a/Ready/Patterns/Experiments/mutually-catalytic_spots.vti b/Patterns/Experiments/mutually-catalytic_spots.vti similarity index 100% rename from Ready/Patterns/Experiments/mutually-catalytic_spots.vti rename to Patterns/Experiments/mutually-catalytic_spots.vti diff --git a/Ready/Patterns/Experiments/orbits_explodey_init.djw.vti b/Patterns/Experiments/orbits_explodey_init.djw.vti similarity index 100% rename from Ready/Patterns/Experiments/orbits_explodey_init.djw.vti rename to Patterns/Experiments/orbits_explodey_init.djw.vti diff --git a/Ready/Patterns/Experiments/orbits_sharpWaves-init_djw.vti b/Patterns/Experiments/orbits_sharpWaves-init_djw.vti similarity index 100% rename from Ready/Patterns/Experiments/orbits_sharpWaves-init_djw.vti rename to Patterns/Experiments/orbits_sharpWaves-init_djw.vti diff --git a/Ready/Patterns/FitzHugh-Nagumo/Ising_regime.vti b/Patterns/FitzHugh-Nagumo/Ising_regime.vti similarity index 100% rename from Ready/Patterns/FitzHugh-Nagumo/Ising_regime.vti rename to Patterns/FitzHugh-Nagumo/Ising_regime.vti diff --git a/Ready/Patterns/FitzHugh-Nagumo/pulsate.vti b/Patterns/FitzHugh-Nagumo/pulsate.vti similarity index 100% rename from Ready/Patterns/FitzHugh-Nagumo/pulsate.vti rename to Patterns/FitzHugh-Nagumo/pulsate.vti diff --git a/Ready/Patterns/FitzHugh-Nagumo/spiral_turbulence.vti b/Patterns/FitzHugh-Nagumo/spiral_turbulence.vti similarity index 100% rename from Ready/Patterns/FitzHugh-Nagumo/spiral_turbulence.vti rename to Patterns/FitzHugh-Nagumo/spiral_turbulence.vti diff --git a/Ready/Patterns/FitzHugh-Nagumo/squid_axon.vti b/Patterns/FitzHugh-Nagumo/squid_axon.vti similarity index 100% rename from Ready/Patterns/FitzHugh-Nagumo/squid_axon.vti rename to Patterns/FitzHugh-Nagumo/squid_axon.vti diff --git a/Ready/Patterns/FitzHugh-Nagumo/tip-splitting.vti b/Patterns/FitzHugh-Nagumo/tip-splitting.vti similarity index 100% rename from Ready/Patterns/FitzHugh-Nagumo/tip-splitting.vti rename to Patterns/FitzHugh-Nagumo/tip-splitting.vti diff --git a/Ready/Patterns/Ginzburg-Landau/complex_Ginzburg-Landau.vti b/Patterns/Ginzburg-Landau/complex_Ginzburg-Landau.vti similarity index 100% rename from Ready/Patterns/Ginzburg-Landau/complex_Ginzburg-Landau.vti rename to Patterns/Ginzburg-Landau/complex_Ginzburg-Landau.vti diff --git a/Ready/Patterns/Ginzburg-Landau/complex_Ginzburg-Landau_magnitude.vti b/Patterns/Ginzburg-Landau/complex_Ginzburg-Landau_magnitude.vti similarity index 100% rename from Ready/Patterns/Ginzburg-Landau/complex_Ginzburg-Landau_magnitude.vti rename to Patterns/Ginzburg-Landau/complex_Ginzburg-Landau_magnitude.vti diff --git a/Ready/Patterns/Gray-Scott/Lesmes_noisy.vti b/Patterns/Gray-Scott/Lesmes_noisy.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/Lesmes_noisy.vti rename to Patterns/Gray-Scott/Lesmes_noisy.vti diff --git a/Ready/Patterns/Gray-Scott/Pearson1993.vti b/Patterns/Gray-Scott/Pearson1993.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/Pearson1993.vti rename to Patterns/Gray-Scott/Pearson1993.vti diff --git a/Ready/Patterns/Gray-Scott/U-Skate/Hutton-and-helix-gliders.vti b/Patterns/Gray-Scott/U-Skate/Hutton-and-helix-gliders.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/U-Skate/Hutton-and-helix-gliders.vti rename to Patterns/Gray-Scott/U-Skate/Hutton-and-helix-gliders.vti diff --git a/Ready/Patterns/Gray-Scott/U-Skate/Munafo_glider.vti b/Patterns/Gray-Scott/U-Skate/Munafo_glider.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/U-Skate/Munafo_glider.vti rename to Patterns/Gray-Scott/U-Skate/Munafo_glider.vti diff --git a/Ready/Patterns/Gray-Scott/U-Skate/o-ring_2D.vti b/Patterns/Gray-Scott/U-Skate/o-ring_2D.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/U-Skate/o-ring_2D.vti rename to Patterns/Gray-Scott/U-Skate/o-ring_2D.vti diff --git a/Ready/Patterns/Gray-Scott/noisy_solitons_mitosis.vti b/Patterns/Gray-Scott/noisy_solitons_mitosis.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/noisy_solitons_mitosis.vti rename to Patterns/Gray-Scott/noisy_solitons_mitosis.vti diff --git a/Ready/Patterns/Gray-Scott/parameter-map.vti b/Patterns/Gray-Scott/parameter-map.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/parameter-map.vti rename to Patterns/Gray-Scott/parameter-map.vti diff --git a/Ready/Patterns/Gray-Scott/self-replicating_spots.vti b/Patterns/Gray-Scott/self-replicating_spots.vti similarity index 100% rename from Ready/Patterns/Gray-Scott/self-replicating_spots.vti rename to Patterns/Gray-Scott/self-replicating_spots.vti diff --git a/Ready/Patterns/Kytta2007/Fig5.7a.vti b/Patterns/Kytta2007/Fig5.7a.vti similarity index 100% rename from Ready/Patterns/Kytta2007/Fig5.7a.vti rename to Patterns/Kytta2007/Fig5.7a.vti diff --git a/Ready/Patterns/Kytta2007/Fig5.7c.vti b/Patterns/Kytta2007/Fig5.7c.vti similarity index 100% rename from Ready/Patterns/Kytta2007/Fig5.7c.vti rename to Patterns/Kytta2007/Fig5.7c.vti diff --git a/Ready/Patterns/Kytta2007/Fig5.8c.vti b/Patterns/Kytta2007/Fig5.8c.vti similarity index 100% rename from Ready/Patterns/Kytta2007/Fig5.8c.vti rename to Patterns/Kytta2007/Fig5.8c.vti diff --git a/Ready/Patterns/Kytta2007/Fig5.8d.vti b/Patterns/Kytta2007/Fig5.8d.vti similarity index 100% rename from Ready/Patterns/Kytta2007/Fig5.8d.vti rename to Patterns/Kytta2007/Fig5.8d.vti diff --git a/Ready/Patterns/Kytta2007/Fig5.8e.vti b/Patterns/Kytta2007/Fig5.8e.vti similarity index 100% rename from Ready/Patterns/Kytta2007/Fig5.8e.vti rename to Patterns/Kytta2007/Fig5.8e.vti diff --git a/Ready/Patterns/Kytta2007/Fig5.8f.vti b/Patterns/Kytta2007/Fig5.8f.vti similarity index 100% rename from Ready/Patterns/Kytta2007/Fig5.8f.vti rename to Patterns/Kytta2007/Fig5.8f.vti diff --git a/Ready/Patterns/Kytta2007/Fig5.8g.vti b/Patterns/Kytta2007/Fig5.8g.vti similarity index 100% rename from Ready/Patterns/Kytta2007/Fig5.8g.vti rename to Patterns/Kytta2007/Fig5.8g.vti diff --git a/Ready/Patterns/McCabe/McCabe.vti b/Patterns/McCabe/McCabe.vti similarity index 100% rename from Ready/Patterns/McCabe/McCabe.vti rename to Patterns/McCabe/McCabe.vti diff --git a/Ready/Patterns/McCabe/McCabe_additive2a.vti b/Patterns/McCabe/McCabe_additive2a.vti similarity index 100% rename from Ready/Patterns/McCabe/McCabe_additive2a.vti rename to Patterns/McCabe/McCabe_additive2a.vti diff --git a/Ready/Patterns/McCabe/McCabe_additive2b.vti b/Patterns/McCabe/McCabe_additive2b.vti similarity index 100% rename from Ready/Patterns/McCabe/McCabe_additive2b.vti rename to Patterns/McCabe/McCabe_additive2b.vti diff --git a/Ready/Patterns/McCabe/McCabe_simple.vti b/Patterns/McCabe/McCabe_simple.vti similarity index 100% rename from Ready/Patterns/McCabe/McCabe_simple.vti rename to Patterns/McCabe/McCabe_simple.vti diff --git a/Ready/Patterns/Meinhardt1982/stripes.vti b/Patterns/Meinhardt1982/stripes.vti similarity index 100% rename from Ready/Patterns/Meinhardt1982/stripes.vti rename to Patterns/Meinhardt1982/stripes.vti diff --git a/Ready/Patterns/Meinhardt1982/zebra.vtu b/Patterns/Meinhardt1982/zebra.vtu similarity index 100% rename from Ready/Patterns/Meinhardt1982/zebra.vtu rename to Patterns/Meinhardt1982/zebra.vtu diff --git a/Ready/Patterns/Purwins1999/glider.vti b/Patterns/Purwins1999/glider.vti similarity index 100% rename from Ready/Patterns/Purwins1999/glider.vti rename to Patterns/Purwins1999/glider.vti diff --git a/Ready/Patterns/Purwins1999/glider_3D.vti b/Patterns/Purwins1999/glider_3D.vti similarity index 100% rename from Ready/Patterns/Purwins1999/glider_3D.vti rename to Patterns/Purwins1999/glider_3D.vti diff --git a/Ready/Patterns/Purwins1999/multiGlider.vti b/Patterns/Purwins1999/multiGlider.vti similarity index 100% rename from Ready/Patterns/Purwins1999/multiGlider.vti rename to Patterns/Purwins1999/multiGlider.vti diff --git a/Ready/Patterns/Schlogl.vti b/Patterns/Schlogl.vti similarity index 100% rename from Ready/Patterns/Schlogl.vti rename to Patterns/Schlogl.vti diff --git a/Ready/Patterns/Schrodinger1926/packet.vti b/Patterns/Schrodinger1926/packet.vti similarity index 100% rename from Ready/Patterns/Schrodinger1926/packet.vti rename to Patterns/Schrodinger1926/packet.vti diff --git a/Ready/Patterns/Schrodinger1926/packet_pass.vti b/Patterns/Schrodinger1926/packet_pass.vti similarity index 100% rename from Ready/Patterns/Schrodinger1926/packet_pass.vti rename to Patterns/Schrodinger1926/packet_pass.vti diff --git a/Ready/Patterns/Schrodinger1926/packet_reflect.vti b/Patterns/Schrodinger1926/packet_reflect.vti similarity index 100% rename from Ready/Patterns/Schrodinger1926/packet_reflect.vti rename to Patterns/Schrodinger1926/packet_reflect.vti diff --git a/Ready/Patterns/Schrodinger1926/packet_reflect2D.vti b/Patterns/Schrodinger1926/packet_reflect2D.vti similarity index 100% rename from Ready/Patterns/Schrodinger1926/packet_reflect2D.vti rename to Patterns/Schrodinger1926/packet_reflect2D.vti diff --git a/Ready/Patterns/Schrodinger1926/quantum_tunnelling.vti b/Patterns/Schrodinger1926/quantum_tunnelling.vti similarity index 100% rename from Ready/Patterns/Schrodinger1926/quantum_tunnelling.vti rename to Patterns/Schrodinger1926/quantum_tunnelling.vti diff --git a/Ready/Patterns/SmoothLife/glider_3D.vti b/Patterns/SmoothLife/glider_3D.vti similarity index 100% rename from Ready/Patterns/SmoothLife/glider_3D.vti rename to Patterns/SmoothLife/glider_3D.vti diff --git a/Ready/Patterns/SmoothLife/smoothglider.vti b/Patterns/SmoothLife/smoothglider.vti similarity index 100% rename from Ready/Patterns/SmoothLife/smoothglider.vti rename to Patterns/SmoothLife/smoothglider.vti diff --git a/Ready/Patterns/SmoothLife/smoothlifeL.vti b/Patterns/SmoothLife/smoothlifeL.vti similarity index 100% rename from Ready/Patterns/SmoothLife/smoothlifeL.vti rename to Patterns/SmoothLife/smoothlifeL.vti diff --git a/Ready/Patterns/Turing1952/spots.vti b/Patterns/Turing1952/spots.vti similarity index 100% rename from Ready/Patterns/Turing1952/spots.vti rename to Patterns/Turing1952/spots.vti diff --git a/Ready/Patterns/Turing1952/spots_noisy.vti b/Patterns/Turing1952/spots_noisy.vti similarity index 100% rename from Ready/Patterns/Turing1952/spots_noisy.vti rename to Patterns/Turing1952/spots_noisy.vti diff --git a/Ready/Patterns/Yang2002/Yang_1.vti b/Patterns/Yang2002/Yang_1.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_1.vti rename to Patterns/Yang2002/Yang_1.vti diff --git a/Ready/Patterns/Yang2002/Yang_2b.vti b/Patterns/Yang2002/Yang_2b.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_2b.vti rename to Patterns/Yang2002/Yang_2b.vti diff --git a/Ready/Patterns/Yang2002/Yang_2c.vti b/Patterns/Yang2002/Yang_2c.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_2c.vti rename to Patterns/Yang2002/Yang_2c.vti diff --git a/Ready/Patterns/Yang2002/Yang_2d.vti b/Patterns/Yang2002/Yang_2d.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_2d.vti rename to Patterns/Yang2002/Yang_2d.vti diff --git a/Ready/Patterns/Yang2002/Yang_3a.vti b/Patterns/Yang2002/Yang_3a.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_3a.vti rename to Patterns/Yang2002/Yang_3a.vti diff --git a/Ready/Patterns/Yang2002/Yang_3b.vti b/Patterns/Yang2002/Yang_3b.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_3b.vti rename to Patterns/Yang2002/Yang_3b.vti diff --git a/Ready/Patterns/Yang2002/Yang_3c.vti b/Patterns/Yang2002/Yang_3c.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_3c.vti rename to Patterns/Yang2002/Yang_3c.vti diff --git a/Ready/Patterns/Yang2002/Yang_3d.vti b/Patterns/Yang2002/Yang_3d.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_3d.vti rename to Patterns/Yang2002/Yang_3d.vti diff --git a/Ready/Patterns/Yang2002/Yang_4.vti b/Patterns/Yang2002/Yang_4.vti similarity index 100% rename from Ready/Patterns/Yang2002/Yang_4.vti rename to Patterns/Yang2002/Yang_4.vti diff --git a/Ready/Patterns/Yang2003/Fig2.vti b/Patterns/Yang2003/Fig2.vti similarity index 100% rename from Ready/Patterns/Yang2003/Fig2.vti rename to Patterns/Yang2003/Fig2.vti diff --git a/Ready/Patterns/Yang2003/Fig3a.vti b/Patterns/Yang2003/Fig3a.vti similarity index 100% rename from Ready/Patterns/Yang2003/Fig3a.vti rename to Patterns/Yang2003/Fig3a.vti diff --git a/Ready/Patterns/Yang2003/Fig3b.vti b/Patterns/Yang2003/Fig3b.vti similarity index 100% rename from Ready/Patterns/Yang2003/Fig3b.vti rename to Patterns/Yang2003/Fig3b.vti diff --git a/Ready/Patterns/Yang2003/Fig3c.vti b/Patterns/Yang2003/Fig3c.vti similarity index 100% rename from Ready/Patterns/Yang2003/Fig3c.vti rename to Patterns/Yang2003/Fig3c.vti diff --git a/Ready/Patterns/Yang2006/jumping.vti b/Patterns/Yang2006/jumping.vti similarity index 100% rename from Ready/Patterns/Yang2006/jumping.vti rename to Patterns/Yang2006/jumping.vti diff --git a/Ready/Patterns/Yang2006/jumping_cGL.vti b/Patterns/Yang2006/jumping_cGL.vti similarity index 100% rename from Ready/Patterns/Yang2006/jumping_cGL.vti rename to Patterns/Yang2006/jumping_cGL.vti diff --git a/Ready/Patterns/bunny.vtu b/Patterns/bunny.vtu similarity index 100% rename from Ready/Patterns/bunny.vtu rename to Patterns/bunny.vtu diff --git a/Ready/Patterns/heat_equation.vti b/Patterns/heat_equation.vti similarity index 100% rename from Ready/Patterns/heat_equation.vti rename to Patterns/heat_equation.vti diff --git a/Ready/Patterns/heat_equation_interpolation.vti b/Patterns/heat_equation_interpolation.vti similarity index 100% rename from Ready/Patterns/heat_equation_interpolation.vti rename to Patterns/heat_equation_interpolation.vti diff --git a/Ready/Patterns/kernel_test.vti b/Patterns/kernel_test.vti similarity index 100% rename from Ready/Patterns/kernel_test.vti rename to Patterns/kernel_test.vti diff --git a/Ready/Patterns/lion.vtu b/Patterns/lion.vtu similarity index 100% rename from Ready/Patterns/lion.vtu rename to Patterns/lion.vtu diff --git a/Ready/Patterns/oregonator.vti b/Patterns/oregonator.vti similarity index 100% rename from Ready/Patterns/oregonator.vti rename to Patterns/oregonator.vti diff --git a/Ready/Patterns/parameter_modulation_demo.vti b/Patterns/parameter_modulation_demo.vti similarity index 100% rename from Ready/Patterns/parameter_modulation_demo.vti rename to Patterns/parameter_modulation_demo.vti diff --git a/Ready/Patterns/parameter_modulation_demo2.vti b/Patterns/parameter_modulation_demo2.vti similarity index 100% rename from Ready/Patterns/parameter_modulation_demo2.vti rename to Patterns/parameter_modulation_demo2.vti diff --git a/Ready/Patterns/wave_equation.vti b/Patterns/wave_equation.vti similarity index 100% rename from Ready/Patterns/wave_equation.vti rename to Patterns/wave_equation.vti diff --git a/README.txt b/README.txt index 85fbae587..dd9508b2e 100644 --- a/README.txt +++ b/README.txt @@ -1,69 +1,35 @@ - Copyright (C) 1991, 2010, Greg Turk, Tim Hutton - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. +--------------------------------------------------------------------------------- + About +--------------------------------------------------------------------------------- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +For help, visit: http://code.google.com/p/reaction-diffusion/ +Or email: reaction-diffusion@googlegroups.com - You should have received a copy of the GNU General Public License - along with this program. If not, see . +Ready is free software. Help us improve it! ------------------------- +For build instructions, see BUILD.txt. -This is a port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html +See Help/credits.html for a list of credits and acknowledgements. -The following copyright notice appeared with the original source code. -Greg Turk has given permission for the code to be distributed under the GPL. +--------------------------------------------------------------------------------- + License +--------------------------------------------------------------------------------- -==begin== +Copyright 2011, 2012, 2013 The Ready Bunch -Make spots and stripes with reaction-diffusion. +The Ready Bunch is: Tim Hutton, Robert Munafo, Andrew Trevorrow, Tom Rokicki, Dan Wills -The spot-formation system is described in the article: +This file is part of Ready. - "A Model for Generating Aspects of Zebra and Other Mammailian - Coat Patterns" - Jonathan B. L. Bard - Journal of Theoretical Biology, Vol. 93, No. 2, pp. 363-385 - (November 1981) +Ready is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. -The stripe-formation system is described in the book: - - Models of Biological Pattern Formation - Hans Meinhardt - Academic Press, 1982 - - -Permission is granted to modify and/or distribute this program so long -as the program is distributed free of charge and this header is retained -as part of the program. - -Copyright (c) Greg Turk, 1991 - -==end== - ------------------------- - -Build instructions: - -1) install CMake and OpenCV -2) run CMake to generate the build files for your chosen compiler -3) build - -Tested on Linux and Windows XP. Should work on all operating systems. - -There's a SpeedComparisons folder which isn't built by default. If you're interested in OpenMP, OpenCL etc. then you can build it. - ------------------------- - -Contact: - -Tim Hutton -Greg Turk +Ready is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +See COPYING.txt for the full GNU General Public License. +Or visit . diff --git a/Ready/CMakeLists.txt b/Ready/CMakeLists.txt deleted file mode 100644 index b5a025f0a..000000000 --- a/Ready/CMakeLists.txt +++ /dev/null @@ -1,459 +0,0 @@ -# Copyright 2011, 2012, 2013 The Ready Bunch -# -# This file is part of Ready. -# -# Ready is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Ready is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Ready. If not, see . -# -#-------------------------------------------------------------------------- - -cmake_minimum_required( VERSION 2.6 ) -cmake_policy( SET CMP0003 NEW ) - -project( Ready ) - -set( READY_VERSION 0.6 ) # check matches Help/about.html -add_definitions( -D READY_VERSION=${READY_VERSION} ) - -if( APPLE OR WIN32 ) - # app names are usually capitalized on Mac OS X and Windows - set( APP_NAME Ready ) -else() - # Linux binaries are usually all lowercase - set( APP_NAME ready ) -endif() -set( CMD_NAME rdy ) # command-line version - -#-------------------------------------------source files---------------------------------------------- - -set( BASE_SOURCES # low-level code used in all executables - src/readybase/AbstractRD.hpp src/readybase/AbstractRD.cpp - src/readybase/ImageRD.hpp src/readybase/ImageRD.cpp - src/readybase/GrayScottImageRD.hpp src/readybase/GrayScottImageRD.cpp - src/readybase/OpenCLImageRD.hpp src/readybase/OpenCLImageRD.cpp - src/readybase/FormulaOpenCLImageRD.hpp src/readybase/FormulaOpenCLImageRD.cpp - src/readybase/FullKernelOpenCLImageRD.hpp src/readybase/FullKernelOpenCLImageRD.cpp - src/readybase/MeshRD.hpp src/readybase/MeshRD.cpp - src/readybase/GrayScottMeshRD.hpp src/readybase/GrayScottMeshRD.cpp - src/readybase/OpenCLMeshRD.hpp src/readybase/OpenCLMeshRD.cpp - src/readybase/FormulaOpenCLMeshRD.hpp src/readybase/FormulaOpenCLMeshRD.cpp - src/readybase/FullKernelOpenCLMeshRD.hpp src/readybase/FullKernelOpenCLMeshRD.cpp - src/readybase/OpenCL_MixIn.hpp src/readybase/OpenCL_MixIn.cpp - src/readybase/OpenCL_utils.hpp src/readybase/OpenCL_utils.cpp - src/readybase/IO_XML.hpp src/readybase/IO_XML.cpp - src/readybase/overlays.hpp src/readybase/overlays.cpp - src/readybase/Properties.hpp src/readybase/Properties.cpp - src/readybase/utils.hpp src/readybase/utils.cpp - src/readybase/OpenCL_Dyn_Load.h src/readybase/OpenCL_Dyn_Load.c - src/readybase/MeshGenerators.hpp src/readybase/MeshGenerators.cpp - src/readybase/SystemFactory.hpp src/readybase/SystemFactory.cpp - src/readybase/scene_items.hpp src/readybase/scene_items.cpp -) -include_directories( src/readybase ) - -set( GUI_SOURCES # high-level GUI code used only in Ready - src/gui/IDs.hpp - src/gui/wxutils.hpp src/gui/wxutils.cpp - src/gui/dialogs.hpp src/gui/dialogs.cpp - src/gui/prefs.hpp src/gui/prefs.cpp - src/gui/app.hpp src/gui/app.cpp - src/gui/frame.hpp src/gui/frame.cpp - src/gui/HelpPanel.hpp src/gui/HelpPanel.cpp - src/gui/InfoPanel.hpp src/gui/InfoPanel.cpp - src/gui/PatternsPanel.hpp src/gui/PatternsPanel.cpp - src/gui/vtk_pipeline.hpp src/gui/vtk_pipeline.cpp - src/gui/InteractorStylePainter.hpp src/gui/InteractorStylePainter.cpp - src/gui/wxVTKRenderWindowInteractor.h src/gui/wxVTKRenderWindowInteractor.cxx - src/gui/RecordingDialog.hpp src/gui/RecordingDialog.cpp -) -include_directories( src/gui ) - -set( CMD_SOURCES # code used for the command-line version - src/cmd/main.cpp -) - -set( RESOURCES - resources/ready.rc - resources/appicon.ico - resources/appicon16.ico - resources/appicon32.ico - resources/appicon48.ico - resources/appicon.xpm - resources/Info.plist.in - resources/app.icns - resources/file.icns -) -include_directories( resources ) - -set( PATTERN_FILES - Patterns/Meinhardt1982/stripes.vti Patterns/Meinhardt1982/zebra.vtu - Patterns/Schlogl.vti - Patterns/heat_equation.vti - Patterns/Turing1952/spots.vti Patterns/Turing1952/spots_noisy.vti - Patterns/kernel_test.vti - Patterns/parameter_modulation_demo.vti - Patterns/parameter_modulation_demo2.vti - Patterns/bunny.vtu - Patterns/lion.vtu - Patterns/heat_equation_interpolation.vti - Patterns/Ginzburg-Landau/complex_Ginzburg-Landau.vti - Patterns/Ginzburg-Landau/complex_Ginzburg-Landau_magnitude.vti - Patterns/wave_equation.vti - Patterns/oregonator.vti - Patterns/Brusselator.vti - Patterns/SmoothLife/smoothglider.vti Patterns/SmoothLife/smoothlifeL.vti Patterns/SmoothLife/glider_3D.vti - Patterns/Purwins1999/glider.vti Patterns/Purwins1999/glider_3D.vti Patterns/Purwins1999/multiGlider.vti - Patterns/CPU-only/grayscott_1D.vti - Patterns/CPU-only/grayscott_2D.vti - Patterns/CPU-only/grayscott_3D.vti - Patterns/FitzHugh-Nagumo/tip-splitting.vti - Patterns/FitzHugh-Nagumo/spiral_turbulence.vti - Patterns/FitzHugh-Nagumo/pulsate.vti - Patterns/FitzHugh-Nagumo/squid_axon.vti - Patterns/FitzHugh-Nagumo/Ising_regime.vti - Patterns/Gray-Scott/Lesmes_noisy.vti - Patterns/Gray-Scott/noisy_solitons_mitosis.vti - Patterns/Gray-Scott/parameter-map.vti - Patterns/Gray-Scott/Pearson1993.vti - Patterns/Gray-Scott/self-replicating_spots.vti - Patterns/Gray-Scott/U-Skate/Hutton-and-helix-gliders.vti - Patterns/Gray-Scott/U-Skate/Munafo_glider.vti - Patterns/Gray-Scott/U-Skate/o-ring_2D.vti - Patterns/CellularAutomata/Bays_3D.vti - Patterns/CellularAutomata/Conway_life.vti - Patterns/CellularAutomata/life_torus.vtu - Patterns/CellularAutomata/larger-than-life.vti - Patterns/CellularAutomata/Buss_hex.vtu - Patterns/CellularAutomata/tri_life.vtu - Patterns/CellularAutomata/hex_B2oS2m34_gliders.vtu - Patterns/CellularAutomata/PenroseTilings/life.vtu - Patterns/CellularAutomata/PenroseTilings/life_oscillators.vtu - Patterns/CellularAutomata/PenroseTilings/Goucher_glider.vtu - Patterns/CellularAutomata/PenroseTilings/Imai_glider_B2SC4.vtu - Patterns/CellularAutomata/PenroseTilings/Goucher_loops.vtu - Patterns/CellularAutomata/Salt/salt2D_demo.vti - Patterns/CellularAutomata/Salt/salt3D_circular330.vti - Patterns/Yang2002/Yang_1.vti Patterns/Yang2002/Yang_2b.vti Patterns/Yang2002/Yang_2c.vti - Patterns/Yang2002/Yang_2d.vti Patterns/Yang2002/Yang_3a.vti Patterns/Yang2002/Yang_3b.vti - Patterns/Yang2002/Yang_3c.vti Patterns/Yang2002/Yang_3d.vti Patterns/Yang2002/Yang_4.vti - Patterns/Yang2003/Fig2.vti - Patterns/Yang2003/Fig3a.vti Patterns/Yang2003/Fig3b.vti Patterns/Yang2003/Fig3c.vti - Patterns/McCabe/McCabe.vti Patterns/McCabe/McCabe_simple.vti - Patterns/McCabe/McCabe_additive2b.vti Patterns/McCabe/McCabe_additive2a.vti - Patterns/Kytta2007/Fig5.7a.vti Patterns/Kytta2007/Fig5.7c.vti - Patterns/Kytta2007/Fig5.8c.vti Patterns/Kytta2007/Fig5.8d.vti Patterns/Kytta2007/Fig5.8e.vti - Patterns/Kytta2007/Fig5.8f.vti Patterns/Kytta2007/Fig5.8g.vti - Patterns/Yang2006/jumping.vti Patterns/Yang2006/jumping_cGL.vti - Patterns/Schrodinger1926/packet.vti Patterns/Schrodinger1926/packet_reflect.vti - Patterns/Schrodinger1926/packet_pass.vti Patterns/Schrodinger1926/quantum_tunnelling.vti - Patterns/Schrodinger1926/packet_reflect2D.vti - Patterns/Experiments/mutually-catalytic_spots.vti - Patterns/Experiments/cglrd_ramps_example_djw.vti - Patterns/Experiments/grayscott-historyWave_fuseWorms.vti - Patterns/Experiments/grayscott-historyWave_moreLifelike.vti - Patterns/Experiments/grayscott-historyWaveDC_solitonsAndWorms_init.vti - Patterns/Experiments/orbits_explodey_init.djw.vti - Patterns/Experiments/orbits_sharpWaves-init_djw.vti - Patterns/Experiments/grayscott-historyWave_coralGrow_djw.vti - Patterns/Experiments/gladman_vermiformSolitons.vti -) - -set( HELP_FILES - Help/about.gif Help/about.html - Help/action.html Help/credits.html - Help/file.html Help/help.html - Help/mouse.html Help/quickstart.html - Help/tips.html Help/changes.html - Help/edit.html Help/formats.html - Help/index.html Help/problems.html - Help/view.html Help/introduction.html -) - -set( OTHER_FILES - ./README.txt - ./COPYING.txt - ./TODO.txt - ./BUILD.txt - ./CMakeLists.txt - src/FindOpenCL.cmake - src/Doxyfile.in - resources/logo.png - resources/Icons/22px/icon-pointer.png - resources/Icons/22px/draw-freehand.png - resources/Icons/22px/draw-brush.png - resources/Icons/22px/color-picker.png - resources/Icons/22px/document-new.png - resources/Icons/22px/document-open.png - resources/Icons/22px/document-save.png - resources/Icons/22px/document-revert.png - resources/Icons/22px/media-playback-start_green.png - resources/Icons/22px/media-playback-pause_red.png - resources/Icons/22px/media-seek-forward.png - resources/Icons/22px/media-seek-backward.png - resources/Icons/22px/media-skip-backward_modified.png - resources/Icons/22px/media-record.png - resources/Icons/22px/system-run.png - resources/Icons/22px/list-add_gray.png - resources/Icons/22px/camera-photo.png - resources/Icons/32px/icon-pointer.png - resources/Icons/32px/draw-freehand.png - resources/Icons/32px/draw-brush.png - resources/Icons/32px/color-picker.png - resources/Icons/32px/document-new.png - resources/Icons/32px/document-open.png - resources/Icons/32px/document-save.png - resources/Icons/32px/document-revert.png - resources/Icons/32px/media-playback-start_green.png - resources/Icons/32px/media-playback-pause_red.png - resources/Icons/32px/media-seek-forward.png - resources/Icons/32px/media-seek-backward.png - resources/Icons/32px/media-skip-backward_modified.png - resources/Icons/32px/media-record.png - resources/Icons/32px/system-run.png - resources/Icons/32px/list-add_gray.png - resources/Icons/32px/camera-photo.png - resources/Cursors/pencil-cursor.png - resources/Cursors/brush-cursor.png - resources/Cursors/picker-cursor.png -) - -#-------------------------------------------VTK---------------------------------------------- - -find_package( VTK ) -if( VTK_FOUND ) - include( ${VTK_USE_FILE} ) -else() - message(FATAL_ERROR "Cannot build the executable without VTK. Please set the VTK variables.") -endif() - -#-------------------------------------------wxVTK---------------------------------------------- - -# The following allows you to access wxGLCanvas for GTK -IF(WIN32) - SET(GUI_EXECUTABLE WIN32) -ELSE(WIN32) - IF(APPLE) - SET(GUI_EXECUTABLE MACOSX_BUNDLE) - IF(VTK_USE_COCOA) - SET_SOURCE_FILES_PROPERTIES( - src/gui/wxVTKRenderWindowInteractor.cxx - PROPERTIES COMPILE_FLAGS "-ObjC++") - ENDIF(VTK_USE_COCOA) - ELSE(APPLE) - # Ok X11 for sure, but just check: - IF(NOT VTK_USE_X) - MESSAGE(FATAL_ERROR "You need to have VTK_USE_X") - ENDIF(NOT VTK_USE_X) - # CMake 2.6: - # technically those packages are not required since one can still use the Motif/X11 version and not the gtk one: - FIND_PACKAGE(PkgConfig) - pkg_check_modules (GTK2 gtk+-2.0) - #MESSAGE("${GTK2_INCLUDE_DIRS}") - INCLUDE_DIRECTORIES(${GTK2_INCLUDE_DIRS}) - LINK_LIBRARIES(${GTK2_LIBRARIES}) - # Can I require all my user to have the gl lib on linux, even if they do not really need it... - SET(WXGLCANVASLIBS "gl") - ENDIF(APPLE) -ENDIF(WIN32) - -#-------------------------------------------wxWidgets---------------------------------------------- - -if( APPLE ) - # on Mac OS X it's better to use locally installed wxWidgets headers and libs - # (the pre-installed stuff tends to be out of date; eg. 10.6 has wxMac 2.8.8 and it's a 32-bit debug build) - set( wxWidgets_CONFIG_EXECUTABLE /usr/local/bin/wx-config ) - set( wxWidgets_wxrc_EXECUTABLE /usr/local/bin/wxrc ) # not used, but no harm leaving it in -elseif(UNIX) - # remove -rdynamic from link options on Linux to reduce size by about 1.2MB - set( CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "" ) -endif() - -# wxWidgets is required to build the project -FIND_PACKAGE( wxWidgets COMPONENTS html aui ${WXGLCANVASLIBS} core adv base ) - -IF( wxWidgets_FOUND ) - INCLUDE( ${wxWidgets_USE_FILE} ) -ELSE() - MESSAGE( FATAL_ERROR "Cannot build the executable without wxWidgets. Please set the wxWidgets variables." ) -ENDIF() - -if( WIN32 ) - # prevent link errors with wxMSW 2.9.x - add_definitions( -DwxDEBUG_LEVEL=0 ) -endif() - -#-------------------------------------------OpenCL---------------------------------------------- - -set( CMAKE_MODULE_PATH ${Ready_SOURCE_DIR}/src ) -# (we include our own FindOpenCL.cmake until the time that CMake comes with its own) - -# we need to build against OpenCL -find_package( OpenCL REQUIRED ) -include_directories( ${OPENCL_INCLUDE_DIRS} ) -if( APPLE ) - link_libraries( ${OPENCL_LIBRARIES} ) # on MacOSX we assume that OpenCL is available (might need to rethink for versions before 10.6) -endif() - -#---------------copy installation files to build folder (helps with testing)-------------------- - -foreach( file ${PATTERN_FILES} ${HELP_FILES} ${RESOURCES} ${OTHER_FILES} ) - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${file}" - COMMAND cmake -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${file}" "${CMAKE_CURRENT_BINARY_DIR}/${file}" - DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${file}" - ) - list( APPEND files_dest "${CMAKE_CURRENT_BINARY_DIR}/${file}" ) -endforeach() - -add_custom_target( CopyFiles ALL DEPENDS ${files_dest} ) - -#-------------------------------- build ------------------------------------------------------ - -# ensure we link the C runtime statically (N.B. still appears as /MD in the CMake gui but ignore this) -# see: http://www.cmake.org/Wiki/CMake_FAQ#Dynamic_Replace -foreach( var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO ) - string( REGEX REPLACE "/MD" "/MT" ${var} "${${var}}" ) -endforeach() - -set( USE_SSE "YES" ) -if( USE_SSE ) - # enable SSE to allow us to set flags to avoid denormals, and relax the floating-point accuracy for speed - if( MSVC ) - if( CMAKE_CL_64 ) - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:fast" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast" ) - else() # avoid warning on x64, which always comes with SSE2 - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:SSE2 /fp:fast" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2 /fp:fast" ) - endif() - else() - add_definitions( -msse2 -ffast-math ) - endif() - add_definitions( -DUSE_SSE ) -endif() - -if( APPLE ) - # support Mac OS 10.5 or later - add_definitions( -mmacosx-version-min=10.5 ) -endif() -if( APPLE OR UNIX ) - # use same settings as in makefiles - add_definitions( -D_LARGE_FILES ) -endif() - -# create base library used by all executables -add_library( readybase STATIC ${BASE_SOURCES} ) -if( ${VTK_MAJOR_VERSION} GREATER 5 ) - target_link_libraries( - readybase - vtkCommonCore - vtkFiltersModeling - vtkFiltersTexture - vtkInteractionStyle - vtkIOXML - vtkRenderingCore - vtkRenderingAnnotation - vtkRenderingFreeType - vtkRenderingFreeTypeOpenGL - ) -else() - target_link_libraries( readybase vtkCommon vtkGraphics vtkIO vtkRendering vtkHybrid ) -endif() - -# create command-line utility -add_executable( ${CMD_NAME} ${CMD_SOURCES} ) -target_link_libraries( ${CMD_NAME} readybase ) - -# create GUI application -add_executable( ${APP_NAME} ${GUI_EXECUTABLE} ${GUI_SOURCES} ${RESOURCES} ) -target_link_libraries( ${APP_NAME} readybase ${wxWidgets_LIBRARIES} ) - -if( APPLE ) - # create Info.plist (using Info.plist.in) and PkgInfo files inside .app bundle - add_custom_target( app_bundle - COMMAND sed -e "s/VERSION/${READY_VERSION}/" ${CMAKE_SOURCE_DIR}/resources/Info.plist.in >Ready.app/Contents/Info.plist - COMMAND echo -n "APPLReDy" >Ready.app/Contents/PkgInfo - ) - add_dependencies( ${APP_NAME} app_bundle ) - - # copy *.icns files into Resources directory inside .app bundle - set_source_files_properties( ${CMAKE_SOURCE_DIR}/resources/app.icns PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) - set_source_files_properties( ${CMAKE_SOURCE_DIR}/resources/file.icns PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) - - # remove unreachable functions and data, and don't add debug info (reduces app size by about 12MB) - target_link_libraries( ${APP_NAME} -Wl,-dead_strip -Wl,-S ) -endif() - -# (Visual Studio only) put the executable in the root binary folder, not in "Debug" or "Release" -# http://stackoverflow.com/questions/543203/cmake-runtime-output-directory-on-windows -if( MSVC_IDE ) - set_target_properties( ${APP_NAME} PROPERTIES PREFIX "../" ) - set_target_properties( ${CMD_NAME} PROPERTIES PREFIX "../" ) -endif() - -# avoid security warnings -if( MSVC ) - add_definitions( /D_CRT_SECURE_NO_WARNINGS /D_CRT_NONSTDC_NO_WARNINGS ) -endif() - -if( CMAKE_COMPILER_IS_GNUCXX ) - list( APPEND CMAKE_EXE_LINKER_FLAGS_RELEASE "-s" ) # strip release binary, for smaller file size -endif() - -#----------------------------------------doxygen------------------------------------------------ - -find_package( Doxygen ) -if(DOXYGEN_FOUND) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) - add_custom_target(doc - ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) -endif() -# (if doxygen is found then 'make doc' should produce html documentation of the source code) - -#----------------------------------------install------------------------------------------------ - -# put Ready in the root of the installation folder instead of in "bin" -install( TARGETS ${APP_NAME} DESTINATION "." ) # (add ${CMD_NAME} if we want to distribute the command-line version too) - -# install our source files, resource files, pattern files, help files and text files -foreach( source_file ${BASE_SOURCES} ${GUI_SOURCES} ${CMD_SOURCES} ${RESOURCES} ${PATTERN_FILES} ${HELP_FILES} ${OTHER_FILES} ) - get_filename_component( path_name "${source_file}" PATH ) - install( FILES "${source_file}" DESTINATION ${path_name} ) -endforeach() - -#----------------------------------------package---------------------------------------------- - -if( APPLE ) - set( CPACK_SYSTEM_NAME "Mac" ) # nicer than "Darwin" -elseif( UNIX ) - if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) - set( CPACK_SYSTEM_NAME "Linux-64bit" ) - else() - set( CPACK_SYSTEM_NAME "Linux-32bit" ) - endif() -elseif( WIN32) - if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) - set( CPACK_SYSTEM_NAME "Windows-64bit" ) - else() - set( CPACK_SYSTEM_NAME "Windows-32bit" ) - endif() -endif() -if( NOT USE_SSE ) - set( CPACK_SYSTEM_NAME "${CPACK_SYSTEM_NAME}-noSSE" ) -endif() -set( CPACK_GENERATOR "ZIP" ) -set( CPACK_PACKAGE_VERSION "${READY_VERSION}" ) -set( CPACK_SOURCE_GENERATOR "ZIP" ) -include( CPack ) diff --git a/Ready/COPYING.txt b/Ready/COPYING.txt deleted file mode 100644 index 94a9ed024..000000000 --- a/Ready/COPYING.txt +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. diff --git a/Ready/README.txt b/Ready/README.txt deleted file mode 100644 index dd9508b2e..000000000 --- a/Ready/README.txt +++ /dev/null @@ -1,35 +0,0 @@ ---------------------------------------------------------------------------------- - About ---------------------------------------------------------------------------------- - -For help, visit: http://code.google.com/p/reaction-diffusion/ -Or email: reaction-diffusion@googlegroups.com - -Ready is free software. Help us improve it! - -For build instructions, see BUILD.txt. - -See Help/credits.html for a list of credits and acknowledgements. - ---------------------------------------------------------------------------------- - License ---------------------------------------------------------------------------------- - -Copyright 2011, 2012, 2013 The Ready Bunch - -The Ready Bunch is: Tim Hutton, Robert Munafo, Andrew Trevorrow, Tom Rokicki, Dan Wills - -This file is part of Ready. - -Ready is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -Ready is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -See COPYING.txt for the full GNU General Public License. -Or visit . diff --git a/Ready_old_CLI/CMakeLists.txt b/Ready_old_CLI/CMakeLists.txt deleted file mode 100644 index 857219474..000000000 --- a/Ready_old_CLI/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -cmake_minimum_required(VERSION 2.6) - -project(ReaDy) - -if (NOT CMAKE_BUILD_TYPE) - message(STATUS "No build type selected, default to Release") - set(CMAKE_BUILD_TYPE "Release") -endif() - -if(MSVC) - add_definitions(/arch:SSE2) -else() - add_definitions(-msse2) -endif() - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "calc_Brusselator" ) -INCLUDE_DIRECTORIES( "calc_Gray_Scott" ) -INCLUDE_DIRECTORIES( "ready_display" ) - -add_executable(ReaDy - main-ready.cpp - util.h - util.cpp - calc_Brusselator/brusselator.cpp - calc_Brusselator/brusselator.h - calc_Gray_Scott/gray_scott_scalar.cpp - calc_Gray_Scott/gray_scott_scalar.h - calc_Gray_Scott/gray_scott_hwivector.cpp - calc_Gray_Scott/gray_scott_hwivector.h - calc_Gray_Scott/hwi_vector.h - calc_Gray_Scott/dicek.h - ready_display/ready_display.cpp - ready_display/ready_display.h -) - -TARGET_LINK_LIBRARIES(ReaDy ${OpenCV_LIBS} ) diff --git a/Ready_old_CLI/calc_Brusselator/brusselator.cpp b/Ready_old_CLI/calc_Brusselator/brusselator.cpp deleted file mode 100644 index 2afa0ce2c..000000000 --- a/Ready_old_CLI/calc_Brusselator/brusselator.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -#include -#include - -#include "brusselator.h" -#include "../util.h" - -static int g_width; -static int g_height; -static bool g_wrap; -static bool g_paramspace; - - -void bruss_init(float *a, float *b, int width, int height, float A, float B) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < width*height; i++) - { - a[i] = A + ut_frand(-0.01f,0.01f); - b[i] = B/A + ut_frand(-0.01f,0.01f); - } -} - -void bruss_compute_setup(int width, int height, bool wrap, bool paramspace) -{ - g_width = width; - g_height = height; - g_wrap = wrap; - g_paramspace = paramspace; -} - -void compute_bruss(float *a, float *b, float *da, float *db, - float A, float B, float D1, float D2, float speed) -{ - // compute change in each cell - for(int i = 0; i < g_height; i++) - { - int iprev, inext; - if (g_wrap) { - iprev = (i + g_height - 1) % g_height; - inext = (i + 1) % g_height; - } else { - iprev = max(0,i-1); - inext = min(g_height-1,i+1); - } - - for(int j = 0; j < g_width; j++) - { - int jprev, jnext; - if (g_wrap) { - jprev = (j + g_width - 1) % g_width; - jnext = (j + 1) % g_width; - } else { - jprev = max(0,j-1); - jnext = min(g_width-1,j+1); - } - - float aval = a[i*g_width+j]; - float bval = b[i*g_width+j]; - - if(g_paramspace) - { - const float A1=0.0, A2=4.0, B1=0.0, B2=15.0; - A = A1+(A2-A1)*((float)i)/((float)g_height); - B = B1+(B2-B1)*((float)j)/((float)g_width); - } - - // compute the Laplacians of a and b - float dda = a[i*g_width+jprev] + a[i*g_width+jnext] + a[iprev*g_width+j] + a[inext*g_width+j] - 4*aval; - float ddb = b[i*g_width+jprev] + b[i*g_width+jnext] + b[iprev*g_width+j] + b[inext*g_width+j] - 4*bval; - - // compute the new rate of change of a and b - da[i*g_width+j] = A-(B+1)*aval + aval*aval*bval + D1*dda; - db[i*g_width+j] = B*aval - aval*aval*bval + D2*ddb; - } - } - - // effect change - for(int i = 0; i < g_width*g_height; i++) { - a[i] += (speed * da[i]); - b[i] += (speed * db[i]); - } -} - diff --git a/Ready_old_CLI/calc_Brusselator/brusselator.h b/Ready_old_CLI/calc_Brusselator/brusselator.h deleted file mode 100644 index ce73bd8ce..000000000 --- a/Ready_old_CLI/calc_Brusselator/brusselator.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -void bruss_init(float *a, float *b, int width, int height, float A, float B); -void bruss_compute_setup(int width, int height, bool wrap, bool paramspace); -void compute_bruss(float *a, float *b, float *da, float *db, - float A, float B, float D1, float D2, float speed); diff --git a/Ready_old_CLI/calc_Gray_Scott/dicek.h b/Ready_old_CLI/calc_Gray_Scott/dicek.h deleted file mode 100644 index c68f87dfc..000000000 --- a/Ready_old_CLI/calc_Gray_Scott/dicek.h +++ /dev/null @@ -1,506 +0,0 @@ -/* - dicek.h ^u 120 ^x f - -This provides macros for multi-threaded programming. The intention is to allow a single program source to be compiled -for Windows, Linux or Mac OS, and accomplish multi-threading without having to have a lot of #ifdefs around the -OS-specific code. - -REVISION HISTORY - 20110929 First version of simple DICEK_SPLIT_MERGE function. This is a blocking, one-time N-way parallel subroutine -call (no provision for threads to continue through multiple synchronization barriers). Works in DICEK_USE_THREADS and -DICEK_EMULATE modes (tested with math3000.cxx A094358() routine). - 20110930 Add a first (extremely speculative and untested) shot at the Windows implementation, currently protected by a -fall-back #ifdef block at the beginning that checks for Windows and reverts to EMULATE mode, pending testing by a real -Windows programmer. - 20111001 Add the thread interlock macros and get them working in EMULATE and POSIX modes. - 20111009 Initialize mutexes in Win32 version of DICEK_SPLIT_1 - 20111011 Use semaphores instead of mutexes in Win32 version. - -*/ - -/* First we test for each of the known operating system environments */ - -#if (defined(__linux__) || defined(__APPLE__)) -# ifndef DICEK_USE_POSIX -# ifndef DICEK_EMULATE -# define DICEK_USE_POSIX -# endif -# endif -#endif - - -#ifdef _WIN32 -# ifndef DICEK_EMULATE -# define DICEK_USE_PROCESS_H -# endif -#endif - - -#ifndef DICEK_USE_PROCESS_H -# ifndef DICEK_USE_POSIX -# ifndef DICEK_EMULATE -# define DICEK_EMULATE -# endif -# endif -#endif - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// #include // -// /* xx*/ /* */ // -// /* */ /* */ // -// int main() {char a[] ={ 'T', 'h','i', 's',32, 'p','r' ,'o','g' // -// ,'r','a','m', 32, 'j', 'u', 's', 't' ,32 ,'d', 'o', 'e' ,'s' ,32 ,'i' // -// ,'t' ,32 ,'t' ,'h' ,'e' ,32, 'h' ,97,'r','d' ,32, 'w',97,121,33, 32, 40, // -// 68, 'o', 'n', 39, 't' ,32 ,'y' ,'o', 117, 32 ,'t' ,'h' ,'i' // -// ,'n' /* Xy =a +3 +n ++ ;a= b- (* x/z ); if // -// (Xy-++n<(z+*x))z =b;a +b, z+= x*/,107 , 63,63 ,63,41,'\n' ,00}; puts(a);} /*.RPM.*/ // -// // -// Emulated versions of the DICEK macros. (These also serve as documentation for what each macro does) // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#ifdef DICEK_EMULATE - -/* - We cannot do inter-thread communication because we execute each "spawned thread" to completion before starting the next -one. Therefore, the first time any child thread tries to DICEK_INTERLOCK, the program will deadlock because there is no -way for the parent to get to the DICEK_CH_BEGIN until the child has exited. - */ -#define DICEK_SUPPORTS_BLOCKING 0 - -/* -DICEK_THREAD_VARS defines variables which need to be included in the parameter block of any subroutine called by -DICEK_SPLIT_MERGE - Since this is the emulated version, there are no master_wkg and child_wkg semaphores. - */ -#define DICEK_THREAD_VARS \ - long DICEK_tnum; \ - void * DICEK_thread; \ - void * DICEK_return; - -/* - DICEK_INIT_NTHR declares an integer-type variable named "nth" and initializes it to the number of hardware threads -supported by this system. Place it in your main() or a function from which threads will be launched. - If you want your threads to have access to the value of nth, you should do "glob = nth;" right after the -DICEK_INIT_NTHR(), where "glob" is a suitably scoped global variable. - -In emulated mode we return 3 as the number of "hardware threads", partly because 3 is a fairly rare value in real -computers (the Athlon X3 being about the only one anyone will have) and therefore if you find that DICEK is running 3 -"threads" then you know it probably failed to auto-detect the environment. - Also, as of this writing (2011) 3 threads is about the average number of hardware threads across all portable and -desktop computers that are out there. (Anything by Intel with the "Core i3" or higher brand has 4 threads, and most -desktop machines have at least 2 cores and 4 threads as well) - However, since this is the emulated-mode version of the macros, the "threads" are actually going to just run one -after the next. - */ -#define DICEK_INIT_NTHR(nth) int nth = 3; - -/* - Place DICEK_DATA in the variable declarations area of the function containing an DICEK_FORK directive. dtype should be -a struct which contains the DICEK_THREAD_VARS macro, and as many other fields as you want. nth is the number of threads -that you will be creating with DICEK_SPLIT_MERGE. - It will declare an array aname (dynamically allocated) which will be an array of structs of type dtype, and fill the -DICEK-specific fields with a thread number and a pointer to the pthreads data for each thread that will be used by -DICEK_SPLIT_MERGE. - */ -#define DICEK_DATA(dtype, arrayname, nth) \ - dtype * arrayname; \ - arrayname = (dtype *)malloc(nth * sizeof(dtype)); \ - for(int _DICEK_i=0; _DICEK_iDICEK_return = (void*)(returnv); - -#define DICEK_MERGE_M(arrayname, index) /* nop */ - -#define DICEK_MERGE(funcname, arrayname, nth) /* nop */ - -#endif - -/* - - - - - - - - - - - - - - - - - - - - - End of the EMULATED section - - - - - - - - - - - - - - - - - - - - - - */ - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// , , // -// /( )` // -// \ \___ / | // -// _nnnn_ /- _ `-/ ' // -// dGGGGMMb (/\/ \ \ /\ // -// @p~qp~~qMb / / | ` \ // -// M|@||@) M| ooooooooo. .oooooo. .oooooo..o ooooo ooooooo ooooo O O ) / | // -// @,----.JM| `888 `Y88. d8P' `Y8b d8P' `Y8 `888' `8888 d8' `-^--'`< ' // -// JS^\__/ qKL 888 .d88' 888 888 Y88bo. 888 Y888..8P (_.) _ ) / // -// dZP qKRb 888ooo88P' 888 888 `"Y8888o. 888 `8888' `.___/` / // -// dZP qKKb 888 888 888 `"Y88b 888 .8PY888. `-----' / // -// fZP SMMb 888 `88b d88' oo .d8P 888 d8' `888b <----. __ / __ \ // -// HZM MMMM o888o `Y8bood8P' 8""88888P' o888o o888o o88888o <----|====O)))==) \) /==== // -// FqM MMMM <----' `--' `.__,' \ // -// __| ". |\dS"qML | | // -// | `. | `' \Zq \ / /\// -//_) \.___.,| .' ______( (_ / \______/ // -//\____ )MMMMMP| .' Versions of the DICEK macros for Linux, Mac OS, ,' ,-----' | // -// `-' `--' hjm and other POSIX-compliant environments `--{__________) // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#ifdef DICEK_USE_POSIX - -#define DICEK_SUPPORTS_BLOCKING 1 - -#include - -// Mach-based system (most notably Apple) use sysctlbyname to query the number of CPUs/cores/threads -# ifdef __MACH__ -# include -# include -# endif - -// TODO: Some Linux systems might need an #include to access the mechanism for finding out the number of threads. - - -#define DICEK_THREAD_VARS \ - long DICEK_tnum; \ - void * DICEK_thread; \ - pthread_mutex_t DICEK_master_wkg; \ - pthread_mutex_t DICEK_child_wkg; \ - void * DICEK_return; - - -#ifdef __APPLE__ -# define DICEK_INIT_NTHR(nth) int nth; { \ - size_t _DICEK_sz_in = sizeof(nth); \ - long _DICEK_rv2 = sysctlbyname("hw.ncpu", (void *) (&nth), &_DICEK_sz_in, 0, 0); \ - if (nth <= 0) { nth = 1; } \ - if (nth > 64) { nth = 64; } } -#else -// TODO: How to query number of threads in Linux (I suspect reading /proc/cpuinfo will work on most, -// but not all, Linuces) -# define DICEK_INIT_NTHR(nth) int nth = 3; -#endif - -#define DICEK_DATA(dtype, arrayname, nth) \ - dtype * arrayname; \ - arrayname = (dtype *)malloc(nth * sizeof(dtype)); \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg)); - -#define DICEK_CH_SYNC \ - pthread_mutex_unlock(&(_DICEK_params->DICEK_master_wkg)); - -#define DICEK_INTERLOCK(arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg)); - -#define DICEK_RETURN(returnv) pthread_exit((void*)(returnv)); - -#define DICEK_MERGE_M(arrayname, index) \ - DICEK_MERGE_1(arrayname, index) \ - pthread_mutex_destroy(&(arrayname[index].DICEK_child_wkg)); \ - pthread_mutex_unlock(&(arrayname[index].DICEK_master_wkg)); \ - pthread_mutex_destroy(&(arrayname[index].DICEK_master_wkg)); - -#define DICEK_MERGE(funcname, arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_i -#include - -#define DICEK_THREAD_VARS \ - long DICEK_tnum; \ - HANDLE DICEK_thread; \ - HANDLE DICEK_master_wkg; \ - HANDLE DICEK_child_wkg; \ - void * DICEK_return; - -#define DICEK_INIT_NTHR(nth) \ - int nth; \ - { SYSTEM_INFO si; GetSystemInfo(&si); nth = si.dwNumberOfProcessors; } - -#define DICEK_DATA(dtype, arrayname, nth) \ - dtype * arrayname; \ - arrayname = (dtype *)malloc(nth * sizeof(dtype)); \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg,INFINITE); - -#define DICEK_CH_SYNC \ - ReleaseSemaphore(_DICEK_params->DICEK_master_wkg, 1, NULL); - -#define DICEK_INTERLOCK(arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg, 1, NULL); - -#define DICEK_RETURN(returnv) \ - _DICEK_params->DICEK_return = (void*)(returnv); \ - _endthread(); - -#define DICEK_MERGE_M(arrayname, index) \ - DICEK_MERGE_1(arrayname, index) \ - CloseHandle(arrayname[index].DICEK_thread); \ - CloseHandle(arrayname[index].DICEK_child_wkg); \ - ReleaseSemaphore(arrayname[index].DICEK_master_wkg, 1, NULL); \ - CloseHandle(arrayname[index].DICEK_master_wkg); - -#define DICEK_MERGE(funcname, arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_i -#endif - -// To convince yourself that the macro library works on any hardware, -// un-comment this "#define HWIV_EMULATE", and you'll get the macros inside -// the #ifdef HWIV_V4F4_EMULATED block in hwi_vector.h. The emulated -// macros do everything with normal floats and arrays, and run about 3-4 times -// slower. -//#define HWIV_EMULATE -#define HWIV_WANT_V4F4 -#include "hwi_vector.h"; - -// stdlib: -#include - -#include "gray_scott_hwivector.h" -#include "../util.h" - -static int g_width; -static int g_height; -static bool g_wrap; -static bool g_paramspace; - -void gs_hwi_compute_setup(int width, int height, bool wrap, bool paramspace) -{ - g_width = width; - g_height = height; - g_wrap = wrap; - g_paramspace = paramspace; -} - -/* Spawn threads and dispatch to compute() routine inside threads */ -void compute_dispatch(float *u, float *v, float *du, float *dv, - float D_u, float D_v, float F, float k, float speed, - int num_its, int nthreads) -{ - if (nthreads <= 0) { - nthreads = 1; - } - DICEK_DATA(compute_params, cp, nthreads); - - int i; - int a_row = 0; - - /* Set up all the parameter blocks */ - for(i=0; i1) ? 1 : 0; - } - - if (nthreads > 1) { - /* Start N threads, each will immediately begin the first part of its computation */ - DICEK_SPLIT(compute_gs_hwiv, cp, nthreads); - - /* For each iteration we sync the threads once. */ - for(i=0; iu; - float *v = param_block->v; - float *du = param_block->du; - float *dv = param_block->dv; - float D_u = param_block->D_u; - float D_v = param_block->D_v; - float F = param_block->F; - float k = param_block->k; - float speed = param_block->speed; - int num_its = param_block->num_its; - int start_row = param_block->start_row; - int end_row = param_block->end_row; - int interlock = param_block->interlock_type; - - - int iter; - -#ifdef SUPPORT_PARAM_SPACE - const float k_min=0.045, k_max=0.07, F_min=0.01, F_max=0.09; - float k_diff; - V4F4 v4_kdiff; - k_diff = (k_max-k_min)/g_height; - v4_kdiff = v4SET(0, -k_diff, -2*k_diff, -3*k_diff); -#endif - - if (interlock) { DICEK_CH_BEGIN } - - // Scan per iteration - for(iter = 0; iter < num_its; iter++) { - - // Scan per row - for(int i = start_row; i < end_row; i++) { -#ifdef SUPPORT_PARAM_SPACE - V4F4 v4_F = v4SPLAT(F); - V4F4 v4_k = v4SPLAT(k); -#else - const V4F4 v4_F = v4SPLAT(F); - const V4F4 v4_k = v4SPLAT(k); -#endif - const V4F4 v4_Du = v4SPLAT(D_u); - const V4F4 v4_Dv = v4SPLAT(D_v); - int iprev,inext; - if (g_wrap) { - /* Periodic boundary condition */ - iprev = (i+g_height-1) % g_height; - inext = (i+1) % g_height; - } else { - /* The edges are their own neighbors. This amounts to a Neumann boundary condition. */ - iprev = max(i-1, 0); - inext = min(i+1, g_height-1); - } - -#ifdef SUPPORT_PARAM_SPACE - if (g_paramspace) { - // set F for this row (ignore the provided value) - F = F_min + (g_height-i-1) * (F_max-F_min)/g_height; - v4_F = v4SPLAT(F); - } -#endif - - // Scan per column in steps of vector width - for(int j = 1; j < wid_v-1; j++) { - V4F4 * v_ubase = ((V4F4 *)u)+i*wid_v+j; - V4F4 * v_vbase = ((V4F4 *)v)+i*wid_v+j; - V4F4 * v_dubase = ((V4F4 *)du)+i*wid_v+j; - V4F4 * v_dvbase = ((V4F4 *)dv)+i*wid_v+j; - V4F4 u_left = _mm_loadu_ps(((float*)v_ubase)-1); - V4F4 u_right = _mm_loadu_ps(((float*)v_ubase)+1); - V4F4 v_left = _mm_loadu_ps(((float*)v_vbase)-1); - V4F4 v_right = _mm_loadu_ps(((float*)v_vbase)+1); - V4F4 * v_ub_prev = ((V4F4 *)u)+iprev*wid_v+j; - V4F4 * v_ub_next = ((V4F4 *)u)+inext*wid_v+j; - V4F4 * v_vb_prev = ((V4F4 *)v)+iprev*wid_v+j; - V4F4 * v_vb_next = ((V4F4 *)v)+inext*wid_v+j; - -#ifdef SUPPORT_PARAM_SPACE - if (g_paramspace) { - // set k for this column (ignore the provided value) - k = k_min + (g_width-(j*4)-5)*k_diff; - // k decreases by k_diff each time j increases by 1, so this vector - // needs to contain 4 different k values. We use v4_kdiff, pre-computed - // above, to accomplish this. - v4_k = v4ADD(v4SPLAT(k),v4_kdiff); - } -#endif - - // To compute the Laplacians of u and v, we use the 5-point neighbourhood for the Euler discrete method: - // nabla(x) = x[i][j-1]+x[i][j+1]+x[i-1][j]+x[i+1][j] - 4*x[i][j]; - // ("nabla" is the name of the "upside down delta" symbol used for the Laplacian in equations) -# define NABLA_5PT(ctr,left,right,up,down) \ - v4SUB(v4ADD(v4ADD(v4ADD(left,right),up),down),v4MUL(ctr,v4SPLAT(4.0f))) - - // compute the new rate of change of u and v - V4F4 v4_uvv = v4MUL(*v_ubase,v4MUL(*v_vbase,*v_vbase)); // u*v^2 is used twice - - /* Scalar code is: du[i][j] = D_u * nabla_u - u*v^2 + F*(1-u); - We treat it as: D_u * nabla_u - (u*v^2 - F*(1-u)) */ - *v_dubase = v4SUB(v4MUL(v4_Du, - NABLA_5PT(*v_ubase, u_left, u_right, *v_ub_prev, *v_ub_next)), - v4SUB(v4_uvv,v4MUL(v4_F,v4SUB(v4SPLAT(1.0f),*v_ubase)))); - - /* dv formula is similar: dv[i][j] = D_v * nabla_v + u*v^2 - (F+k)*v; */ - *v_dvbase = v4ADD(v4MUL(v4_Dv, - NABLA_5PT(*v_vbase, v_left, v_right, *v_vb_prev, *v_vb_next)), - v4SUB(v4_uvv,v4MUL(v4ADD(v4_F,v4_k),*v_vbase))); - } - - } // End of scan per row - - if (interlock) { DICEK_CH_SYNC } - if (interlock) { DICEK_CH_BEGIN } - - { - int right_b, left_b; - if (g_wrap) { - right_b = wid_v-2; - left_b = 1; - } else { - right_b = 1; - left_b = wid_v-2; - } - // effect change - for(int i = start_row; i < end_row; i++) { - for(int j = 1; j < wid_v-1; j++) { - const V4F4 v4_speed = v4SPLAT(speed); - V4F4 * v_ubase = ((V4F4 *)u)+i*wid_v+j; - V4F4 * v_vbase = ((V4F4 *)v)+i*wid_v+j; - V4F4 * v_dubase = ((V4F4 *)du)+i*wid_v+j; - V4F4 * v_dvbase = ((V4F4 *)dv)+i*wid_v+j; - // u[i][j] = u[i][j] + speed * du[i][j]; - *v_ubase = v4ADD(v4MUL(v4_speed, *v_dubase), *v_ubase); - // v[i][j] = v[i][j] + speed * dv[i][j]; - *v_vbase = v4ADD(v4MUL(v4_speed, *v_dvbase), *v_vbase); - } - // Update cells on boundary from one row inland - *(((V4F4 *)u)+i*wid_v) = *(((V4F4 *)u)+i*wid_v+right_b); - *(((V4F4 *)v)+i*wid_v) = *(((V4F4 *)v)+i*wid_v+right_b); - *(((V4F4 *)u)+i*wid_v+wid_v-1) = *(((V4F4 *)u)+i*wid_v+left_b); - *(((V4F4 *)v)+i*wid_v+wid_v-1) = *(((V4F4 *)v)+i*wid_v+left_b); - } - } - - } // End of scan per iteration - - if (interlock) { DICEK_CH_SYNC } - - DICEK_CH_END -} -#endif - - -#ifdef HWIV_V4F4_PDE4 - -/* - This SSE code is based on the method in Munafo's PDE4 program, which tries to reduce memory accesses -by using aligned loads and shifting data internally to vector registers. The more obvious approach above -turned out to be faster. - */ - -#define VECSIZE 4 - -/* The parameter space code, specifically the "if(parameter_space)" test itself, causes a 2.5% slowdown even when the - parameter_space flag is false */ -#define SUPPORT_PARAM_SPACE - -void * compute_gs_hwiv(void * gpb) -{ - DICEK_SUB(compute_params, gpb); - -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86)) - /* On Intel we disable accurate handling of denorms and zeros. This is an - important speed optimization. */ - int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting - int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits - _mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR -#endif - - compute_params * param_block; - param_block = (compute_params *) gpb; - float *u = param_block->u; - float *v = param_block->v; - float *du = param_block->du; - float *dv = param_block->dv; - float D_u = param_block->D_u; - float D_v = param_block->D_v; - float F = param_block->F; - float k = param_block->k; - float speed = param_block->speed; - int parameter_space = param_block->parameter_space; - int num_its = param_block->num_its; - int width = param_block->width; - int height = param_block->height; - bool wrap = param_block->wrap; - int start_row = param_block->start_row; - int end_row = param_block->end_row; - int interlock = param_block->interlock_type; - - int iter; -#ifndef HWIV_HAVE_V4F4 - fprintf(stdout, "Did not get vector macros from HWIV\n"); - exit(-1); -#endif - // Vector "constants": speed, F, k, D_u, D_v - V4F4 v4_speed, v4_F, v4_k, v4_Du, v4_Dv; - // Pointers used to load data from rows of the grid - V4F4 *v_ub_prev, *v_ubase, *v_ub_next; - V4F4 *v_vb_prev, *v_vbase, *v_vb_next; - // Actual grid data - V4F4 v4_u_l, v4_u, v4_u_r; - V4F4 v4_v_l, v4_v, v4_v_r; - V4F4 v4_uvv; - // Pointers to second grid where we write the results of the main computation - V4F4 *v_dubase, *v_dvbase; - -#ifdef SUPPORT_PARAM_SPACE - const float k_min=0.045f, k_max=0.07f, F_min=0.01f, F_max=0.09f; - float k_diff; - V4F4 v4_kdiff; - k_diff = (k_max-k_min)/height; - v4_kdiff = v4SET(0, -k_diff, -2*k_diff, -3*k_diff); -#endif - - // Initialize our vectorized scalars - v4_speed = v4SPLAT(speed); - v4_F = v4SPLAT(F); - v4_k = v4SPLAT(k); - v4_Du = v4SPLAT(D_u); - v4_Dv = v4SPLAT(D_v); - - if (interlock) { DICEK_CH_BEGIN } - - // Scan per iteration - for(iter = 0; iter < num_its; iter++) { - -//printf("iter %d rows [%ld,%ld)\n",iter,start_row,end_row); - - // Scan per row - for(int i = start_row; i < end_row; i++) { - int iprev,inext; - int v_j2; - if (wrap) { - /* Periodic boundary condition */ - iprev = (i+height-1) % height; - inext = (i+1) % height; - } else { - /* The edges are their own neighbors. This amounts to a Neumann boundary condition. */ - iprev = max(i-1, 0); - inext = min(i+1, height-1); - } - - /* Get pointers to beginning of rows for each of the grids. We access - 3 rows each for u and v, and 1 row each for du and dv. */ - v_ubase = (V4F4 *)&(u[i*width]); - v_ub_prev = (V4F4 *)&(u[iprev*width]); - v_ub_next = (V4F4 *)&(u[inext*width]); - v_vbase = (V4F4 *)&(v[i*width]); - v_vb_prev = (V4F4 *)&(v[iprev*width]); - v_vb_next = (V4F4 *)&(v[inext*width]); - v_dubase = (V4F4 *)&(du[i*width]); - v_dvbase = (V4F4 *)&(dv[i*width]); - -#ifdef SUPPORT_PARAM_SPACE - if (parameter_space) { - // set F for this row (ignore the provided value) - F = F_min + (height-i-1) * (F_max-F_min)/width; - v4_F = v4SPLAT(F); - } -#endif - - /* Pre-load the first two blocks of data we need, which are the "center" - and "right" blocks from the end of the row (as if we have just wrapped - around from the end of the row back to the beginning) */ - v_j2 = wrap ? ((width-4)/VECSIZE) : 0; - - v4_u = *(v_ubase+v_j2); - v4_v = *(v_vbase+v_j2); - v4_u_r = *v_ubase++; - v4_v_r = *v_vbase++; - - // Scan per column in steps of vector width - for(int j = 0; j < width-VECSIZE; j+=VECSIZE) { - // Get a new 4 pixels from the current row and shift the other 8 pixels over - v4_u_l = v4_u; v4_u = v4_u_r; v4_u_r = *v_ubase; - v4_v_l = v4_v; v4_v = v4_v_r; v4_v_r = *v_vbase; - -#ifdef SUPPORT_PARAM_SPACE - if (parameter_space) { - // set k for this column (ignore the provided value) - k = k_min + (width-j-1)*k_diff; - // k decreases by k_diff each time j increases by 1, so this vector - // needs to contain 4 different k values. We use v4_kdiff, pre-computed - // above, to accomplish this. - v4_k = v4ADD(v4SPLAT(k),v4_kdiff); - } -#endif - - // To compute the Laplacians of u and v, we use the 5-point neighbourhood for the Euler discrete method: - // nabla(x) = x[i][j-1]+x[i][j+1]+x[i-1][j]+x[i+1][j] - 4*x[i][j]; - // ("nabla" is the name of the "upside down delta" symbol used for the Laplacian in equations) -# define NABLA_5PT(ctr,left,right,up,down) \ - v4SUB(v4ADD(v4ADD(v4ADD(left,right),up),down),v4MUL(ctr,v4SPLAT(4.0f))) - - // compute the new rate of change of u and v - v4_uvv = v4MUL(v4_u,v4MUL(v4_v,v4_v)); // u*v^2 is used twice - - /* Scalar code is: du[i][j] = D_u * nabla_u - u*v^2 + F*(1-u); - We treat it as: D_u * nabla_u - (u*v^2 - F*(1-u)) */ - *v_dubase = v4SUB(v4MUL(v4_Du, - NABLA_5PT(v4_u, v4RAISE(v4_u,v4_u_l), v4LOWER(v4_u,v4_u_r), *v_ub_prev, *v_ub_next)), - v4SUB(v4_uvv,v4MUL(v4_F,v4SUB(v4SPLAT(1.0f),v4_u)))); - - /* dv formula is similar: dv[i][j] = D_v * nabla_v + u*v^2 - (F+k)*v; */ - *v_dvbase = v4ADD(v4MUL(v4_Dv, - NABLA_5PT(v4_v, v4RAISE(v4_v,v4_v_l), v4LOWER(v4_v,v4_v_r), *v_vb_prev, *v_vb_next)), - v4SUB(v4_uvv,v4MUL(v4ADD(v4_F,v4_k),v4_v))); - - v_ub_prev++; v_ub_next++; - v_ubase++; v_vbase++; - v_vb_prev++; v_vb_next++; - v_dubase++; v_dvbase++; - } - - /* Now we do the last 4 pixels. This is unrolled out of the main loop just to avoid having to do the wrap - test every time in the j loop. */ - v4_u_l = v4_u; v4_u = v4_u_r; - v4_v_l = v4_v; v4_v = v4_v_r; - if (wrap) { - /* The 4 cells to the "right" are the first 4 in this row */ - v4_u_r = *((V4F4 *)&(u[i*width])); - v4_v_r = *((V4F4 *)&(v[i*width])); - } else { - /* just leave them alone, retaining the rightmost 4 values in this row, which were loaded on the last iteration - through the loop */ - } - v4_uvv = v4MUL(v4_u,v4MUL(v4_v,v4_v)); - *v_dubase = v4SUB(v4MUL(v4_Du, - NABLA_5PT(v4_u, v4RAISE(v4_u,v4_u_l), v4LOWER(v4_u,v4_u_r), *v_ub_prev, *v_ub_next)), - v4SUB(v4_uvv,v4MUL(v4_F,v4SUB(v4SPLAT(1.0f),v4_u)))); - *v_dvbase = v4ADD(v4MUL(v4_Dv, - NABLA_5PT(v4_v, v4RAISE(v4_v,v4_v_l), v4LOWER(v4_v,v4_v_r), *v_vb_prev, *v_vb_next)), - v4SUB(v4_uvv,v4MUL(v4ADD(v4_F,v4_k),v4_v))); - } // End of scan per row - - // First thread interlock goes here - if (interlock) { DICEK_CH_SYNC } - if (interlock) { DICEK_CH_BEGIN } - - { - // effect change - for(int i = start_row; i < end_row; i++) { - v_ubase = ((V4F4 *) (&(u[i*width]))); - v_vbase = ((V4F4 *) (&(v[i*width]))); - v_dubase = ((V4F4 *) (&(du[i*width]))); - v_dvbase = ((V4F4 *) (&(dv[i*width]))); - for(int j = 0; j < width; j+=VECSIZE) { - // u[i][j] = u[i][j] + speed * du[i][j]; - *v_ubase = v4ADD(v4MUL(v4_speed, *v_dubase), *v_ubase); v_ubase++; v_dubase++; - // v[i][j] = v[i][j] + speed * dv[i][j]; - *v_vbase = v4ADD(v4MUL(v4_speed, *v_dvbase), *v_vbase); v_vbase++; v_dvbase++; - } - } - } - - } // End of scan per iteration - - // second thread interlock goes here - if (interlock) { DICEK_CH_SYNC } - - DICEK_CH_END -} -#endif - - -#ifdef HWIV_EMULATE - -/* This is the old version of compute() that used all "assembly-language" syntax */ -#define INDEX(a,x,y) ((a)[(x)*g_width+(y)]) - -void compute(float *u, float *v, float *du, float *dv, - float D_u,float D_v,float F,float k,float speed, - int parameter_space) -{ -#ifndef HWIV_HAVE_V4F4 - fprintf(stdout, "Did not get vector macros from HWIV\n"); - exit(-1); -#endif - V4F4 v4_speed; // vectorized version of speed scalar - V4F4 v4_F; // vectorized version of F scalar - V4F4 v4_k; // vectorized version of k scalar - HWIV_4F4_ALIGNED talign; // used by FILL_4F4 - V4F4 v4_u; V4F4 v4_du; - V4F4 v4_v; V4F4 v4_dv; - HWIV_INIT_MUL0_4F4; // used by MUL (on targets that need it) - HWIV_INIT_MTMP_4F4; // used by MADD (on targets that need it) - HWIV_INIT_FILL; // used by FILL - HWIV_INIT_RLTMP_4F4; // used by RAISE and LOWER - V4F4 v4_tmp; - V4F4 v4_Du; - V4F4 v4_Dv; - V4F4 v4_nabla_u; - V4F4 v4_nabla_v; - V4F4 v4_1; - V4F4 v4_4; - const float k_min=0.045f, k_max=0.07f, F_min=0.01f, F_max=0.09f; - float k_diff; - V4F4 v4_kdiff; - float * ubase; - float * ub_prev; - float * ub_next; - float * vbase; float * vb_prev; float * vb_next; - float * dubase; float * dvbase; - - V4F4 v4_u_l; - V4F4 v4_u_r; - V4F4 v4_v_l; - V4F4 v4_v_r; - - //F_diff = (F_max-F_min)/width; - k_diff = (k_max-k_min)/height; - - // Initialize our vectorized scalars - HWIV_SPLAT_4F4(v4_speed, speed); - HWIV_SPLAT_4F4(v4_F, F); - HWIV_SPLAT_4F4(v4_k, k); - HWIV_SPLAT_4F4(v4_Du, D_u); - HWIV_SPLAT_4F4(v4_Dv, D_v); - HWIV_SPLAT_4F4(v4_1, 1.0); - HWIV_SPLAT_4F4(v4_4, 4.0); - HWIV_FILL_4F4(v4_kdiff, 0, -k_diff, -2*k_diff, -3*k_diff); - - // Scan per row - for(int i = 0; i < height; i++) { - int iprev,inext; - int j2; - - if (g_wrap) { - iprev = (i+height-1) % height; - inext = (i+1) % height; - } else { - iprev = max(i-1, 0); - inext = min(i+1, height-1); - } - /* Get pointers to beginning of rows for each of the grids. We access - 3 rows each for u and v, and 1 row each for du and dv. */ - ubase = &INDEX(u,i,0); - ub_prev = &INDEX(u,iprev,0); - ub_next = &INDEX(u,inext,0); - vbase = &INDEX(v,i,0); - vb_prev = &INDEX(v,iprev,0); - vb_next = &INDEX(v,inext,0); - dubase = &INDEX(du,i,0); - dvbase = &INDEX(dv,i,0); - - if (parameter_space) { - // set F for this row (ignore the provided value) - F = F_min + (height-i-1) * (F_max-F_min)/width; - HWIV_SPLAT_4F4(v4_F, F); - } - - /* Pre-load the first two blocks of data we need, which are the "center" - and "right" blocks from the end of the row (as if we have just wrapped - around from the end of the row back to the beginning) */ - j2 = g_wrap ? (width-4) : 0; - HWIV_LOAD_4F4(v4_u, ubase+j2); - HWIV_LOAD_4F4(v4_u_r, ubase); - HWIV_LOAD_4F4(v4_v, vbase+j2); - HWIV_LOAD_4F4(v4_v_r, vbase); - - // Scan per column in steps of vector width - for(int j = 0; j < width; j+=4) { - if (g_wrap) { - j2 = (j+4) % width; - } else { - j2 = min(j+4, height-4); - } - - HWIV_COPY_4F4(v4_u_l, v4_u); - HWIV_COPY_4F4(v4_v_l, v4_v); - HWIV_COPY_4F4(v4_u, v4_u_r); - HWIV_COPY_4F4(v4_v, v4_v_r); - HWIV_LOAD_4F4(v4_u_r, ubase+j2); - HWIV_LOAD_4F4(v4_v_r, vbase+j2); - - if (parameter_space) { - // set k for this column (ignore the provided value) - k = k_min + (width-j-1)*k_diff; - // k decreases by k_diff each time j increases by 1, so this vector - // needs to contain 4 different k values. - HWIV_SPLAT_4F4(v4_tmp, k); - HWIV_ADD_4F4(v4_k, v4_tmp, v4_kdiff); - } - - // compute the Laplacians of u and v. "nabla" is the name of the - // "upside down delta" symbol used for the Laplacian in equations - - /* Scalar code is: - nabla_u = u[i][jprev]+u[i][jnext]+u[iprev][j]+u[inext][j] - 4*uval; */ - HWIV_RAISE_4F4(v4_nabla_u, v4_u, v4_u_l); - - HWIV_LOWER_4F4(v4_tmp, v4_u, v4_u_r); - HWIV_ADD_4F4(v4_nabla_u, v4_nabla_u, v4_tmp); - - // Now we add in the "up" and "down" neighbors - HWIV_LOAD_4F4(v4_tmp, ub_prev+j); - HWIV_ADD_4F4(v4_nabla_u, v4_nabla_u, v4_tmp); - HWIV_LOAD_4F4(v4_tmp, ub_next+j); - HWIV_ADD_4F4(v4_nabla_u, v4_nabla_u, v4_tmp); - - // Now we compute -(4*u-neighbors) = neighbors - 4*u - HWIV_NMSUB_4F4(v4_nabla_u, v4_4, v4_u, v4_nabla_u); - - // Same thing all over again for the v's - HWIV_RAISE_4F4(v4_nabla_v, v4_v, v4_v_l); - HWIV_LOWER_4F4(v4_tmp, v4_v, v4_v_r); - HWIV_ADD_4F4(v4_nabla_v, v4_nabla_v, v4_tmp); - HWIV_LOAD_4F4(v4_tmp, vb_prev+j); - HWIV_ADD_4F4(v4_nabla_v, v4_nabla_v, v4_tmp); - HWIV_LOAD_4F4(v4_tmp, vb_next+j); - HWIV_ADD_4F4(v4_nabla_v, v4_nabla_v, v4_tmp); - HWIV_NMSUB_4F4(v4_nabla_v, v4_4, v4_v, v4_nabla_v); - - // compute the new rate of change of u and v - - /* Scalar code is: - du[i][j] = D_u * nabla_u - uval*vval*vval + F*(1-uval); - We treat it as: - D_u * nabla_u - (uval*vval*vval - (-(F*uval-F)) ) */ - - HWIV_NMSUB_4F4(v4_tmp, v4_F, v4_u, v4_F); // -(F*u-F) = F-F*u = F(1-u) - HWIV_MUL_4F4(v4_dv, v4_v, v4_v); // v^2 - HWIV_MSUB_4F4(v4_tmp, v4_u, v4_dv, v4_tmp); // u*v^2 - F(1-u) - HWIV_MSUB_4F4(v4_du, v4_Du, v4_nabla_u, v4_tmp); // D_u*nabla_u - (u*v^2 - F(1-u)) - // = D_u*nabla_u - u*v^2 + F(1-u) - HWIV_SAVE_4F4(dubase+j, v4_du); - - /* dv formula is similar: - dv[i][j] = D_v * nabla_v + uval*vval*vval - (F+k)*vval; - We treat it as: - D_v * nabla_v + uval*vval*vval - (F*vval + k*vval); */ - HWIV_MUL_4F4(v4_tmp, v4_k, v4_v); // k*v - HWIV_MADD_4F4(v4_tmp, v4_F, v4_v, v4_tmp); // F*v+k*v = (F+k)v - // v^2 is still in v4_dv - HWIV_MSUB_4F4(v4_tmp, v4_u, v4_dv, v4_tmp); // u*v^2 - (F+k)v - HWIV_MADD_4F4(v4_dv, v4_Dv, v4_nabla_v, v4_tmp); // D_v*nabla_v + u*v^2 - (F+k)v - HWIV_SAVE_4F4(dvbase+j, v4_dv); - } - } - - // effect change - for(int i = 0; i < height; i++) { - ubase = &INDEX(u,i,0); - vbase = &INDEX(v,i,0); - dubase = &INDEX(du,i,0); - dvbase = &INDEX(dv,i,0); - for(int j = 0; j < width; j+=4) { - // u[i][j] = u[i][j] + speed * du[i][j]; - HWIV_LOAD_4F4(v4_u, ubase+j); // get u - HWIV_LOAD_4F4(v4_du, dubase+j); // get du - HWIV_MADD_4F4(v4_u, v4_speed, v4_du, v4_u); // speed*du + u - HWIV_SAVE_4F4(ubase+j, v4_u); // write it back - - // v[i][j] = v[i][j] + speed * dv[i][j]; - HWIV_LOAD_4F4(v4_v, vbase+j); - HWIV_LOAD_4F4(v4_dv, dvbase+j); - HWIV_MADD_4F4(v4_v, v4_speed, v4_dv, v4_v); - HWIV_SAVE_4F4(vbase+j, v4_v); - } - } -} - -#endif diff --git a/Ready_old_CLI/calc_Gray_Scott/gray_scott_hwivector.h b/Ready_old_CLI/calc_Gray_Scott/gray_scott_hwivector.h deleted file mode 100644 index 1d4dbd8e9..000000000 --- a/Ready_old_CLI/calc_Gray_Scott/gray_scott_hwivector.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -//#define DICEK_EMULATE -#include "dicek.h" - -typedef struct compute_params { - DICEK_THREAD_VARS; - float *u; - float *v; - float *du; - float *dv; - float D_u; - float D_v; - float F; - float k; - float speed; - int num_its; - int start_row; - int end_row; - int interlock_type; -} compute_params; - -void gs_hwi_compute_setup(int width, int height, bool wrap, bool paramspace); - -void compute_dispatch(float *u, float *v, float *du, float *dv, - float D_u, float D_v, float F, float k, float speed, - int num_its, int num_threads); - -void * compute_gs_hwiv(void * param_block); // Arg is really "compute_params * param_block" diff --git a/Ready_old_CLI/calc_Gray_Scott/gray_scott_scalar.cpp b/Ready_old_CLI/calc_Gray_Scott/gray_scott_scalar.cpp deleted file mode 100644 index a2ce7c127..000000000 --- a/Ready_old_CLI/calc_Gray_Scott/gray_scott_scalar.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See ../../README.txt for more details. - -*/ - -#include "gray_scott_scalar.h" -#include "../util.h" - -static int g_width; -static int g_height; -static bool g_wrap; -static bool g_paramspace; - -void gs_scl_compute_setup(int width, int height, bool wrap, bool paramspace) -{ - g_width = width; - g_height = height; - g_wrap = wrap; - g_paramspace = paramspace; -} - -void compute_gs_scalar(float *a, float *b, float *da, float *db, - float r_a, float r_b, float f, float k, float speed) -{ - // compute change in each cell - for(int i = 0; i < g_height; i++) { - int iprev,inext; - if (g_wrap) { - iprev = (i + g_height - 1) % g_height; - inext = (i + 1) % g_height; - } else { - iprev = max(0,i-1); - inext = min(g_height-1,i+1); - } - - for(int j = 0; j < g_width; j++) { - int jprev,jnext; - if (g_wrap) { - jprev = (j + g_width - 1) % g_width; - jnext = (j + 1) % g_width; - } else { - jprev = max(0,j-1); - jnext = min(g_width-1,j+1); - } - - float aval = a[i*g_width+j]; - float bval = b[i*g_width+j]; - - if(g_paramspace) { - const float kmin=0.045, kmax=0.07, fmin=0.01, fmax=0.09; - // set f and k for this location (ignore the provided values of f and k) - k = kmin + (g_width-j-1)*(kmax-kmin)/g_width; - f = fmin + (g_height-i-1)*(fmax-fmin)/g_height; - } - - // compute the Laplacians of a and b - float dda = a[i*g_width+jprev] + a[i*g_width+jnext] + a[iprev*g_width+j] + a[inext*g_width+j] - 4*aval; - float ddb = b[i*g_width+jprev] + b[i*g_width+jnext] + b[iprev*g_width+j] + b[inext*g_width+j] - 4*bval; - - // compute the new rate of change of a and b - da[i*g_width+j] = r_a * dda - aval*bval*bval + f*(1-aval); - db[i*g_width+j] = r_b * ddb + aval*bval*bval - (f+k)*bval; - } - } - - // effect change - for(int i = 0; i < g_width*g_height; i++) { - a[i] += speed * da[i]; - b[i] += speed * db[i]; - } -} diff --git a/Ready_old_CLI/calc_Gray_Scott/gray_scott_scalar.h b/Ready_old_CLI/calc_Gray_Scott/gray_scott_scalar.h deleted file mode 100644 index ba396affb..000000000 --- a/Ready_old_CLI/calc_Gray_Scott/gray_scott_scalar.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -void gs_scl_compute_setup(int width, int height, bool wrap, bool paramspace); - -void compute_gs_scalar(float *a, float *b, float *da, float *db, - float r_a, float r_b, float f, float k, float speed); diff --git a/Ready_old_CLI/calc_Gray_Scott/hwi_vector.h b/Ready_old_CLI/calc_Gray_Scott/hwi_vector.h deleted file mode 100644 index ea55acd25..000000000 --- a/Ready_old_CLI/calc_Gray_Scott/hwi_vector.h +++ /dev/null @@ -1,426 +0,0 @@ -/* - - hwi_vector.h ^u 140 ^x f - -This is a hardware-independent vector library. It enables vector SIMD code to be written, compiled and run on non-vector -machines. Once the code is tested and working, a compile-time macro can be changed, and a recompile causes the program -to use actual vector instructions. - -In the simplest case, that is all that is needed. In real applications there are usually several more steps: - - * The application developer decides what base hardware platform to compile for. This might be the oldest CPU the - company will support, such as a Pentium 4 HT. - - * The compiler defines certain flags, such as __i386__ and __SSE2__ that can be tested by an #ifdef - - * The source code can set flags of its own, such as HWIV_EMULATE or HWIV_WANT_V4F4. This might be done in order to - create two versions of a calculation routine (one that uses vector instructions and one that does not) - - * The program is built from one or more compilations of the same source. This might be done in order to create a - "universal binary" capable of being copied to and run on a variety of computer products. A typical example is a - program file that contains both a 32-bit and a 64-bit version, and the operating system loads whichever one is - appropriate when the program is launched. - - * At run-time, the program tests for the presence of vector instructions using the CPUID instruction or its - equivalent on non Intel CPUs. - - * At run-time, based on the CPUID test, the program transfers control to one or another of the calculation - subroutines depending on which vector instructions are actually available. - -*/ - -// First, honor the user's request to use emulation -#ifdef HWIV_EMULATE -# define HWIV_V4F4_EMULATED -#endif - -// Next, find out if the compiler will give us SSE2 intrinsics -#ifndef HWIV_V4F4_EMULATED -# if defined(__SSE2__) -# define HWIV_V4F4_SSE2 -# endif - // Workaround because Visual Studio doesn't seem to set its _M_IX86_FP flag, - // or give us any indication what level of SSE support is available. - // So we just assume SSE2 is available -# if defined(_M_X64) || defined(_M_IX86) -# define HWIV_V4F4_SSE2 -# endif -#endif - - -// Finally, fall back to emulated if no hardware option is available -#ifndef HWIV_V4F4_SSE2 -# ifndef HWIV_V4F4_EMULATED -# define HWIV_V4F4_EMULATED -# endif -#endif - - -// See is client wants V4F4 -#ifdef HWIV_WANT_V4F4 - -// Okay, now see how we should create the macros for V4F4 - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// #include // -// /* */ /* */ // -// /* */ /* */ // -// int main() {char a[] ={ 'T', 'h','i', 's',32, 'p','r' ,'o','g' // -// ,'r','a','m', 32, 'j', 'u', 's', 't' ,32 ,'d', 'o', 'e' ,'s' ,32 ,'i' // -// ,'t' ,32 ,'t' ,'h' ,'e' ,32, 'h' ,97,'r','d' ,32, 'w',97,121,33, 32, 40, // -// 68, 'o', 'n', 39, 't' ,32 ,'y' ,'o', 117, 32 ,'t' ,'h' ,'i' // -// ,'n' /* Xy =a +3 +n ++ ;a= b- (* x/z ); if // -// (Xy-++n<(z+*x))z =b;a +b, z+= x*/,107 , 63,63 ,63,41,'\n' ,00}; puts(a);} /*.RPM.*/ // -// // -// Emulated versions of the V4F4 macros. (These also serve as documentation for what each macro does) // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -# ifdef HWIV_V4F4_EMULATED - -/* Scalar code to emulate the V4F4 vector model */ -#define HWIV_HAVE_V4F4 - -/* The vector register type */ -typedef float V4F4[4]; - -/* Aligned memory suitable for load to / store from the vector register */ -typedef float HWIV_4F4_ALIGNED[4]; - -/* LOAD_4F4: dst is a vector of 4 floats. src is a pointer to an array - of floats in memory. This opcode loads 4 consecutive floats from the - given location into the vector. The first float from memory will be loaded - into element 0 of the vector, the 2nd into element 1, and so on. */ -#define HWIV_LOAD_4F4(dst, src) { (dst)[0]=(src)[0]; (dst)[1]=(src)[1]; \ - (dst)[2]=(src)[2]; (dst)[3]=(src)[3]; } - -/* LOADO_4F4: dst is a vector of 4 floats. src is a pointer to an array - of floats in memory. offset is a byte offset, which must be a multiple - of sizeof(float). - This opcode loads 4 consecutive floats from the given location plus - offset into the vector. The first float from memory will be loaded into - element 0 of the vector, the 2nd into element 1, and so on. */ -#define HWIV_LOADO_4F4(dst, src, offset) { (dst)[0]=(src)[(offset)/4]; \ - (dst)[1]=(src)[(offset)/4+1]; \ - (dst)[2]=(src)[(offset)/4+2]; \ - (dst)[3]=(src)[(offset)/4+3]; } - -/* COPY_4F4: dst and src are each a vector of 4 floats. This opcode copies - the contents of the source vector into the destination vector. */ -#define HWIV_COPY_4F4(dst, src) HWIV_LOAD_4F4((dst), (src)) - -/* FILL_4F4: dst is a vector of 4 floats. sc0, sc1, sc2, and sc3 ("scalars") - are each a float. tmp is a (float *) pointing to memory of sufficient - size to hold four floats, and aligned to a 16-byte boundary. Use the - HWIV_4F4_ALIGNED typedef to declare a suitable float[4] which can - be passed as tmp to this and other similar macros. - This opcode loads the 4 scalar values into the vector. The sc0 will be - loaded into element 0 of the vector, sc1 into element 1, and so on. */ -#define HWIV_FILL_4F4(dst, sc0, sc1, sc2, sc3) { \ - (dst)[0]=(sc0); (dst)[1]=(sc1); (dst)[2]=(sc2); (dst)[3]=(sc3); } -#define HWIV_INIT_FILL /* nop */ - -#define HWIV_SPLAT_4F4(dst, s) HWIV_FILL_4F4((dst), (s), (s), (s), (s)) - -/* SAVE_4F4: dst is a pointer to an array of floats in memory. src is a - vector of 4 floats. - This opcode stores the 4 floats in the source vector into 4 consecutive - float-sized blocks of memory (i.e. 16 consecutive bytes, 4 bytes per - float) beginning at the given destination location. Element 0 of the - vector will be stored into the first 4 bytes in memory, element 1 into - the next 4 bytes of memory, and so on. */ -#define HWIV_SAVE_4F4(dst, src) memcpy((void *) (dst), (void *) (src), 16); - -/* SAVEO_4F4: dst is a pointer to an array of floats in memory. src is a - vector of 4 floats. offset is a byte offset, which must be a multiple - of sizeof(float). - This opcode stores the 4 floats in the source vector into 4 consecutive - float-sized blocks of memory (i.e. 16 consecutive bytes, 4 bytes per - float) beginning at the given destination location plus offset. Element - 0 of the vector will be stored into the first 4 bytes in memory, element - 1 into the next 4 bytes of memory, and so on. */ -#define HWIV_SAVEO_4F4(dst, offset, src) { (dst)[(offset)/4]=(src)[0]; \ - (dst)[(offset)/4+1]=(src)[1]; \ - (dst)[(offset)/4+2]=(src)[2]; \ - (dst)[(offset)/4+3]=(src)[3]; } - -/* ADD_4F4: dst, a, and b are each a vector of 4 floats. - This opcode adds each of the components of a to the corresponding - component of b and puts the result into dst. */ -#define HWIV_ADD_4F4(dst, a, b) { (dst)[0]=a[0]+b[0]; (dst)[1]=a[1]+b[1]; \ - (dst)[2]=a[2]+b[2]; (dst)[3]=a[3]+b[3]; } - -/* SUB_4F4: dst, a, and b are each a vector of 4 floats. - This opcode adds each of the components of a to the corresponding - component of b and puts the result into dst. */ -#define HWIV_SUB_4F4(dst, a, b) { (dst)[0]=a[0]-b[0]; (dst)[1]=a[1]-b[1]; \ - (dst)[2]=a[2]-b[2]; (dst)[3]=a[3]-b[3]; } - -#define HWIV_INIT_MUL0_4F4 /* nop */ - -/* MUL_4F4: dst, a, and b are each a vector of 4 floats. v0 is a variable - declared with the HWIV_INIT_MUL0_4F4 macro. - This opcode multiplies each of the components of a to the corresponding - component of b and puts the result into dst. The varible v0 is used on - hardware that has no 2-argument multiply operation. */ -#define HWIV_MUL_4F4(dst, a, b) \ - { (dst)[0]=a[0]*b[0]; (dst)[1]=a[1]*b[1]; \ - (dst)[2]=a[2]*b[2]; (dst)[3]=a[3]*b[3]; } - -#define HWIV_INIT_MTMP_4F4 /* nop */ - -/* MADD_4F4: dst, a, b, and c are each a vector of 4 floats. t is a variable - declared with the HWIV_INIT_MTMP_4F4 macro. - This opcode multiplies each of the components of a to the corresponding - component of b, then adds the corresponding component of c, and puts the - result into dst. The varible t is used on hardware that has no 3-argument - multiply-add operation. */ -#define HWIV_MADD_4F4(dst, a, b, c) { (dst)[0]=a[0]*b[0] + c[0]; \ - (dst)[1]=a[1]*b[1] + c[1]; \ - (dst)[2]=a[2]*b[2] + c[2]; \ - (dst)[3]=a[3]*b[3] + c[3]; } - -#define HWIV_MSUB_4F4(dst, a, b, c) { (dst)[0]=a[0]*b[0] - c[0]; \ - (dst)[1]=a[1]*b[1] - c[1]; \ - (dst)[2]=a[2]*b[2] - c[2]; \ - (dst)[3]=a[3]*b[3] - c[3]; } - -/* NMSUB_4F4: dst, a, b, and c are each a vector of 4 floats. t is a variable - declared with the HWIV_INIT_MTMP_4F4 macro. - This opcode multiplies each of the components of a to the corresponding - component of b, then subtracts that product from the corresponding - component of c, then puts the result into dst. The varible t is used on - hardware that has no 3-argument multiply-add operation. */ -#define HWIV_NMSUB_4F4(dst, a, b, c) { (dst)[0]=c[0] - a[0]*b[0]; \ - (dst)[1]=c[1] - a[1]*b[1]; \ - (dst)[2]=c[2] - a[2]*b[2]; \ - (dst)[3]=c[3] - a[3]*b[3]; } - -// Declare this if you are doing any raise or lower operation -#define HWIV_INIT_RLTMP_4F4 /* nop */ - -/* RAISE_4F4: dst, src, extra, and tmp are each a vector of 4 floats. - This opcode "raises" three of the values from src to the next-higher - element of dst. Element 0 of dst is filled with the value from element - 3 of "extra". The varible t is used on hardware that requires the - result to be computed in two pieces and then assembled via a blend - operation. ("VSHR_4F4" in old macros) */ -#define HWIV_RAISE_4F4(dst, src, extra) \ - { dst[3]=src[2]; dst[2]=src[1]; \ - dst[1]=src[0]; dst[0]=extra[3]; } - -/* LOWER_4F4: dst, src, extra, and tmp are each a vector of 4 floats. - This opcode "lowers" three of the values from src to the next-lower - element of dst. Element 3 of dst is filled with the value from element - 0 of "extra". The varible t is used on hardware that requires the - result to be computed in two pieces and then assembled via a blend - operation. ("VSHL_4F4" in old macros) */ -#define HWIV_LOWER_4F4(dst, src, extra) \ - { dst[0]=src[1]; dst[1]=src[2]; \ - dst[2]=src[3]; dst[3]=extra[0]; } - - -// We also define a small set of macros for FORTRAN-style code. Using these you can build up expressions like -// -// a = v4ADD(v4MUL(b,c),d); /* a = b*c + d; */ -// -// These macros do not form a complete solution, you still need things like LOAD and SAVE to do any real work. - -#define v4ADD(a,b) {(a)[0]+(b)[0],(a)[1]+(b)[1],(a)[2]+(b)[2],(a)[3]+(b)[3]} -#define v4SUB(a,b) {(a)[0]-(b)[0],(a)[1]-(b)[1],(a)[2]-(b)[2],(a)[3]-(b)[3]} -#define v4MUL(a,b) {(a)[0]*(b)[0],(a)[1]*(b)[1],(a)[2]*(b)[2],(a)[3]*(b)[3]} -#define v4SET(v0,v1,v2,v3) {(v0),(v1),(v2),(v3)} -#define v4SPLAT(a) {(a),(a),(a),(a)} -#define v4ROUP(a) {(a)[3],(a)[0],(a)[1],(a)[2]} -#define v4RODN(a) {(a)[1],(a)[2],(a)[3],(a)[0]} -#define v4RAISE(a, new) {(new)[3],(a)[0],(a)[1],(a)[2]} -#define v4LOWER(a, new) {(a)[1],(a)[2],(a)[3],(new)[0]} - -# endif -/* - - - - - - - - - - - - - - - - - - - - - End of the EMULATED section - - - - - - - - - - - - - - - - - - - - - - */ - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// @@@@ @@@@ @@@@ // -// @@@@ @@@@ @@@@ // -// """" @@@@ @@@@ // -// eeee eeee ,e@@e.. eee@@@@eee @@@@ // -// @@@@ @@@@@@@@@@@@@@. @@@@@@@@@@ @@@@ // -// @@@@ @@@@f' `@@@@ @@@@ @@@@ // -// @@@@ @@@@ @@@@ @@@@ ,e@@@e. @@@@ // -// @@@@ @@@@ @@@@ @@@@ e@@@@@@@@@@@e @@@@ // -// @@@@ @@@@ @@@@ @@@@ .@@@@' `@@@@i @@@@ // -// @@@@ @@@@ @@@@ @@@@kee@@@@eeeeeeeee@@@@ @@@@ // -// @@@@ @@@@ @@@@ `@@@@@@@@@@@@@@@@@@@@@@@@@@@@ (R) // -// @@@@. // -// `@@@@e. .eeee- // -// *@@@@@@@@@@@* // -// "*@@@@*" // -// // -// Versions of the V4F4 macros for the Intel SSE2 (or later) 128-bit vector instruction set // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -# ifdef HWIV_V4F4_SSE2 - -/* SSE3 implementation of the V4F4 vector model */ -#define HWIV_HAVE_V4F4 - -# if (defined(HWIV_USE_IMMINTRIN) || defined (__AVX__)) -# include -# else -# include -# include -# endif - - -typedef __m128 V4F4; - -#ifdef _WIN32 -# define ALIGNED_16 __declspec( align( 16 ) ) -#else -# define ALIGNED_16 __attribute__((aligned (16))) -#endif - -typedef float ALIGNED_16 HWIV_4F4_ALIGNED[4]; - -#define HWIV_LOAD_4F4(dst, src) (dst) = _mm_load_ps(src) -#define HWIV_LOADO_4F4(dst, src, offset) \ - (dst) = _mm_load_ps((src)+(offset)/4) -#define HWIV_COPY_4F4(dst, src) (dst) = (src) - -#define HWIV_FILL_4F4(dst, sc0, sc1, sc2, sc3) \ - { HWIV_fill_4F4[0]=(sc0); HWIV_fill_4F4[1]=(sc1); \ - HWIV_fill_4F4[2]=(sc2); HWIV_fill_4F4[3]=(sc3); \ - HWIV_LOAD_4F4(dst, HWIV_fill_4F4); } - -#define HWIV_INIT_FILL float ALIGNED_16 HWIV_fill_4F4[4]; - - -#define HWIV_SPLAT_4F4(dst, s) (dst) = _mm_set1_ps(s) - -#define HWIV_SAVE_4F4(dst, src) _mm_store_ps((dst), (src)) - -#define HWIV_SAVEO_4F4(dst, offset, src) \ - _mm_store_ps((dst)+(offset)/4, (src)) - -#define HWIV_ADD_4F4(dst, a, b) (dst) = _mm_add_ps((a), (b)) - -#define HWIV_SUB_4F4(dst, a, b) (dst) = _mm_sub_ps((a), (b)) - -// For INIT_MUL0, on Intel we do nothing because Intel actually has a -// 2-operand multiply operation. -#define HWIV_INIT_MUL0_4F4 /* nop */ - -// For INIT_MTMP, on Intel SSE2 we need to declare a variable because -// Intel SSE2 has no FMA (fused multiply-add) operations (this is -// expected to come wth AVX2 on Haskell in 2013) -#define HWIV_INIT_MTMP_4F4 V4F4 HWIV_mtmp_4F4 = _mm_setzero_ps() - -#define HWIV_MUL_4F4(dst, a, b) (dst) = _mm_mul_ps((a), (b)) - -#define HWIV_MADD_4F4(dst, a, b, c) { HWIV_mtmp_4F4 = _mm_mul_ps((a), (b)); \ - (dst) = _mm_add_ps(HWIV_mtmp_4F4, (c)); } -#define HWIV_MSUB_4F4(dst, a, b, c) { HWIV_mtmp_4F4 = _mm_mul_ps((a), (b)); \ - (dst) = _mm_sub_ps(HWIV_mtmp_4F4, (c)); } -#define HWIV_NMSUB_4F4(dst, a, b, c) { HWIV_mtmp_4F4 = _mm_mul_ps((a), (b)); \ - (dst) = _mm_sub_ps((c), HWIV_mtmp_4F4); } - -#define HWIV_INIT_RLTMP_4F4 /* nop */ - - -/* -HWIV_RODN_4F4 (ROtate DOwn) does a "downwards rotate" of a 4-element vector using the Intel VSHUFPS instruction -(intrinsic _mm_shuffle_ps). If the input is {a,b,c,d} (with a being element 0) the result of the downwards rotate is -{b,c,d,a} (with each element moving down tot he next-lower slot, except for a which rotates into the top position). - - SRC1 { x3 , x2 , x1 , x0 } - SRC2 { y3 , y2 , y1 , y0 } - DEST { y0 , y3 , x2 , x1 } - imm8: 00 11 10 01 = 0x39 - - */ -#define HWIV_RODN_4F4(dest, src) (dest) = _mm_shuffle_ps((src), (src), 0x39) - -/* -HWIV_ROUP_4F4 is an "upwards rotate": if the input is {a,b,c,d} (with a being element 0) the result is {d,a,b,c}. - - SRC1 { x3 , x2 , x1 , x0 } - SRC2 { y3 , y2 , y1 , y0 } - DEST { y2 , y1 , x0 , x3 } - imm8: 10 01 00 11 = 0x93 - - */ -#define HWIV_ROUP_4F4(dest, src) (dest) = _mm_shuffle_ps((src), (src), 0x93) - -/* -HWIV_RAISE_4F4 is an "upwards shift": if the input is {a,b,c,d} (with a being element 0) the result is {X,a,b,c} with -the new element X coming from element 3 of the "new" argument. - - new { x3 , x2 , x1 , x0 } - src { y3 , y2 , y1 , y0 } - dest { y0 , y0 , x3 , x3 } - imm8: 00 00 11 11 = 0x0F - src0 src0 new3 new3 - - dest { x3 , x2 , x1 , x0 } - src { y3 , y2 , y1 , y0 } - dest { y2 , y1 , x2 , x0 } - imm8: 10 01 10 00 = 0x98 - src2 src1 src0 new3 -*/ -#define HWIV_RAISE_4F4(dest, src, new) { (dest) = _mm_shuffle_ps((new), (src), 0x0f); \ - (dest) = _mm_shuffle_ps((dest), (src), 0x98); } - -/* -HWIV_LOWER_4F4 is an "downwards shift": if the input is {a,b,c,d} (with a being element 0) the result is {b,c,d,X} with -the new element X coming from element 0 of the "new" argument. - -To accomplish a downwards shift we can just use _mm_move_ss to move a single scalar into the bottom position and then do -a RODN (downwards rotate) - */ -#define HWIV_LOWER_4F4(dest, src, new) { (dest) = _mm_move_ss((src), (new)); \ - HWIV_RODN_4F4(dest, dest); } - -// Here is the subset for FORTRAN-style code -#define v4ADD(a,b) _mm_add_ps((a), (b)) -#define v4SUB(a,b) _mm_sub_ps((a), (b)) -#define v4MUL(a,b) _mm_mul_ps((a), (b)) -// in v4SET, note the reversal of argument order -#define v4SET(v0,v1,v2,v3) _mm_set_ps((v3),(v2),(v1),(v0)) -#define v4SPLAT(a) _mm_set1_ps(a) -#define v4ROUP(src) _mm_shuffle_ps((src), (src), 0x93) -#define v4RODN(src) _mm_shuffle_ps((src), (src), 0x39) -#define v4RAISE(src, new) _mm_shuffle_ps(_mm_shuffle_ps((new), (src), 0x0f), (src), 0x98) -#define v4LOWER(src, new) _mm_shuffle_ps(_mm_move_ss((src),(new)), _mm_move_ss((src),(new)), 0x39) - -# endif -/* - - - - - - - - - - - - - - - - - - - - - - End of the INTEL section - - - - - - - - - - - - - - - - - - - - - - - */ - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// // -// /^^^^^\ /^^^^^\ _-^^^^^^/ TM // -// / ,- )_-----_ --- ---- ----..----. ---.---/ ,- )/ ,---/ // -// / /_) // __ )| |/ |/ _// .- )/ _// /_) // / // -// / // // / / /| / / (/___// .^ / // /( | // -// / / ^^^'( (/ / | /| _/ ( `----/ / / / ^^^' | `---/ // -// /__/ \____-' |__/ |__/ \____//___/ /__/ \_____/ // -// // -// // -// Versions of the V4F4 macros for the PowerPC AltiVec 128-bit vector instruction set // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/* - -The AltiVec instruction set first came with the "G4" (744x and 745x) processors from Motorola, then the "G5" (97x) -series from IBM, the "Cell" 8-core CPU used in the Sony Playstation 3, and in IBM's POWER6 (and later) server CPUs. - - Not yet implemented -- do we care about AltiVec? */ - -#endif diff --git a/Ready_old_CLI/main-ready.cpp b/Ready_old_CLI/main-ready.cpp deleted file mode 100644 index 847d80ef1..000000000 --- a/Ready_old_CLI/main-ready.cpp +++ /dev/null @@ -1,930 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See ../README.txt for more details. - -*/ - -// hardware -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86)) -# include -# define ALLOC_USE_MM -#endif - -// stdlib: -#include -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } - - // TODO: Make sure this is the correct include file for the FindFirstFile function (used in FILE_LENGTH macro below) - #include -#else - #include - #include -#endif - -#include "util.h" - -#include "ready_display.h" - -#include "brusselator.h" - -#include "gray_scott_scalar.h" -#include "gray_scott_hwivector.h" - -static int g_width = 256; -static int g_height = 256; - -float *allocate(int width, int height, const char * error_text, bool for_mm); -float *allocate(int width, int height, const char * error_text, bool for_mm) -{ - long sz; - float * rv; - - sz = ((long) width) * ((long) height) * sizeof(*rv); -#ifdef ALLOC_USE_MM - if (for_mm) { - rv = (float *) _mm_malloc(sz, 16); - } else { -#endif - rv = (float *) malloc(((size_t)sz)); -#ifdef ALLOC_USE_MM - } -#endif - - if (rv == NULL) { - fprintf(stderr, "allocate: Could get %ld bytes for %s\n", ((long) sz), - error_text); - exit(-1); - } - return (rv); -} - -void init(float *a, float *b, int width, int height, - int density, int lowback, int BZrects); - -#define MAX_LOADS 100 -char * load_opts[MAX_LOADS]; -int num_loads = 0; - -void pearson_block(float *u, float *v, int width, int height, - int vpos, int hpos, int h, int w, float U, float V); -void ramp_block(float *u, float *v, int width, int height, - int vpos, int hpos, int h, int w, int fliph); -void homogen_uv(float F, float k, float * hU, float * hV); -void i5_bkg(float *u, float *v, int width, int height, int which); -void load_option(float * u, float * v, int width, int height, const char * option); -bool digit_p(char c); -void pattern_load(float * u, float * v, int width, int height, - const char * pattern_filename, int x, int y, int orient); -int next_patsize(int former); -void byteswap_2_double(double * array); - - -static const char * g_rd_name = ""; -static int g_color = 0; -static int g_oldcolor = 0; -static int g_pastel_mode = 0; -static bool g_paramspace = false; -static bool g_wrap = false; -static float g_k, g_F; -static int g_ramprects = 0; -static int g_lowback = 0; -static float g_scale = 1.0; -static int g_density = 0; -static bool g_video = false; -static int g_threads; -static bool g_autobright = false; - -#define READY_MODULE_GS_SCALAR 1 -#define READY_MODULE_GS_HWIV 2 -#define READY_MODULE_BRUSSELATOR 3 - -static int g_module = READY_MODULE_GS_HWIV; - -int main(int argc, char * * argv) -{ - long frames_per_display = 200; - long i; - DICEK_INIT_NTHR(g_hw_threads) - g_threads = g_hw_threads; - - float uv_range = 1.0; - - float bruss_A = 3.0; - float bruss_B = 10.0; - float bruss_D1 = 5.0; - float bruss_D2 = 12.0; - float bruss_speed = 0.001; - - // Here we implement the Gray-Scott model, as described here: - // http://arxiv.org/abs/patt-sol/9304003 - // (the seminal paper by Pearson) - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // (a present university course project, by Greg Turk at Georgia Tech) - // http://www.mrob.com/pub/comp/xmorphia/index.html - // (a web exhibit with over 100 videos and 500 images) - - // -- parameters -- - float D_u = 0.082; - float D_v = 0.041; - - // The default is equivalent to options: -F 0.035 -k 0.064 - g_k = 0.064; g_F = 0.035; - - // Other pattern-types to try (pass -F and -k as argument on command line, some require "-density 1" as well): - // - // -F 0.0118 -k 0.0475 Spiral waves - // -F 0.022 -k 0.059 Spots that multiply and keep killing each other off - // -F 0.035 -k 0.06 Stripes with branching (fingerprint) - // -F 0.04 -k 0.064 For spots that multiply, with stripes mixed in - // -F 0.056 -k 0.065 Long stripes ("-density 1" option helps here) - // -F 0.062 -k 0.0609 -density 2 "Uskate world", where I found all the Wolfram-class-4 behaviour - // -F 0.094 -k 0.059 -density 1 "soap bubbles" - // -F 0.094 -k 0.057 -lowback -density 1 Inverse soap bubbles - - float speed = 1.0; - - bool custom_Fk = false; - - for (i = 1; i < argc; i++) { - if (0) { - } else if (strcmp(argv[i],"-autobright")==0) { - // force display()'s auto-brightness feature - g_autobright = true; - } else if ((i+1 0.4) { - frames_per_display /= 2; - if (frames_per_display < 10) { - frames_per_display = 10; - } - } - } - } -} - -/* pearson_bkg fills everything with the trivial state (U=1, V=0) combined - with random noise of magnitude 0.01, and it does this while keeping all - U and V values between 0 and 1. */ -void pearson_bkg(float *u, float *v, int width, int height) -{ - int i, j; - for(i=0; i= 0) && (i < height)) { - for(j=hpos; j= 0) && (j < width)) { - u[i*width+j] = ut_frand(U-0.005, U+0.005); - v[i*width+j] = ut_frand(V-0.005, V+0.005); - } - } - } - } -} - -/* ramp_block creates a starting pattern for B-Z spirals and continuous - propagating wave fronts (pattern type xi in my paper). */ -void ramp_block(float *u, float *v, int width, int height, - int vpos, int hpos, int h, int w, int fliph) -{ - int i, j; - float U, V; - - for(i=vpos; i= 0) && (i < height)) { - for(j=hpos; j= 0) && (j < width)) { - U = ((float) (j-hpos)) / ((float) w); - if (fliph) { - U = 1.0 - U; - } - V = U; - - if (U < 0.1) { - U = U / 0.1; - } else { - U = (1.0 - U) / 0.9; - } - U = 1.0 - sin((1.0 - U) * 1.5708); - U = 1.0 - (U * 0.95); // formerly 0.85 - - if (V < 0.08) { // formerly 0.05 - V = V / 0.08; // formerly 0.05 - } else if (V < 0.5) { - V = (0.5 - V) / 0.42; // formerly 0.45 - } else { - V = 0; - } - V = 1.0 - sin((1.0 - V) * 1.5708); - V = V * 0.4; - - u[i*width+j] = ut_frand(U-0.005, U+0.005); - v[i*width+j] = ut_frand(V-0.005, V+0.005); - } - } - } - } -} - -/* Given an F and k, returns the U and V values for the homogeneous state at - that F and k. Tries to return the secondary (blue) state, but if that - doesn't exist it returns the trivial (red) state. - The formula is derived from Muratov and Osipov 2000 formula 2.12 (with A - defined by 2.10, and the other variables defined as in 2.3, 2.4 and 2.5 (all - of this is on pages 8-9 of "Muratov 2000 Spike.pdf"). - There is a more obvious formula (expressed in terms of F and k) in - "Leppanen 2004 Computational.pdf" page 40 (his equation 3.22). - To get the values for the center F and k settings, use (g_F_CTR, g_k_CTR) */ -void homogen_uv(float F, float k, float * hU, float * hV) -{ - float sqrt_F, A; - float U, V; - - sqrt_F = sqrt(F); - - if (k < (sqrt_F - 2.0 * F) / 2.0) { - A = sqrt_F / (F + k); - U = (A - sqrt(A*A - 4.0)) / (2.0 * A); - U = max(0.0, min(1.0, U)); - V = sqrt_F * (A + sqrt(A*A - 4.0)) / 2.0; - V = max(0.0, min(1.0, V)); - } else { - U = 1.0; - V = 0.0; - } - *hU = U; - *hV = V; -} - -/* i5_bkg is the background for my init routines. It fills the space either - with the trivial stable state (V=0, U=1), or when the other stable "blue" - state exists it uses that state. This makes for much more interesting - initial patterns for the areas near the various bifurcation lines and for - Uskate world. */ -void i5_bkg(float *u, float *v, int width, int height, int which) -{ - int i, j; - - for(i=0; i= '0') && (c <= '9'))); -} - - -#define MIN_CLIPSIZE 4 -#define MAX_CLIPSIZE 256 - -#if (defined(__APPLE__) || defined(__linux__)) -# define FILE_LENGTH(name, result) \ - struct stat _FILESTAT; int _STAT_result; \ - _FILESTAT.st_size = 0; \ - _STAT_result = stat(name, &_FILESTAT); \ - *(result) = (_STAT_result < 0) ? 0 : _FILESTAT.st_size; -#else -# ifdef _WIN32 -// Based on: vcpptips.wordpress.com/tag/get-the-size-of-a-file-without-opening-it/ -// TODO: This needs to be tested -# define FILE_LENGTH(name, result) \ - ULONGLONG _LEN_res_tmp; \ - WIN32_FIND_DATA _LEN_fff_dat = { 0 }; \ - HANDLE _LEN_fff_hdl = FindFirstFile(name, &_LEN_fff_dat); \ - if (_LEN_fff_hdl != INVALID_HANDLE_VALUE) { \ - FindClose(_LEN_fff_hdl); \ - _LEN_res_tmp = (_LEN_fff_dat.nFileSizeHigh) << (sizeof(_LEN_fff_dat.nFileSizeHigh)*8) | (_LEN_fff_dat.nFileSizeLow); \ - *(result) = _LEN_res_tmp; \ - } else { *(result) = 0; } -# else -// Neither Unix nor Windows -# define FILE_LENGTH(name, result) \ - sprintf(stdout, "Getting length of file is not yet supported in this environment.\n"); exit(-1); -# endif -#endif - -/* - Load a PDE4 pattern file with the given filename (pathname) into the U and V arrays, at the given X and Y location. - - 6 7 Starting from orientation 0: orient. 5 is rotated 90 degrees clockwise; - Orientation ^ orient. 3 is rotated 180 degrees, and orient. 6 is rotated another 90 degrees - diagram: | clockwise. - 2 | 0 Orientation 1 is a mirror image of orientation 0, flipped around a horizontal - <-----o-----> axis. Compared to orientation 1, orient. 4 is rotated 90 degrees clockwise; - 3 | 1 orient. 2 is rotated 180 degrees, and orient. 7 is rotated another 90 degrees - | clockwise. - v - 4 5 - -You'll notice an odd scaling factor of "0.63324". Most of the patterns in my collection were created in my PDE4 program -using what I call the "standard model" parameters. PDE4 converts intervals of time (frames) and space (pixels) into the -dimensionless time and space units in the actual Pearson equations, and is adjusted to use a grid that has about sqrt(2) -finer resolution than the grid Pearson used in his 1993 paper. - Here are the relevant variables and the u formula as calculated by both programs: - - PDE4: HWIV: - g_SPATIAL_REZ = 0.007 g_scale = 2.0 - dxy = (1/143)^2 = 20449 speed = 0.5 - delta_t = 0.05 D_u = 0.164 - D_u = 2.0e-5 D_v = 0.082 - D_v = 1.0e-5 - u -> u + delta_t * (D_u * nabla_u/dxy^2 - u v^2 + F(1-u)) u -> u + speed * (D_u * nabla_u - u v^2 + F(1-u)) - = u + 0.5 * (2e-5 * dxy * nabla_u - u v^2 + F(1-u)) = u + 0.5 * (0.164 * nabla_u - u v^2 + F(1-u)) - = u + 0.5 * (0.409 * nabla_u - u v^2 + F(1-u)) - (v equation is similar) (v equation is similar) - -After all the constants are combined, the result is that HWIV differs from PDE4 only in using a different effective D_u -value, namely 0.164 as compared to 0.409. This causes a difference in scale of all simulated patterns, the scale -difference is sqrt(0.164/0.409) = 0.63324. - -*/ - -void pattern_load(float * u, float * v, int width, int height, - const char * pattern_filename, int x, int y, int orient) -{ - int i, j; - FILE * patfile; - long fsize, csz; - double data[2]; - char * test_endian; - int big_endian; - float scale = 0.63324 * sqrt(g_scale / 2.0); - - data[0] = 1.0f; - test_endian = (char *) data; - // printf("endian test bytes: %02X .. %02X\n", test_endian[0], test_endian[7]); - if (test_endian[0] == 0x3F) { - // The first byte in memory is part of the exponent. This means we're on a big-endian machine. - big_endian = 1; - } else { - big_endian = 0; - } - - FILE_LENGTH(pattern_filename, &fsize); - - if (fsize <= 0) { - fprintf(stderr, "Could not find pattern file '%s'\n", pattern_filename); - exit(-1); - } - - /* PDE4 pattern files are a square grid of pixels, each pixel is a U followed by a V, all values are IEEE - /* double-precision floating-point. The size of the square is a power of 2 or a power of 2 times 1.5, ranging from 4 - /* to 256. */ - csz = MIN_CLIPSIZE; - while((csz < MAX_CLIPSIZE) && (csz * csz * 2L * sizeof(double) < fsize)) { - csz = next_patsize(csz); - } - - if (csz * csz * 2L * sizeof(double) == fsize) { - int i2, j2, i3, j3, prev_i2, prev_j2; - double u_avg, v_avg, perim_count; - int min_i = height; int max_i = 0; - int min_j = width; int max_j = 0; - prev_i2 = prev_j2 = -1; - - // Size matched exactly - // printf("Clip file is %ld bytes long, size %ldx%ld.\n", fsize, csz, csz); - printf("Placing pattern %s at position (%d,%d) orientation %d\n", pattern_filename, x, y, orient); - patfile = fopen(pattern_filename, "r"); - - u_avg = v_avg = perim_count = 0.0; - for(i = 0; i < csz; i++) { // i is row number, Y dimension - // i2 = (i - csz/2L)*63L/100L; - i2 = i - csz/2L; - i2 = (int) (scale * ((float) i2)); - for(j = 0; j < csz; j++) { // j is column number, X dimension - float u_val, v_val; - // j2 = (j - csz/2L)*63L/100L; - j2 = j - csz/2L; - j2 = (int) (scale * ((float) j2)); - - // Copy to i3, j3 because we're going to change the values - i3 = i2; j3 = j2; - - // Now apply orientation - if (orient & 1) { i3 = - i3; } - if (orient & 2) { j3 = - j3; } - if (orient & 4) { int t = i3; i3 = j3; j3 = t; } - - i3 = y + i3; - j3 = (width-1) - (x + j3); - - fread((void *) data, sizeof(double), 2, patfile); - if (big_endian) { - byteswap_2_double(data); - } - - /* Add this point to the perimeter average */ - if ((i == 0) || (i == csz-1) || (j == 0) || (j == csz-1)) { - u_avg += data[0]; v_avg += data[1]; perim_count += 1.0; - } - - /* Because we are ADDING the pattern to the existing grid values, rather than simply replacing grid values, - we need to modify each pixel at most once. */ - if ((i2 != prev_i2) && (j2 != prev_j2)) { - /* Next we clip to the actual grid dimensions */ - if ((i3 >= 0) && (i3 < height) && (j3 >= 0) && (j3 < width)) { - /* Keep track of the min and max values of both coordinates (used below) */ - if (i3 < min_i) { min_i = i3; } if (i3 > max_i) { max_i = i3; } - if (j3 < min_j) { min_j = j3; } if (j3 > max_j) { max_j = j3; } - u_val = (float) data[0]; v_val = (float) data[1]; - u[i3 * width + j3] += u_val; - v[i3 * width + j3] += v_val; - } - } - prev_j2 = j2; - } - prev_i2 = i2; - } - fclose(patfile); - - /* Now we subtract the average perimeter value from all the pixels that were adjusted. This allows the user to place - two patterns very close to one another, even if the "bounding rectangles" would normally cause part of the first - pattern to get wiped out by the second. - Note that the global limit to the range [0,1] is enforced in the main init() routine after all patterns are - loaded. */ - u_avg = u_avg / perim_count; - v_avg = v_avg / perim_count; - for(i=min_i; i<=max_i; i++) { - for(j=min_j; j<=max_j; j++) { - u[i * width + j] -= u_avg; - v[i * width + j] -= v_avg; - } - } - } else { - printf("Error: %s size %ld is not canonical.\n", pattern_filename, fsize); - } -} - -int next_patsize(int former) -{ - int newval; - - /* NOTE: We assume 2's complement notation, in which case (x & (x-1)) is zero iff x is a power of 2 */ - if ((former & (former-1)) == 0) { - // it was a power of 2 - newval = (former * 3) / 2; - } else { - newval = (former * 4) / 3; - } - if (newval > MAX_CLIPSIZE) { - newval = MAX_CLIPSIZE; - } - return(newval); -} - -void byteswap_2_double(double * array) -{ - char * bytes; - char t; - bytes = (char *) array; - int i; - - /* We have two doubles to swap */ - for(i=0; i<2; i++) { - t = bytes[8*i+0]; bytes[8*i+0] = bytes[8*i+7]; bytes[8*i+7] = t; - t = bytes[8*i+1]; bytes[8*i+1] = bytes[8*i+6]; bytes[8*i+6] = t; - t = bytes[8*i+2]; bytes[8*i+2] = bytes[8*i+5]; bytes[8*i+5] = t; - t = bytes[8*i+3]; bytes[8*i+3] = bytes[8*i+4]; bytes[8*i+4] = t; - } -} - -void init(float *u, float *v, int width, int height, int density, int lowback, int BZrects) -{ - int nsp, i; - int base, var; - - srand((unsigned int)time(NULL)); - - if (density <= 0) { - nsp = 0; - } else { - if (density == 3) { - base = (height * width) / 512; - var = 2; - } else { - base = density * 20; - var = density * (height * width) / 1000; - } - - nsp = base + (rand() % var); - printf("Adding %d random rectangles\n", nsp); - } - - i5_bkg(u, v, width, height, lowback ? 0 : 1); - - for(i=0; i -#include - -// stdlib -#include - -// local: -#include "ready_display.h" -#include "../util.h" - -bool display(int g_width, int wid_x, int g_height, float *r, float *g, float *b, - double iteration, float model_scale, bool auto_brighten,float manual_brighten, - int image_scale,int delay_ms,const char* message, bool write_video) -{ - int full_width = g_width + 2*wid_x; - static bool need_init = true; - - static IplImage *im,*im2; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "ReaDy (ESC to quit)"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(g_width,g_height),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(g_width*image_scale,g_height*image_scale),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(write_video) - { - video = cvCreateVideoWriter("vid-Gray-Scott.avi",CV_FOURCC('D','I','V','X'),25.0,cvGetSize(im),1); - if(video == NULL) { - fprintf(stdout, "NULL from cvCreateVideoWriter\n"); exit(-1); - } - } - } - - // convert float arrays to IplImage for OpenCV to display - float val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - for(int i=0;imaxR) maxR=val; - - val = g[i*full_width+j]; - if(valmaxG) maxG=val; - - val = b[i*full_width+j]; - if(valmaxB) maxB=val; - } - } - // I use this to find out the range of the chemicals when adding a new R-D system to ReaDy - // printf("R:[%f..%f] G:[%f..%f] B:[%f..%f]\n", minR,maxR, minG,maxG, minB,maxB); - - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + i*im->widthStep))[j*im->nChannels + 2] = (uchar)val; - - val = g[i*full_width+j2]; - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val = (val * 255.0 / manual_brighten); - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + i*im->widthStep))[j*im->nChannels + 1] = (uchar)val; - - val = b[i*full_width+j2]; - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val = (val * 255.0 / manual_brighten); - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + i*im->widthStep))[j*im->nChannels + 0] = (uchar)val; - } - } - - cvResize(im,im2); - - char txt[100]; - const char * fmt; - - if (g_width > 170) { - fmt = "Generation=%7g (model's t=%g)"; - } else if (g_width > 140) { - fmt = "Gen.=%7g (model's t=%g)"; - } else { - fmt = "G=%7g t=%g"; - } - sprintf(txt,fmt,iteration, ((double) iteration) / model_scale); - cvPutText(im2,txt,cvPoint(5,20),&font,white); - cvPutText(im2,message,cvPoint(5,40),&font,white); - - if(write_video) - cvWriteFrame(video,im); - - cvShowImage(title,im2); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - if(write_video) - cvReleaseVideoWriter(&video); - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - return true; - } - return false; -} - -void colorize(float *u, float *v, float *du, float uv_range, - float *red, float *green, float *blue, int width, int height, - int color_style, int pastel_mode) -{ - float r, g, b; - // Step by row - for(int i = 0; i < height*width; i++) { - float uval = u[i] / uv_range; - float vval = v[i] / uv_range; - float delta_u = du[i] / uv_range; - - color_mapping(uval, vval, delta_u, color_style, &r, &g, &b, pastel_mode); - red[i] = r; - green[i] = g; - blue[i] = b; - } -} - -/* - These are all the colour schemes from Robert Munafo's PDE3 and PDE4 programs, some of which which date back to 1994. -Included are Pearson's original colours from the 1993 paper, some enhanced versions of Pearson with the derivative shown -in various ways, some monochrome mappings, and the colours used for all the figures and videos on -mrob.com/pub/comp/xmorphia. - Colormap 22 is the one that used to be selected by "-color" (that option now selects colormap 17). -The rest, including colormap 0, are selected by "-oldcolor N" - */ -void color_mapping(float u0, float v0, float dU, int pm, float *red, float *green, float *blue, int pastel) -{ - float diff, dif2; - float t1, t2, t3, t4; - - // Old ratio calculation doesn't work any better than a simple linear - // scaling, so I've returned to that - 20090311 - diff = (dU * 3.0e5) + 0.5; // 3e5 - diff = minmax(diff, 0.0, 1.0); - - dif2 = (dU * 3.0e7) + 0.5; // 3e7 - dif2 = minmax(dif2, 0.0, 1.0); - - t1 = diff; - t1 = minmax(t1, 0.0, 1.0); - - t2 = u0; - t2 = minmax(t2, 0.0, 1.0); - - t3 = v0; - t3 = minmax(t3, 0.0, 1.0); - - switch(pm) { - case 0: - // Original color scheme from the very very earliest versions of pde1. - *red = t2; *green = t3; *blue = t2; - break; - case 1: - // Like colormap 6: Red component shows whether U is increasing, - // decreasing or is unchanged as compared to the (previous/next) - // generation. (Green and Blue show U and V, like colormap 6). - // This system dates back to 19940826, when I had not yet eliminated - // the second frame buffer and the derivative calculation was thus - // considerably easier. - *red = t1; *green = t2; *blue = t3; - break; - case 2: - *red = t2; *green = t1; *blue = t3; - break; - case 3: - // The classic version of this colormap was: - // *red = 1.0-t3; *green = 1.0-t3; *blue = 1.0-t2 - // But this just looks like a washed-out version of colormap 4, so I've - // replaced it with this which is a nice blue and green variation - *red = t3; *green = 1.0-t2; *blue = 1.0-t1; - break; - case 4: - // Looks way too much like colormap 6 - *blue = 1.0-t2; - t4 = 1.0-t3 - (*blue/2.0); if (t4 < 0) t4 = 0; - *red = t4; *green = t4; - break; - case 5: - t2 = 1.0-t2; - t4 = 1.0-t3 - (t2/2.0); if (t4 < 0) t4 = 0; - if (t4 > t2) { - *red = t4-t2; *green = t4-t2; *blue = t2-t4+1.0; - } else { - *red = t2-t4; *green = t4-t2+1.0; *blue = t4-t2+1.0; - } - break; - case 6: - // This is the color mode I settled with early on; as of 2009 - // I didn't even remember ever having other color modes. - t2 = 1.0-t2; - t4 = 1.0-t3 - (t2/2); if (t4 < 0) t4 = 0; - if (t4 > t2) { - *red = t4-t2; *green = t4-t2; *blue = t2-t4+1.0; - } else { - *red = t2-t4; *green = 0; *blue = t4-t2+1.0; - } - break; - case 7: - // I added this color mode on 20090116 because I wanted something - // pretty for the vision board, and for some variety -- all the - // other color modes had yellow in the large area to the right! - t2 = 1.0-t2; - t4 = 1.0-t3 - (t2/2); if (t4 < 0) t4 = 0; - if (t4 > t2) { - *red = 1.0-(t4-t2); *green = 1.0-(t4-t2); *blue = (t4-t2)/2.0; - } else { - *red = 1.0-(t2-t4); *green = 1.0; *blue = (t2-t4)/2.0; - } - break; - case 8: - // This color mode is meant to reflect Pearson's description of the - // two large plain states as "blue state" and "red state" (see the - // Pearson paper, 9304003.pdf, page 9, caption for figure 3), - // without actually using the HSV color space (for that use color - // mode 9) - *red = (1.0-t3)*0.75; *green = 0; *blue = 1.0-t2; - break; - default: - case 9: - // This is a reasonable facsimile of Pearson's coloring scheme. He - // shows only the U value, expressing it as a hue in the HSV color - // space. - t2 = 1.0 - t2; - t2 = t2 * 0.7 + 0.061; - t3 = 1.0; - t4 = 1.0; - if (t2 < 0.02136) { - t3 = 0.7324; - } else if (t2 < 0.3052) { - t3 = 0.7324 + (t2-0.2136) * 2.5; - } - if (t2 < 0.1526) { - t4 = 0.4883; - } else if (t2 < 0.2441) { - t4 = 0.4883 + (t2-0.1526) * 5.0; - } - go_hsv2rgb(t2, t3, t4, red, green, blue); - break; - - case 10: // `a' - // Colorscheme created in 200901 for PDE3. - // Like colormap 6: Saturation shows whether U is increasing, decreasing - // or is unchanged as compared to the (previous/next) generation. Hue - // shows U; value constant at 1.0. - t1 = 0.5 + t1/2.0; - go_hsv2rgb(0.8 * (1.0 - t2), t1, 1.0, red, green, blue); - break; - - case 11: // `b' - // First new colorscheme devised on 20090311. Rick likes this one. - t1 = 0.5 + t1/2.0; - go_hsv2rgb(0.8 * ((float) (1.0 - t2)), (float) t1, t2, red, green, blue); - break; - - case 12: // `c' - // This one compresses into MP4 better because it avoids bright red - t1 = 0.25 + t1 * 0.75; - go_hsv2rgb(0.8 * ((float) (t2)), t1, t2, red, green, blue); - break; - - case 13: // `d' - // Modification of scheme 10, uses a greater range of the color space - t1 = 0.5 + t1/2.0; - t4 = 4 * (1.0 - t2) / 5; - t4 = t4 + 0.75; t4 = t4 - floor(t4); // olive, yellow, red, pink, lavender - go_hsv2rgb(t4, t1, t2, red, green, blue); - break; - - case 14: // `e' - // Modification of scheme 13, limiting use of saturated colors to force - // better quality out of H264/mp4 - t1 = 0.25 + t1 * 0.4; - t4 = (1.0 - t2)*1.8; // more than one full turn around the hue wheel - t4 = t4 + 0.5798; t4 = t4 - floor(t4); // deep purple, deep blue, green, yellow, red, pink, blue, aqua - go_hsv2rgb(t4, t1, t2, red, green, blue); - break; - - case 15: // `f' - // Another one that compresses well, with orange in the "desert" - t1 = 0.25 + t1 * 0.4; - t4 = 6 * (1.0 - t2) / 5; // more than one full turn around the hue wheel - t4 = t4 + 1.0681; t4 = t4 - floor(t4); // dark brown, deep purple, deep blue, green, yellow, orange - go_hsv2rgb(t4, t1, t2, red, green, blue); - break; - - case 16: // `g' - // Modification of scheme 13, limiting use of saturated colors to force - // better quality out of H264/mp4 - t1 = 0.25 + t1 * 0.4; - t4 = (1.0 - t2) * 1.4; - t4 = t4 + 0.6409; t4 = t4 - floor(t4); // deep purple, deep blue, teal, green, yellow, red, pink, blue - go_hsv2rgb(t4, t1, t2, red, green, blue); - break; - - case 17: // colormode 17, originally letter `h' - // Back to colorscheme 11, but with subdued brightness and saturation - t1 = 0.25 + t1 * 0.6; - t4 = (1.0 - t2) * 1.0; - t4 = t4 + 1.9836; t4 = t4 - floor(t4); // deep purple, deep blue, teal, green yellow, red - go_hsv2rgb(t4, t1, 0.3815 + t2/2.0, red, green, blue); - break; - - case 18: // `i' - // Created on 20090404 specifically for monochrome figures in uskate - // world. The first version of this was: - // t2 = minmax((t2 * 2) - 0.6104, 0.0, 1.0) - // Then I did 5 shades of gray on thresholds 0.40, 0.418, 0.422, 0.55. - // Finally, on 20090405 I optimized it for the B&W illustrations in the - // paper I'm writing and came up with this combination of 3 linear - // sclings. - diff = (u0 - 0.4) * 16.28; - if (diff > 1.22) { - diff = (diff / 10.0) + 0.5; - } else if (diff > 0.4578) { - diff = (diff / 5.0) + 0.3662; - } - t2 = minmax(diff, 0.0, 1.0); - go_hsv2rgb(0, 0, t2, red, green, blue); - break; - - case 19: // `j' - // Created on 20090404 specifically for monochrome figures in uskate world - go_hsv2rgb(0, 0, (1.0 - t1), red, green, blue); - break; - - case 20: - // Created on 20090507, higher-contrast derivative - t4 = 1.0 * dif2; - t4 = minmax(t4, 0, 1.0); - go_hsv2rgb(0, 0, (1.0 - t4), red, green, blue); - break; - - case 21: - // 20101122: Monochrome without contrast adjustment - go_hsv2rgb(0, 0, u0, red, green, blue); - break; - - case 22: - // Created by Robert Munafo for Tim Hutton's reaction-diffusion project - // Something simple to start (-: - // different colour schemes result if you reorder these, or replace - // "x" with "1.0f-x" for any of the 3 variables - diff = dU * 1000.0f + 0.5f; - diff = minmax(diff, 0.0, 1.0); - *red = diff; // increasing U will look pink - *green = 1.0-u0; *blue = 1.0-v0; - - } - - // Pastel mode transformation simply reduces all three components by half -- - // either as additive primaries (making it darker) or as subtractive - // primaries (making it lighter) - if (pastel == 1) { - *red = 0.5 + *red/2.0; - *green = 0.5 + *green/2.0; - *blue = 0.5 + *blue/2.0; - } else if (pastel == 2) { - *red = *red/2.0; - *green = *green/2.0; - *blue = *blue/2.0; - } -} - -void go_hsv2rgb(float h, float s, float v, float *red, float *green, float *blue) -{ - float p16 = 65536.0; - float i, f, p, q, t, r, g, b; - int ii; - - if (s <= 0.0) { - // Ignore hue - r = v; - g = v; - b = v; - } else { - h = h * 6.0; - ii = ((int) h); - i = (float) ii; - f = h - i; - p = v*(1.0 - s); - q = v*(1.0 - (s*f)); - t = v*(1.0 - (s*(1.0 - f))); - switch(ii) { - case 0: - r = v; g = t; b = p; - break; - case 1: - r = q; g = v; b = p; - break; - case 2: - r = p; g = v; b = t; - break; - case 3: - r = p; g = q; b = v; - break; - case 4: - r = t; g = p; b = v; - break; - case 5: - default: - r = v; g = p; b = q; - break; - } - } - *red = r; *green = g; *blue = b; -} - diff --git a/Ready_old_CLI/ready_display/ready_display.h b/Ready_old_CLI/ready_display/ready_display.h deleted file mode 100644 index aa90aa499..000000000 --- a/Ready_old_CLI/ready_display/ready_display.h +++ /dev/null @@ -1,10 +0,0 @@ -bool display(int width, int wid_x, int height, float *r, float *g, float *b, - double iteration, float model_scale, bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message, bool write_video); - -void colorize(float *u, float *v, float *du, float uv_range, - float *red, float *green, float *blue, - int width, int height, int color_style, int pastel_mode); -void color_mapping(float u0, float v0, float dU, int pm, float *red, float *green, float *blue, int pastel); -void go_hsv2rgb(float h, float s, float v, float *red, float *green, float *blue); - diff --git a/Ready_old_CLI/util.cpp b/Ready_old_CLI/util.cpp deleted file mode 100644 index 02cf47411..000000000 --- a/Ready_old_CLI/util.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - - util.cpp from ReaDy module - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - - */ - -#include - -// return a random value between lower and upper -float ut_frand(float lower, float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} diff --git a/Ready_old_CLI/util.h b/Ready_old_CLI/util.h deleted file mode 100644 index 6d7c63cfb..000000000 --- a/Ready_old_CLI/util.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - - util.h from ReaDy module - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - - */ - -#ifndef max -# define max(a,b) (((a) > (b)) ? (a) : (b)) -# define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef minmax -# define minmax(v, lo, hi) max(lo, min(v, hi)) -#endif - -float ut_frand(float lower, float upper); diff --git a/Schlogl/CMakeLists.txt b/Schlogl/CMakeLists.txt deleted file mode 100644 index 721dca237..000000000 --- a/Schlogl/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(Schlogl) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(Schlogl - schlogl.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(Schlogl ${OpenCV_LIBS} ) diff --git a/Schlogl/schlogl.cpp b/Schlogl/schlogl.cpp deleted file mode 100644 index d40505bb0..000000000 --- a/Schlogl/schlogl.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y]); - -void compute(float a[X][Y], - float da[X][Y], - float speed); - -int main() -{ - // Schlogl model: - // F. Schlogl, "Chemical reaction models for non-equilibrium phase transitions", Zeitschrift fur Physik, 253, 147, (1972) - // Following: - // http://wwwnlds.physik.tu-berlin.de/~hizanidis/talks/front_propagation_noise.pdf - - // -- parameters -- - float speed = 0.1f; - // ---------------- - - // this array stores the chemical concentrations: - float a[X][Y]; - // this array store the rate of change of those chemicals: - float da[X][Y]; - - // put the initial conditions into each cell - init(a); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float alpha,float beta,float gamma,float nu, - float speed, - bool parameter_space); - -int main() -{ - // J. Schnakenberg, Simple chemical reaction systems with limit cycle behaviour, J. Theor. Biol. 81 (1979) 389–400. - // Following: - // ftp://ftp.comlab.ox.ac.uk/pub/Documents/techreports/NA-03-16.pdf - - // Not sure this is right though. - - // -- parameters -- - float alpha=1.0f; - float beta=0.9f; - float gamma=1.0f; - float nu=10.0f; - - float speed = 0.001f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -#define PI 3.1415926535 - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - float x,y; - for(int i = 0; i < X; i++) - { - x=float(i)/X; - for(int j = 0; j < Y; j++) - { - /*y = float(j)/Y; - a[i][j] = 0.919145 + 0.0016*cos(2*PI*(x+y)); - b[i][j] = 0.937903 + 0.0016*cos(2*PI*(x+y)); - for(int t=1;t<=8;t++) - { - a[i][j] += 0.01 * cos(2*PI*t*x); - b[i][j] += 0.01 * cos(2*PI*t*x); - }*/ - a[i][j] = frand(-1.0f,1.0f); - b[i][j] = frand(-1.0f,1.0f); - } - } -} - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float alpha,float beta,float gamma,float nu, - float speed, - bool parameter_space) -{ - const bool toroidal = true; - - int iprev,inext,jprev,jnext; - - // compute change in each cell - for(int i = 0; i < X; i++) { - if(toroidal) { - iprev = (i + X - 1) % X; - inext = (i + 1) % X; - } - else { - iprev = max(0,i-1); - inext = min(X-1,i+1); - } - - for(int j = 0; j < Y; j++) { - if(toroidal) { - jprev = (j + Y - 1) % Y; - jnext = (j + 1) % Y; - } - else { - jprev = max(0,j-1); - jnext = min(Y-1,j+1); - } - - if(parameter_space) { - /*const float kmin=0.045f,kmax=0.07f,fmin=0.00f,fmax=0.14f; - // set f and k for this location (ignore the provided values of f and k) - k = kmin + i*(kmax-kmin)/X; - f = fmin + j*(fmax-fmin)/Y;*/ - } - - float aval = a[i][j]; - float bval = b[i][j]; - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - //da[i][j] = 1000.0f*(0.126779f - aval + aval*aval*bval) + dda / (128.0f*128.0f); - //db[i][j] = 1000.0f*(0.792366f - aval*aval*bval) + 10.0f*ddb / (128.0f*128.0f); - da[i][j] = dda + gamma*(alpha - aval + aval*aval*bval); - db[i][j] = nu * ddb + gamma*(beta - aval*aval*bval); - } - } - - // effect change - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - a[i][j] += (speed * da[i][j]); - b[i][j] += (speed * db[i][j]); - } - } -} - diff --git a/SpeedComparisons/CMakeLists.txt b/SpeedComparisons/CMakeLists.txt deleted file mode 100644 index c892dac05..000000000 --- a/SpeedComparisons/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -cmake_minimum_required(VERSION 2.6) - -project(SpeedComparisons) - -if (NOT CMAKE_BUILD_TYPE) - message(STATUS "No build type selected, default to Release") - set(CMAKE_BUILD_TYPE "Release") -endif() - -add_subdirectory(GrayScott) -add_subdirectory(GrayScott_HWIVector) -add_subdirectory(GrayScott_double) -add_subdirectory(GrayScott_OpenCV) -add_subdirectory(GrayScott_SSE) -add_subdirectory(GrayScott_SSE_OpenMP) -add_subdirectory(GrayScott_OpenMP) -add_subdirectory(GrayScott_OpenCL) -add_subdirectory(GrayScott_OpenCL_float2) -add_subdirectory(GrayScott_OpenCL_Local) -add_subdirectory(GrayScott_OpenCL_2x2) -add_subdirectory(GrayScott_OpenCL_Image) -add_subdirectory(GrayScott_OpenCL_Image_2x2) - diff --git a/SpeedComparisons/Display/defs.h b/SpeedComparisons/Display/defs.h deleted file mode 100644 index 6b34e615e..000000000 --- a/SpeedComparisons/Display/defs.h +++ /dev/null @@ -1,4 +0,0 @@ -// the size of the world: -#define X 256 -#define Y 256 -// (must be powers of 2) diff --git a/SpeedComparisons/Display/display.cpp b/SpeedComparisons/Display/display.cpp deleted file mode 100644 index fb0fdb0ef..000000000 --- a/SpeedComparisons/Display/display.cpp +++ /dev/null @@ -1,145 +0,0 @@ -// OpenCV: -#include -#include - -// stdlib -#include - -// local: -#include "display.h" - -bool display(float r[X][Y],float g[X][Y],float b[X][Y], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message) -{ - static bool need_init = true; - static bool write_video = false; - - static IplImage *im,*im2,*im3; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(X,Y),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(X*scale,Y*scale),IPL_DEPTH_8U,3); - im3 = cvCreateImage(cvSize(X*scale+border*2,Y*scale+border),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(write_video) - { - video = cvCreateVideoWriter(title,CV_FOURCC('D','I','V','X'),25.0,cvGetSize(im3),1); - border = 20; - } - } - - // convert float arrays to IplImage for OpenCV to display - float val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - if(auto_brighten) - { - for(int i=0;imaxR) maxR=val; - } - if(g) { - val = g[i][j]; - if(valmaxG) maxG=val; - } - if(b) { - val = b[i][j]; - if(valmaxB) maxB=val; - } - } - } - } - #pragma omp parallel for - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 2] = (uchar)val; - } - if(g) { - float val = g[i][Y-j-1]; - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 1] = (uchar)val; - } - if(b) { - float val = b[i][Y-j-1]; - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - cvCopyMakeBorder(im2,im3,cvPoint(border*2,0),IPL_BORDER_CONSTANT); - - char txt[100]; - if(!write_video) - { - sprintf(txt,"%d",iteration); - cvPutText(im3,txt,cvPoint(20,20),&font,white); - - // DEBUG: - sprintf(txt,"%.4f,%.4f,%.4f",r[0][0],g[0][0],b[0][0]); - //cvPutText(im3,txt,cvPoint(20,40),&font,white); - } - - // DEBUG: - if(write_video) - { - cvPutText(im3,"0.06",cvPoint(5,15),&font,white); - cvPutText(im3,"F",cvPoint(5,im2->height/2),&font,white); - cvPutText(im3,"0.00",cvPoint(5,im2->height),&font,white); - cvPutText(im3,"0.03",cvPoint(border*2-10,im2->height+15),&font,white); - cvPutText(im3,"K",cvPoint(border*2+im2->width/2,im2->height+15),&font,white); - cvPutText(im3,"0.07",cvPoint(im3->width-35,im2->height+15),&font,white); - } - - cvPutText(im3,message,cvPoint(20,40),&font,white); - - if(write_video) - cvWriteFrame(video,im3); - - cvShowImage(title,im3); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - if(write_video) - cvReleaseVideoWriter(&video); - return true; - } - return false; -} - diff --git a/SpeedComparisons/Display/display.h b/SpeedComparisons/Display/display.h deleted file mode 100644 index dcd63ad58..000000000 --- a/SpeedComparisons/Display/display.h +++ /dev/null @@ -1,5 +0,0 @@ -#include "defs.h" - -bool display(float r[X][Y],float g[X][Y],float b[X][Y], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); diff --git a/SpeedComparisons/GrayScott/CMakeLists.txt b/SpeedComparisons/GrayScott/CMakeLists.txt deleted file mode 100644 index a9986d64a..000000000 --- a/SpeedComparisons/GrayScott/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -project(GrayScott) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(GrayScott - gray_scott.cpp - ../Display/display.cpp - ../Display/display.h - ../Display/defs.h -) - -TARGET_LINK_LIBRARIES(GrayScott ${OpenCV_LIBS} ) diff --git a/SpeedComparisons/GrayScott/gray_scott.cpp b/SpeedComparisons/GrayScott/gray_scott.cpp deleted file mode 100644 index 52953132a..000000000 --- a/SpeedComparisons/GrayScott/gray_scott.cpp +++ /dev/null @@ -1,238 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float r_a,float r_b,float f,float k, - float speed, - bool parameter_space); - -static int g_wrap = 1; -static bool g_paramspace = 0; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if (strcmp(argv[i],"-paramspace")==0) { - // do a parameter space plot, like in the Pearson paper - g_paramspace = true; - } else if (strcmp(argv[i],"-wrap")==0) { - // patterns wrap around ("torus", also called "continuous boundary - // condition") - g_wrap = 1; - } else { - fprintf(stderr, "Unrecognized argument: '%s'\n", argv[i]); - exit(-1); - } - } - - // Here we implement the Gray-Scott model, as described here: - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // http://arxiv.org/abs/patt-sol/9304003 - - // -- parameters -- - float r_a = 0.082f; - float r_b = 0.041f; - - // for spots: - float k = 0.064f; - float f = 0.035f; - // for stripes: - //float k = 0.06f; - //float f = 0.035f; - // for long stripes - //float k = 0.065f; - //float f = 0.056f; - // for dots and stripes - //float k = 0.064f; - //float f = 0.04f; - // for spiral waves: - //float k = 0.0475f; - //float f = 0.0118f; - float speed = 1.0f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - const int N_FRAMES_PER_DISPLAY = 200; - int iteration = 0; - double fps_avg = 0.0; // decaying average of fps - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elapsed; - double fps = 0.0; // frames per second - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - - //if(hypot(i%50-25/*-X/2*/,j%50-25/*-Y/2*/)<=frand(2,5)) - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) // start with a uniform field with an approximate circle in the middle - { - a[i][j] = 0.0f; - b[i][j] = 1.0f; - } - else { - a[i][j] = 1; - b[i][j] = 0; - } - //float v = frand(0.0f,1.0f); - //a[i][j] = v; - //b[i][j] = 1.0f-v; - //a[i][j] += frand(-0.01f,0.01f); - //b[i][j] += frand(-0.01f,0.01f); - } - } -} - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float r_a,float r_b,float f,float k,float speed, - bool parameter_space) -{ - //const bool toroidal = false; - - //int iprev,inext,jprev,jnext; - - // compute change in each cell - for(int i = 0; i < X; i++) { - int iprev,inext; - if (g_wrap) { - iprev = (i + X - 1) % X; - inext = (i + 1) % X; - } else { - iprev = max(0,i-1); - inext = min(X-1,i+1); - } - - for(int j = 0; j < Y; j++) { - int jprev,jnext; - if (g_wrap) { - jprev = (j + Y - 1) % Y; - jnext = (j + 1) % Y; - } else { - jprev = max(0,j-1); - jnext = min(Y-1,j+1); - } - - float aval = a[i][j]; - float bval = b[i][j]; - - if(parameter_space) { - const float kmin=0.045f,kmax=0.07f,fmin=0.01f,fmax=0.09f; - // set f and k for this location (ignore the provided values of f and k) - k = kmin + i*(kmax-kmin)/X; - f = fmin + j*(fmax-fmin)/Y; - } - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = r_a * dda - aval*bval*bval + f*(1-aval); - db[i][j] = r_b * ddb + aval*bval*bval - (f+k)*bval; - } - } - - // effect change - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - a[i][j] += speed * da[i][j]; - b[i][j] += speed * db[i][j]; - // kill denormals by adding a teeny tiny something (http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.20.1348&rank=4) - a[i][j] += 1e-10f; - b[i][j] += 1e-10f; - } - } -} - diff --git a/SpeedComparisons/GrayScott_HWIVector/CMakeLists.txt b/SpeedComparisons/GrayScott_HWIVector/CMakeLists.txt deleted file mode 100644 index e8e3a1ac8..000000000 --- a/SpeedComparisons/GrayScott_HWIVector/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -project(GrayScott_HWIVector) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -if(MSVC) - add_definitions(/arch:SSE2) -else() - add_definitions(-msse2) -endif() - -add_executable(GrayScott_HWIVector - gray_scott_hwivector.cpp - hwi_vector.h - dicek.h - display_hwiv.cpp - display_hwiv.h -) - -TARGET_LINK_LIBRARIES(GrayScott_HWIVector ${OpenCV_LIBS} ) diff --git a/SpeedComparisons/GrayScott_HWIVector/dicek.h b/SpeedComparisons/GrayScott_HWIVector/dicek.h deleted file mode 100644 index 432e9c73a..000000000 --- a/SpeedComparisons/GrayScott_HWIVector/dicek.h +++ /dev/null @@ -1,507 +0,0 @@ -/* - dicek.h ^u 120 ^x f - -This provides macros for multi-threaded programming. The intention is to allow a single program source to be compiled -for Windows, Linux or Mac OS, and accomplish multi-threading without having to have a lot of #ifdefs around the -OS-specific code. - -REVISION HISTORY - 20110929 First version of simple DICEK_SPLIT_MERGE function. This is a blocking, one-time N-way parallel subroutine -call (no provision for threads to continue through multiple synchronization barriers). Works in DICEK_USE_THREADS and -DICEK_EMULATE modes (tested with math3000.cxx A094358() routine). - 20110930 Add a first (extremely speculative and untested) shot at the Windows implementation, currently protected by a -fall-back #ifdef block at the beginning that checks for Windows and reverts to EMULATE mode, pending testing by a real -Windows programmer. - 20111001 Add the thread interlock macros and get them working in EMULATE and POSIX modes. - 20111009 Initialize mutexes in Win32 version of DICEK_SPLIT_1 - 20111011 Use semaphores instead of mutexes in Win32 version. - -*/ - -/* First we test for each of the known operating system environments */ - -#if (defined(__linux__) || defined(__APPLE__)) -# ifndef DICEK_USE_POSIX -# ifndef DICEK_EMULATE -# define DICEK_USE_POSIX -# endif -# endif -#endif - - -#ifdef _WIN32 -# ifndef DICEK_EMULATE -# define DICEK_USE_PROCESS_H -# endif -#else -# define __stdcall -#endif - - -#ifndef DICEK_USE_PROCESS_H -# ifndef DICEK_USE_POSIX -# ifndef DICEK_EMULATE -# define DICEK_EMULATE -# endif -# endif -#endif - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// #include // -// /* xx*/ /* */ // -// /* */ /* */ // -// int main() {char a[] ={ 'T', 'h','i', 's',32, 'p','r' ,'o','g' // -// ,'r','a','m', 32, 'j', 'u', 's', 't' ,32 ,'d', 'o', 'e' ,'s' ,32 ,'i' // -// ,'t' ,32 ,'t' ,'h' ,'e' ,32, 'h' ,97,'r','d' ,32, 'w',97,121,33, 32, 40, // -// 68, 'o', 'n', 39, 't' ,32 ,'y' ,'o', 117, 32 ,'t' ,'h' ,'i' // -// ,'n' /* Xy =a +3 +n ++ ;a= b- (* x/z ); if // -// (Xy-++n<(z+*x))z =b;a +b, z+= x*/,107 , 63,63 ,63,41,'\n' ,00}; puts(a);} /*.RPM.*/ // -// // -// Emulated versions of the DICEK macros. (These also serve as documentation for what each macro does) // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#ifdef DICEK_EMULATE - -/* - We cannot do inter-thread communication because we execute each "spawned thread" to completion before starting the next -one. Therefore, the first time any child thread tries to DICEK_INTERLOCK, the program will deadlock because there is no -way for the parent to get to the DICEK_CH_BEGIN until the child has exited. - */ -#define DICEK_SUPPORTS_BLOCKING 0 - -/* -DICEK_THREAD_VARS defines variables which need to be included in the parameter block of any subroutine called by -DICEK_SPLIT_MERGE - Since this is the emulated version, there are no master_wkg and child_wkg semaphores. - */ -#define DICEK_THREAD_VARS \ - long DICEK_tnum; \ - void * DICEK_thread; \ - void * DICEK_return; - -/* - DICEK_INIT_NTHR declares an integer-type variable named "nth" and initializes it to the number of hardware threads -supported by this system. Place it in your main() or a function from which threads will be launched. - If you want your threads to have access to the value of nth, you should do "glob = nth;" right after the -DICEK_INIT_NTHR(), where "glob" is a suitably scoped global variable. - -In emulated mode we return 3 as the number of "hardware threads", partly because 3 is a fairly rare value in real -computers (the Athlon X3 being about the only one anyone will have) and therefore if you find that DICEK is running 3 -"threads" then you know it probably failed to auto-detect the environment. - Also, as of this writing (2011) 3 threads is about the average number of hardware threads across all portable and -desktop computers that are out there. (Anything by Intel with the "Core i3" or higher brand has 4 threads, and most -desktop machines have at least 2 cores and 4 threads as well) - However, since this is the emulated-mode version of the macros, the "threads" are actually going to just run one -after the next. - */ -#define DICEK_INIT_NTHR(nth) int nth = 3; - -/* - Place DICEK_DATA in the variable declarations area of the function containing an DICEK_FORK directive. dtype should be -a struct which contains the DICEK_THREAD_VARS macro, and as many other fields as you want. nth is the number of threads -that you will be creating with DICEK_SPLIT_MERGE. - It will declare an array aname (dynamically allocated) which will be an array of structs of type dtype, and fill the -DICEK-specific fields with a thread number and a pointer to the pthreads data for each thread that will be used by -DICEK_SPLIT_MERGE. - */ -#define DICEK_DATA(dtype, arrayname, nth) \ - dtype * arrayname; \ - arrayname = (dtype *)malloc(nth * sizeof(dtype)); \ - for(int _DICEK_i=0; _DICEK_iDICEK_return = (void*)(returnv); - -#define DICEK_MERGE_M(arrayname, index) /* nop */ - -#define DICEK_MERGE(funcname, arrayname, nth) /* nop */ - -#endif - -/* - - - - - - - - - - - - - - - - - - - - - End of the EMULATED section - - - - - - - - - - - - - - - - - - - - - - */ - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// , , // -// /( )` // -// \ \___ / | // -// _nnnn_ /- _ `-/ ' // -// dGGGGMMb (/\/ \ \ /\ // -// @p~qp~~qMb / / | ` \ // -// M|@||@) M| ooooooooo. .oooooo. .oooooo..o ooooo ooooooo ooooo O O ) / | // -// @,----.JM| `888 `Y88. d8P' `Y8b d8P' `Y8 `888' `8888 d8' `-^--'`< ' // -// JS^\__/ qKL 888 .d88' 888 888 Y88bo. 888 Y888..8P (_.) _ ) / // -// dZP qKRb 888ooo88P' 888 888 `"Y8888o. 888 `8888' `.___/` / // -// dZP qKKb 888 888 888 `"Y88b 888 .8PY888. `-----' / // -// fZP SMMb 888 `88b d88' oo .d8P 888 d8' `888b <----. __ / __ \ // -// HZM MMMM o888o `Y8bood8P' 8""88888P' o888o o888o o88888o <----|====O)))==) \) /==== // -// FqM MMMM <----' `--' `.__,' \ // -// __| ". |\dS"qML | | // -// | `. | `' \Zq \ / /\// -//_) \.___.,| .' ______( (_ / \______/ // -//\____ )MMMMMP| .' Versions of the DICEK macros for Linux, Mac OS, ,' ,-----' | // -// `-' `--' hjm and other POSIX-compliant environments `--{__________) // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#ifdef DICEK_USE_POSIX - -#define DICEK_SUPPORTS_BLOCKING 1 - -#include - -// Mach-based system (most notably Apple) use sysctlbyname to query the number of CPUs/cores/threads -# ifdef __MACH__ -# include -# include -# endif - -// TODO: Some Linux systems might need an #include to access the mechanism for finding out the number of threads. - - -#define DICEK_THREAD_VARS \ - long DICEK_tnum; \ - void * DICEK_thread; \ - pthread_mutex_t DICEK_master_wkg; \ - pthread_mutex_t DICEK_child_wkg; \ - void * DICEK_return; - - -#ifdef __APPLE__ -# define DICEK_INIT_NTHR(nth) int nth; { \ - size_t _DICEK_sz_in = sizeof(nth); \ - long _DICEK_rv2 = sysctlbyname("hw.ncpu", (void *) (&nth), &_DICEK_sz_in, 0, 0); \ - if (nth <= 0) { nth = 1; } \ - if (nth > 64) { nth = 64; } } -#else -// TODO: How to query number of threads in Linux (I suspect reading /proc/cpuinfo will work on most, -// but not all, Linuces) -# define DICEK_INIT_NTHR(nth) int nth = 3; -#endif - -#define DICEK_DATA(dtype, arrayname, nth) \ - dtype * arrayname; \ - arrayname = (dtype *)malloc(nth * sizeof(dtype)); \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg)); - -#define DICEK_CH_SYNC \ - pthread_mutex_unlock(&(_DICEK_params->DICEK_master_wkg)); - -#define DICEK_INTERLOCK(arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg)); - -#define DICEK_RETURN(returnv) pthread_exit((void*)(returnv)); - -#define DICEK_MERGE_M(arrayname, index) \ - DICEK_MERGE_1(arrayname, index) \ - pthread_mutex_destroy(&(arrayname[index].DICEK_child_wkg)); \ - pthread_mutex_unlock(&(arrayname[index].DICEK_master_wkg)); \ - pthread_mutex_destroy(&(arrayname[index].DICEK_master_wkg)); - -#define DICEK_MERGE(funcname, arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_i -#include - -#define DICEK_THREAD_VARS \ - long DICEK_tnum; \ - HANDLE DICEK_thread; \ - HANDLE DICEK_master_wkg; \ - HANDLE DICEK_child_wkg; \ - void * DICEK_return; - -#define DICEK_INIT_NTHR(nth) \ - int nth; \ - { SYSTEM_INFO si; GetSystemInfo(&si); nth = si.dwNumberOfProcessors; } - -#define DICEK_DATA(dtype, arrayname, nth) \ - dtype * arrayname; \ - arrayname = (dtype *)malloc(nth * sizeof(dtype)); \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg,INFINITE); - -#define DICEK_CH_SYNC \ - ReleaseSemaphore(_DICEK_params->DICEK_master_wkg, 1, NULL); - -#define DICEK_INTERLOCK(arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_iDICEK_child_wkg, 1, NULL); - -#define DICEK_RETURN(returnv) \ - _DICEK_params->DICEK_return = (void*)(returnv); \ - _endthread(); - -#define DICEK_MERGE_M(arrayname, index) \ - DICEK_MERGE_1(arrayname, index) \ - CloseHandle(arrayname[index].DICEK_thread); \ - CloseHandle(arrayname[index].DICEK_child_wkg); \ - ReleaseSemaphore(arrayname[index].DICEK_master_wkg, 1, NULL); \ - CloseHandle(arrayname[index].DICEK_master_wkg); - -#define DICEK_MERGE(funcname, arrayname, nth) \ - for(int _DICEK_i=0; _DICEK_i -#include - -// stdlib -#include - -// local: -#include "display_hwiv.h" - -#define INDEX(a,x,y) ((a)[(x)*full_width+(y)]) - -bool display(int g_width, int g_height, float *r, float *g, float *b, - double iteration, float model_scale, bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message, bool write_video) -{ - int full_width = g_width + 8; - static bool need_init = true; - - static IplImage *im,*im2; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(g_width,g_height),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(g_width*scale,g_height*scale),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(write_video) - { - video = cvCreateVideoWriter("vid-Gray-Scott.avi",CV_FOURCC('D','I','V','X'),25.0,cvGetSize(im),1); - if(video == NULL) { - fprintf(stdout, "NULL from cvCreateVideoWriter\n"); exit(-1); - } - } - } - - // convert float arrays to IplImage for OpenCV to display - float val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - if(auto_brighten) - { - for(int i=0;imaxR) maxR=val; - - val = INDEX(g,i+4,j); - if(valmaxG) maxG=val; - - val = INDEX(b,i+4,j); - if(valmaxB) maxB=val; - } - } - } - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + i*im->widthStep))[j*im->nChannels + 2] = (uchar)val; - - val = INDEX(g,i,full_width-j-5); - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + i*im->widthStep))[j*im->nChannels + 1] = (uchar)val; - - val = INDEX(b,i,full_width-j-5); - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + i*im->widthStep))[j*im->nChannels + 0] = (uchar)val; - } - } - - cvResize(im,im2); - - char txt[100]; - const char * fmt; - - if (g_width > 170) { - fmt = "Generation=%7g (model's t=%g)"; - } else if (g_width > 140) { - fmt = "Gen.=%7g (model's t=%g)"; - } else { - fmt = "G=%7g t=%g"; - } - sprintf(txt,fmt,iteration, ((double) iteration) / model_scale); - cvPutText(im2,txt,cvPoint(5,20),&font,white); - cvPutText(im2,message,cvPoint(5,40),&font,white); - - if(write_video) - cvWriteFrame(video,im); - - cvShowImage(title,im2); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - if(write_video) - cvReleaseVideoWriter(&video); - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - return true; - } - return false; -} - diff --git a/SpeedComparisons/GrayScott_HWIVector/display_hwiv.h b/SpeedComparisons/GrayScott_HWIVector/display_hwiv.h deleted file mode 100644 index b174670d5..000000000 --- a/SpeedComparisons/GrayScott_HWIVector/display_hwiv.h +++ /dev/null @@ -1,3 +0,0 @@ -bool display(int width, int height, float *r, float *g, float *b, - double iteration, float model_scale, bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message, bool write_video); diff --git a/SpeedComparisons/GrayScott_HWIVector/gray_scott_hwivector.cpp b/SpeedComparisons/GrayScott_HWIVector/gray_scott_hwivector.cpp deleted file mode 100644 index ea1454906..000000000 --- a/SpeedComparisons/GrayScott_HWIVector/gray_scott_hwivector.cpp +++ /dev/null @@ -1,904 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// hardware -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86)) -# include -#endif - -// To convince yourself that the macro library works on any hardware, -// un-comment this "#define HWIV_EMULATE", and you'll get the macros inside -// the #ifdef HWIV_V4F4_EMULATED block in hwi_vector.h. The emulated -// macros do everything with normal floats and arrays, and run about 3-4 times -// slower. -//#define HWIV_EMULATE -#define HWIV_WANT_V4F4 -#include "hwi_vector.h" - -// stdlib: -#include -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } - - // TODO: Make sure this is the correct include file for the FindFirstFile function (used in FILE_LENGTH macro below) - #include -#else - #include - #include -#endif - -// local: -//#define DICEK_EMULATE -#include "dicek.h" -#include "display_hwiv.h" - -static long g_width = 256; -static long g_height = 256; - -float *allocate(long width, long height, const char * error_text, bool for_mm); -float *allocate(long width, long height, const char * error_text, bool for_mm) -{ - long sz; - float * rv; - - sz = width * height * sizeof(*rv); - if (for_mm) { - rv = (float *) _mm_malloc(sz, 16); - } else { - rv = (float *) malloc(((size_t)sz)); - } - - if (rv == NULL) { - fprintf(stderr, "allocate: Could not get %ld bytes for %s\n", ((long) sz), - error_text); - exit(-1); - } - return (rv); -} - -void init(float *a, float *b, long width, long height); - -void compute_allocate(void); -void compute_import(float *u, float *v, long width, long height); - -typedef struct compute_params { - DICEK_THREAD_VARS; - float *u; - float *v; - float *du; - float *dv; - float D_u; - float D_v; - float F; - float k; - float speed; - int parameter_space; - int num_its; - long start_row; - long end_row; - int interlock_type; -} compute_params; - -unsigned __stdcall compute(void * param_block); // Arg is really "compute_params * param_block" - -void compute_dispatch(float *u, float *v, float *du, float *dv, - float D_u, float D_v, float F, float k, float speed, - int parameter_space, int num_its, int num_threads); - -static int g_paramspace = 0; -static int g_wrap = 1; -static float g_k = 0.064; -static float g_F = 0.035; -static bool g_video = false; - -#define VECSIZE 4 - -int main(int argc, char * * argv) -{ - DICEK_INIT_NTHR(g_threads) - printf("DiceK reports %d threads.\n", g_threads); - - // Here we implement the Gray-Scott model, as described here: - // http://arxiv.org/abs/patt-sol/9304003 - // (the seminal paper by Pearson) - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // (a present university course project, by Greg Turk at Georgia Tech) - // http://www.mrob.com/pub/comp/xmorphia/index.html - // (a web exhibit with over 100 videos and 500 images) - - // -- parameters -- - float D_u = 0.082; - float D_v = 0.041; - - // The default is equivalent to options: -F 0.035 -k 0.064 - g_k = 0.064; g_F = 0.035; - - // Other pattern-types to try (pass -F and -k as argument on command line, some require "-density 1" as well): - // - // -F 0.0118 -k 0.0475 Spiral waves - // -F 0.022 -k 0.059 Spots that multiply and keep killing each other off - // -F 0.035 -k 0.06 Stripes with branching (fingerprint) - // -F 0.04 -k 0.064 For spots that multiply, with stripes mixed in - // -F 0.056 -k 0.065 Long stripes ("-density 1" option helps here) - // -F 0.062 -k 0.0609 -density 2 "Uskate world", where I found all the Wolfram-class-4 behaviour - // -F 0.094 -k 0.059 -density 1 "soap bubbles" - // -F 0.094 -k 0.057 -lowback -density 1 Inverse soap bubbles - - float speed = 1.0; - - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+11) ? 1 : 0; - } - - if (nthreads > 1) { - /* Start N threads, each will immediately begin the first part of its computation */ - DICEK_SPLIT(compute, cp, nthreads); - - /* Now for each iteration we need to sync the threads once. Each iteration consists of two - work phases. - During the first phase u and v are being read and du,dv are written; each thread - reads neighboring thread's data (in a single row along its top and bottom borders) - During the second phase u,v are overwritten with the next generation; each thread keeps - to its own area. - Because inter-thread communication only happens during phase 1, there need be only - one barrier per loop. */ - for(i=0; i (b)) ? (a) : (b)) -# define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef minmax -# define minmax(v, lo, hi) max(lo, min(v, hi)) -#endif - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - float rv; - - rv = lower + rand()*(upper-lower)/RAND_MAX; - rv = minmax(rv, 0.0, 1.0); - return rv; -} - -void init(float *u, float *v, long width, long height) -{ - long nsp, i; - long base, var; - - srand((unsigned int)time(NULL)); - - for(int i = 0; i < height; i++) { - for(int j = 0; j < width; j++) { - // start with a uniform field with an approximate circle in the middle - if (hypot(i-height/2,j-width/3)<=frand(2,5)) - { - u[i*width+j] = frand(0.0,0.1); - v[i*width+j] = frand(0.9,1.0); - } else { - u[i*width+j] = frand(0.9,1.0); - v[i*width+j] = frand(0.0,0.1); - } - } - } -} - -static float * c_u; -static float * c_v; -static float * c_du; -static float * c_dv; -static long c_full_width; - -void compute_allocate(long width, long height) -{ - c_full_width = width + 2*VECSIZE; - c_u = allocate(c_full_width, height, "U array", true); - c_v = allocate(c_full_width, height, "V array", true); - c_du = allocate(c_full_width, height, "D_u array", true); - c_dv = allocate(c_full_width, height, "D_v array", true); -} - -void compute_import(float *u, float *v, long width, long height) -{ - for(long i = 0; i < height; i++) { - for(long j = 0; j < width; j++) { - c_u[(i+VECSIZE)*width+j] = u[i*width+j]; - c_v[(i+VECSIZE)*width+j] = v[i*width+j]; - } - } -} - -unsigned __stdcall compute(void * gpb) -{ - DICEK_SUB(compute_params, gpb); - const int full_width = g_width + 2*VECSIZE; - const int wid_v = full_width / VECSIZE; - -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86)) - /* On Intel we disable accurate handling of denorms and zeros. This is an - important speed optimization. */ - int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting - int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits - _mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR -#endif - - compute_params * param_block; - param_block = (compute_params *) gpb; - float *u = param_block->u; - float *v = param_block->v; - float *du = param_block->du; - float *dv = param_block->dv; - float D_u = param_block->D_u; - float D_v = param_block->D_v; - float F = param_block->F; - float k = param_block->k; - float speed = param_block->speed; - int parameter_space = param_block->parameter_space; - int num_its = param_block->num_its; - int start_row = param_block->start_row; - int end_row = param_block->end_row; - int interlock = param_block->interlock_type; - - - - int iter; - - if (interlock) { DICEK_CH_BEGIN } - - // Scan per iteration - for(iter = 0; iter < num_its; iter++) { - -//printf("iter %d rows [%ld,%ld)\n",iter,start_row,end_row); - - // Scan per row - for(int i = start_row; i < end_row; i++) { - const V4F4 v4_F = v4SPLAT(F); - const V4F4 v4_k = v4SPLAT(k); - const V4F4 v4_Du = v4SPLAT(D_u); - const V4F4 v4_Dv = v4SPLAT(D_v); - int iprev,inext; - if (g_wrap) { - /* Periodic boundary condition */ - iprev = (i+g_height-1) % g_height; - inext = (i+1) % g_height; - } else { - /* The edges are their own neighbors. This amounts to a von Neumann boundary condition. */ - iprev = max(i-1, 0); - inext = min(i+1, g_height-1); - } - - // Scan per column in steps of vector width - for(int j = 1; j < wid_v-1; j++) { - V4F4 * v_ubase = ((V4F4 *)u)+i*wid_v+j; - V4F4 * v_vbase = ((V4F4 *)v)+i*wid_v+j; - V4F4 * v_dubase = ((V4F4 *)du)+i*wid_v+j; - V4F4 * v_dvbase = ((V4F4 *)dv)+i*wid_v+j; - V4F4 u_left = _mm_loadu_ps(((float*)v_ubase)-1); - V4F4 u_right = _mm_loadu_ps(((float*)v_ubase)+1); - V4F4 v_left = _mm_loadu_ps(((float*)v_vbase)-1); - V4F4 v_right = _mm_loadu_ps(((float*)v_vbase)+1); - V4F4 * v_ub_prev = ((V4F4 *)u)+iprev*wid_v+j; - V4F4 * v_ub_next = ((V4F4 *)u)+inext*wid_v+j; - V4F4 * v_vb_prev = ((V4F4 *)v)+iprev*wid_v+j; - V4F4 * v_vb_next = ((V4F4 *)v)+inext*wid_v+j; - - // To compute the Laplacians of u and v, we use the 5-point neighbourhood for the Euler discrete method: - // nabla(x) = x[i][j-1]+x[i][j+1]+x[i-1][j]+x[i+1][j] - 4*x[i][j]; - // ("nabla" is the name of the "upside down delta" symbol used for the Laplacian in equations) -# define NABLA_5PT(ctr,left,right,up,down) \ - v4SUB(v4ADD(v4ADD(v4ADD(left,right),up),down),v4MUL(ctr,v4SPLAT(4.0f))) - - // compute the new rate of change of u and v - V4F4 v4_uvv = v4MUL(*v_ubase,v4MUL(*v_vbase,*v_vbase)); // u*v^2 is used twice - - /* Scalar code is: du[i][j] = D_u * nabla_u - u*v^2 + F*(1-u); - We treat it as: D_u * nabla_u - (u*v^2 - F*(1-u)) */ - *v_dubase = v4SUB(v4MUL(v4_Du, - NABLA_5PT(*v_ubase, u_left, u_right, *v_ub_prev, *v_ub_next)), - v4SUB(v4_uvv,v4MUL(v4_F,v4SUB(v4SPLAT(1.0f),*v_ubase)))); - - /* dv formula is similar: dv[i][j] = D_v * nabla_v + u*v^2 - (F+k)*v; */ - *v_dvbase = v4ADD(v4MUL(v4_Dv, - NABLA_5PT(*v_vbase, v_left, v_right, *v_vb_prev, *v_vb_next)), - v4SUB(v4_uvv,v4MUL(v4ADD(v4_F,v4_k),*v_vbase))); - } - - } // End of scan per row - - if (interlock) { DICEK_CH_SYNC } - if (interlock) { DICEK_CH_BEGIN } - - { - int right_b, left_b; - if (g_wrap) { - right_b = wid_v-2; - left_b = 1; - } else { - right_b = 1; - left_b = wid_v-2; - } - // effect change - for(int i = start_row; i < end_row; i++) { - for(int j = 1; j < wid_v-1; j++) { - const V4F4 v4_speed = v4SPLAT(speed); - V4F4 * v_ubase = ((V4F4 *)u)+i*wid_v+j; - V4F4 * v_vbase = ((V4F4 *)v)+i*wid_v+j; - V4F4 * v_dubase = ((V4F4 *)du)+i*wid_v+j; - V4F4 * v_dvbase = ((V4F4 *)dv)+i*wid_v+j; - // u[i][j] = u[i][j] + speed * du[i][j]; - *v_ubase = v4ADD(v4MUL(v4_speed, *v_dubase), *v_ubase); - // v[i][j] = v[i][j] + speed * dv[i][j]; - *v_vbase = v4ADD(v4MUL(v4_speed, *v_dvbase), *v_vbase); - } - // Update cells on boundary from one row inland - *(((V4F4 *)u)+i*wid_v) = *(((V4F4 *)u)+i*wid_v+right_b); - *(((V4F4 *)v)+i*wid_v) = *(((V4F4 *)v)+i*wid_v+right_b); - *(((V4F4 *)u)+i*wid_v+wid_v-1) = *(((V4F4 *)u)+i*wid_v+left_b); - *(((V4F4 *)v)+i*wid_v+wid_v-1) = *(((V4F4 *)v)+i*wid_v+left_b); - } - } - - } // End of scan per iteration - - // finish last phase 2 - if (interlock) { DICEK_CH_SYNC } - - DICEK_CH_END - - return 0; -} - - - -#ifdef COMPUTE_A - -/* The parameter space code, specifically the "if(parameter_space)" test itself, causes a 2.5% slowdown even when the - parameter_space flag is false */ -//#define SUPPORT_PARAM_SPACE - -void * compute(void * gpb) -{ - DICEK_SUB(compute_params, gpb); - -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86)) - /* On Intel we disable accurate handling of denorms and zeros. This is an - important speed optimization. */ - int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting - int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits - _mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR -#endif - - compute_params * param_block; - param_block = (compute_params *) gpb; - float *u = param_block->u; - float *v = param_block->v; - float *du = param_block->du; - float *dv = param_block->dv; - float D_u = param_block->D_u; - float D_v = param_block->D_v; - float F = param_block->F; - float k = param_block->k; - float speed = param_block->speed; - int parameter_space = param_block->parameter_space; - int num_its = param_block->num_its; - long start_row = param_block->start_row; - long end_row = param_block->end_row; - int interlock = param_block->interlock_type; - - int iter; -#ifndef HWIV_HAVE_V4F4 - fprintf(stdout, "Did not get vector macros from HWIV\n"); - exit(-1); -#endif - // Vector "constants": speed, F, k, D_u, D_v - V4F4 v4_speed, v4_F, v4_k, v4_Du, v4_Dv; - // Pointers used to load data from rows of the grid - V4F4 *v_ub_prev, *v_ubase, *v_ub_next; - V4F4 *v_vb_prev, *v_vbase, *v_vb_next; - // Actual grid data - V4F4 v4_u_l, v4_u, v4_u_r; - V4F4 v4_v_l, v4_v, v4_v_r; - V4F4 v4_uvv; - // Pointers to second grid where we write the results of the main computation - V4F4 *v_dubase, *v_dvbase; - -#ifdef SUPPORT_PARAM_SPACE - const float k_min=0.045f, k_max=0.07f, F_min=0.01f, F_max=0.09f; - float k_diff; - V4F4 v4_kdiff; - k_diff = (k_max-k_min)/g_height; - v4_kdiff = v4SET(0, -k_diff, -2*k_diff, -3*k_diff); -#endif - - // Initialize our vectorized scalars - v4_speed = v4SPLAT(speed); - v4_F = v4SPLAT(F); - v4_k = v4SPLAT(k); - v4_Du = v4SPLAT(D_u); - v4_Dv = v4SPLAT(D_v); - - // Scan per iteration - for(iter = 0; iter < num_its; iter++) { - - if (interlock) { DICEK_CH_BEGIN } - -//printf("iter %d rows [%ld,%ld)\n",iter,start_row,end_row); - - // Scan per row - for(long i = start_row; i < end_row; i++) { - long iprev,inext; - long v_j2; - if (g_wrap) { - /* Periodic boundary condition */ - iprev = (i+g_height-1) % g_height; - inext = (i+1) % g_height; - } else { - /* The edges are their own neighbors. This amounts to a Neumann boundary condition. */ - iprev = max(i-1, 0); - inext = min(i+1, g_height-1); - } - - /* Get pointers to beginning of rows for each of the grids. We access - 3 rows each for u and v, and 1 row each for du and dv. */ - v_ubase = (V4F4 *)&INDEX(u,i,0); - v_ub_prev = (V4F4 *)&INDEX(u,iprev,0); - v_ub_next = (V4F4 *)&INDEX(u,inext,0); - v_vbase = (V4F4 *)&INDEX(v,i,0); - v_vb_prev = (V4F4 *)&INDEX(v,iprev,0); - v_vb_next = (V4F4 *)&INDEX(v,inext,0); - v_dubase = (V4F4 *)&INDEX(du,i,0); - v_dvbase = (V4F4 *)&INDEX(dv,i,0); - -#ifdef SUPPORT_PARAM_SPACE - if (parameter_space) { - // set F for this row (ignore the provided value) - F = F_min + (g_height-i-1) * (F_max-F_min)/g_width; - v4_F = v4SPLAT(F); - } -#endif - - /* Pre-load the first two blocks of data we need, which are the "center" - and "right" blocks from the end of the row (as if we have just wrapped - around from the end of the row back to the beginning) */ - v_j2 = g_wrap ? ((g_width-4)/VECSIZE) : 0; - - v4_u = *(v_ubase+v_j2); - v4_v = *(v_vbase+v_j2); - v4_u_r = *v_ubase++; - v4_v_r = *v_vbase++; - - // Scan per column in steps of vector width - for(long j = 0; j < g_width-VECSIZE; j+=VECSIZE) { - // Get a new 4 pixels from the current row and shift the other 8 pixels over - v4_u_l = v4_u; v4_u = v4_u_r; v4_u_r = *v_ubase; - v4_v_l = v4_v; v4_v = v4_v_r; v4_v_r = *v_vbase; - -#ifdef SUPPORT_PARAM_SPACE - if (parameter_space) { - // set k for this column (ignore the provided value) - k = k_min + (g_width-j-1)*k_diff; - // k decreases by k_diff each time j increases by 1, so this vector - // needs to contain 4 different k values. We use v4_kdiff, pre-computed - // above, to accomplish this. - v4_k = v4ADD(v4SPLAT(k),v4_kdiff); - } -#endif - - // To compute the Laplacians of u and v, we use the 5-point neighbourhood for the Euler discrete method: - // nabla(x) = x[i][j-1]+x[i][j+1]+x[i-1][j]+x[i+1][j] - 4*x[i][j]; - // ("nabla" is the name of the "upside down delta" symbol used for the Laplacian in equations) -# define NABLA_5PT(ctr,left,right,up,down) \ - v4SUB(v4ADD(v4ADD(v4ADD(left,right),up),down),v4MUL(ctr,v4SPLAT(4.0f))) - - // compute the new rate of change of u and v - v4_uvv = v4MUL(v4_u,v4MUL(v4_v,v4_v)); // u*v^2 is used twice - - /* Scalar code is: du[i][j] = D_u * nabla_u - u*v^2 + F*(1-u); - We treat it as: D_u * nabla_u - (u*v^2 - F*(1-u)) */ - *v_dubase = v4SUB(v4MUL(v4_Du, - NABLA_5PT(v4_u, v4RAISE(v4_u,v4_u_l), v4LOWER(v4_u,v4_u_r), *v_ub_prev, *v_ub_next)), - v4SUB(v4_uvv,v4MUL(v4_F,v4SUB(v4SPLAT(1.0f),v4_u)))); - - /* dv formula is similar: dv[i][j] = D_v * nabla_v + u*v^2 - (F+k)*v; */ - *v_dvbase = v4ADD(v4MUL(v4_Dv, - NABLA_5PT(v4_v, v4RAISE(v4_v,v4_v_l), v4LOWER(v4_v,v4_v_r), *v_vb_prev, *v_vb_next)), - v4SUB(v4_uvv,v4MUL(v4ADD(v4_F,v4_k),v4_v))); - - v_ub_prev++; v_ub_next++; - v_ubase++; v_vbase++; - v_vb_prev++; v_vb_next++; - v_dubase++; v_dvbase++; - } - - /* Now we do the last 4 pixels. This is unrolled out of the main loop just to avoid having to do the g_wrap - test every time in the j loop. */ - v4_u_l = v4_u; v4_u = v4_u_r; - v4_v_l = v4_v; v4_v = v4_v_r; - if (g_wrap) { - /* The 4 cells to the "right" are the first 4 in this row */ - v4_u_r = *((V4F4 *)&INDEX(u,i,0)); - v4_v_r = *((V4F4 *)&INDEX(v,i,0)); - } else { - /* just leave them alone, retaining the rightmost 4 values in this row, which were loaded on the last iteration - through the loop */ - } - v4_uvv = v4MUL(v4_u,v4MUL(v4_v,v4_v)); - *v_dubase = v4SUB(v4MUL(v4_Du, - NABLA_5PT(v4_u, v4RAISE(v4_u,v4_u_l), v4LOWER(v4_u,v4_u_r), *v_ub_prev, *v_ub_next)), - v4SUB(v4_uvv,v4MUL(v4_F,v4SUB(v4SPLAT(1.0f),v4_u)))); - *v_dvbase = v4ADD(v4MUL(v4_Dv, - NABLA_5PT(v4_v, v4RAISE(v4_v,v4_v_l), v4LOWER(v4_v,v4_v_r), *v_vb_prev, *v_vb_next)), - v4SUB(v4_uvv,v4MUL(v4ADD(v4_F,v4_k),v4_v))); - } // End of scan per row - - // First thread interlock goes here - if (interlock) { DICEK_CH_SYNC } - if (interlock) { DICEK_CH_BEGIN } - - { - // effect change - for(long i = start_row; i < end_row; i++) { - v_ubase = ((V4F4 *) (&INDEX(u,i,0))); - v_vbase = ((V4F4 *) (&INDEX(v,i,0))); - v_dubase = ((V4F4 *) (&INDEX(du,i,0))); - v_dvbase = ((V4F4 *) (&INDEX(dv,i,0))); - for(long j = 0; j < g_width; j+=VECSIZE) { - // u[i][j] = u[i][j] + speed * du[i][j]; - *v_ubase = v4ADD(v4MUL(v4_speed, *v_dubase), *v_ubase); v_ubase++; v_dubase++; - // v[i][j] = v[i][j] + speed * dv[i][j]; - *v_vbase = v4ADD(v4MUL(v4_speed, *v_dvbase), *v_vbase); v_vbase++; v_dvbase++; - } - } - } - - // second thread interlock goes here - if (interlock) { DICEK_CH_SYNC } - - } // End of scan per iteration -} -#endif - -#ifdef COMPUTE_PRE_THREADS - -/* This is the old version of compute() that used all "assembly-language" syntax */ - -void compute(float *u, float *v, float *du, float *dv, - float D_u,float D_v,float F,float k,float speed, - int parameter_space) -{ -#ifndef HWIV_HAVE_V4F4 - fprintf(stdout, "Did not get vector macros from HWIV\n"); - exit(-1); -#endif - V4F4 v4_speed; // vectorized version of speed scalar - V4F4 v4_F; // vectorized version of F scalar - V4F4 v4_k; // vectorized version of k scalar - HWIV_4F4_ALIGNED talign; // used by FILL_4F4 - V4F4 v4_u; V4F4 v4_du; - V4F4 v4_v; V4F4 v4_dv; - HWIV_INIT_MUL0_4F4; // used by MUL (on targets that need it) - HWIV_INIT_MTMP_4F4; // used by MADD (on targets that need it) - HWIV_INIT_FILL; // used by FILL - HWIV_INIT_RLTMP_4F4; // used by RAISE and LOWER - V4F4 v4_tmp; - V4F4 v4_Du; - V4F4 v4_Dv; - V4F4 v4_nabla_u; - V4F4 v4_nabla_v; - V4F4 v4_1; - V4F4 v4_4; - const float k_min=0.045f, k_max=0.07f, F_min=0.01f, F_max=0.09f; - float k_diff; - V4F4 v4_kdiff; - float * ubase; - float * ub_prev; - float * ub_next; - float * vbase; float * vb_prev; float * vb_next; - float * dubase; float * dvbase; - - V4F4 v4_u_l; - V4F4 v4_u_r; - V4F4 v4_v_l; - V4F4 v4_v_r; - - //F_diff = (F_max-F_min)/g_width; - k_diff = (k_max-k_min)/g_height; - - // Initialize our vectorized scalars - HWIV_SPLAT_4F4(v4_speed, speed); - HWIV_SPLAT_4F4(v4_F, F); - HWIV_SPLAT_4F4(v4_k, k); - HWIV_SPLAT_4F4(v4_Du, D_u); - HWIV_SPLAT_4F4(v4_Dv, D_v); - HWIV_SPLAT_4F4(v4_1, 1.0); - HWIV_SPLAT_4F4(v4_4, 4.0); - HWIV_FILL_4F4(v4_kdiff, 0, -k_diff, -2*k_diff, -3*k_diff); - - // Scan per row - for(int i = 0; i < g_height; i++) { - int iprev,inext; - int j2; - - if (g_wrap) { - iprev = (i+g_height-1) % g_height; - inext = (i+1) % g_height; - } else { - iprev = max(i-1, 0); - inext = min(i+1, g_height-1); - } - /* Get pointers to beginning of rows for each of the grids. We access - 3 rows each for u and v, and 1 row each for du and dv. */ - ubase = &INDEX(u,i,0); - ub_prev = &INDEX(u,iprev,0); - ub_next = &INDEX(u,inext,0); - vbase = &INDEX(v,i,0); - vb_prev = &INDEX(v,iprev,0); - vb_next = &INDEX(v,inext,0); - dubase = &INDEX(du,i,0); - dvbase = &INDEX(dv,i,0); - - if (parameter_space) { - // set F for this row (ignore the provided value) - F = F_min + (g_height-i-1) * (F_max-F_min)/g_width; - HWIV_SPLAT_4F4(v4_F, F); - } - - /* Pre-load the first two blocks of data we need, which are the "center" - and "right" blocks from the end of the row (as if we have just wrapped - around from the end of the row back to the beginning) */ - j2 = g_wrap ? (g_width-4) : 0; - HWIV_LOAD_4F4(v4_u, ubase+j2); - HWIV_LOAD_4F4(v4_u_r, ubase); - HWIV_LOAD_4F4(v4_v, vbase+j2); - HWIV_LOAD_4F4(v4_v_r, vbase); - - // Scan per column in steps of vector width - for(int j = 0; j < g_width; j+=4) { - if (g_wrap) { - j2 = (j+4) % g_width; - } else { - j2 = min(j+4, g_height-4); - } - - HWIV_COPY_4F4(v4_u_l, v4_u); - HWIV_COPY_4F4(v4_v_l, v4_v); - HWIV_COPY_4F4(v4_u, v4_u_r); - HWIV_COPY_4F4(v4_v, v4_v_r); - HWIV_LOAD_4F4(v4_u_r, ubase+j2); - HWIV_LOAD_4F4(v4_v_r, vbase+j2); - - if (parameter_space) { - // set k for this column (ignore the provided value) - k = k_min + (g_width-j-1)*k_diff; - // k decreases by k_diff each time j increases by 1, so this vector - // needs to contain 4 different k values. - HWIV_SPLAT_4F4(v4_tmp, k); - HWIV_ADD_4F4(v4_k, v4_tmp, v4_kdiff); - } - - // compute the Laplacians of u and v. "nabla" is the name of the - // "upside down delta" symbol used for the Laplacian in equations - - /* Scalar code is: - nabla_u = u[i][jprev]+u[i][jnext]+u[iprev][j]+u[inext][j] - 4*uval; */ - HWIV_RAISE_4F4(v4_nabla_u, v4_u, v4_u_l); - - HWIV_LOWER_4F4(v4_tmp, v4_u, v4_u_r); - HWIV_ADD_4F4(v4_nabla_u, v4_nabla_u, v4_tmp); - - // Now we add in the "up" and "down" neighbors - HWIV_LOAD_4F4(v4_tmp, ub_prev+j); - HWIV_ADD_4F4(v4_nabla_u, v4_nabla_u, v4_tmp); - HWIV_LOAD_4F4(v4_tmp, ub_next+j); - HWIV_ADD_4F4(v4_nabla_u, v4_nabla_u, v4_tmp); - - // Now we compute -(4*u-neighbors) = neighbors - 4*u - HWIV_NMSUB_4F4(v4_nabla_u, v4_4, v4_u, v4_nabla_u); - - // Same thing all over again for the v's - HWIV_RAISE_4F4(v4_nabla_v, v4_v, v4_v_l); - HWIV_LOWER_4F4(v4_tmp, v4_v, v4_v_r); - HWIV_ADD_4F4(v4_nabla_v, v4_nabla_v, v4_tmp); - HWIV_LOAD_4F4(v4_tmp, vb_prev+j); - HWIV_ADD_4F4(v4_nabla_v, v4_nabla_v, v4_tmp); - HWIV_LOAD_4F4(v4_tmp, vb_next+j); - HWIV_ADD_4F4(v4_nabla_v, v4_nabla_v, v4_tmp); - HWIV_NMSUB_4F4(v4_nabla_v, v4_4, v4_v, v4_nabla_v); - - // compute the new rate of change of u and v - - /* Scalar code is: - du[i][j] = D_u * nabla_u - uval*vval*vval + F*(1-uval); - We treat it as: - D_u * nabla_u - (uval*vval*vval - (-(F*uval-F)) ) */ - - HWIV_NMSUB_4F4(v4_tmp, v4_F, v4_u, v4_F); // -(F*u-F) = F-F*u = F(1-u) - HWIV_MUL_4F4(v4_dv, v4_v, v4_v); // v^2 - HWIV_MSUB_4F4(v4_tmp, v4_u, v4_dv, v4_tmp); // u*v^2 - F(1-u) - HWIV_MSUB_4F4(v4_du, v4_Du, v4_nabla_u, v4_tmp); // D_u*nabla_u - (u*v^2 - F(1-u)) - // = D_u*nabla_u - u*v^2 + F(1-u) - HWIV_SAVE_4F4(dubase+j, v4_du); - - /* dv formula is similar: - dv[i][j] = D_v * nabla_v + uval*vval*vval - (F+k)*vval; - We treat it as: - D_v * nabla_v + uval*vval*vval - (F*vval + k*vval); */ - HWIV_MUL_4F4(v4_tmp, v4_k, v4_v); // k*v - HWIV_MADD_4F4(v4_tmp, v4_F, v4_v, v4_tmp); // F*v+k*v = (F+k)v - // v^2 is still in v4_dv - HWIV_MSUB_4F4(v4_tmp, v4_u, v4_dv, v4_tmp); // u*v^2 - (F+k)v - HWIV_MADD_4F4(v4_dv, v4_Dv, v4_nabla_v, v4_tmp); // D_v*nabla_v + u*v^2 - (F+k)v - HWIV_SAVE_4F4(dvbase+j, v4_dv); - } - } - - // effect change - for(int i = 0; i < g_height; i++) { - ubase = &INDEX(u,i,0); - vbase = &INDEX(v,i,0); - dubase = &INDEX(du,i,0); - dvbase = &INDEX(dv,i,0); - for(int j = 0; j < g_width; j+=4) { - // u[i][j] = u[i][j] + speed * du[i][j]; - HWIV_LOAD_4F4(v4_u, ubase+j); // get u - HWIV_LOAD_4F4(v4_du, dubase+j); // get du - HWIV_MADD_4F4(v4_u, v4_speed, v4_du, v4_u); // speed*du + u - HWIV_SAVE_4F4(ubase+j, v4_u); // write it back - - // v[i][j] = v[i][j] + speed * dv[i][j]; - HWIV_LOAD_4F4(v4_v, vbase+j); - HWIV_LOAD_4F4(v4_dv, dvbase+j); - HWIV_MADD_4F4(v4_v, v4_speed, v4_dv, v4_v); - HWIV_SAVE_4F4(vbase+j, v4_v); - } - } -} - -#endif diff --git a/SpeedComparisons/GrayScott_HWIVector/hwi_vector.h b/SpeedComparisons/GrayScott_HWIVector/hwi_vector.h deleted file mode 100644 index b3751a096..000000000 --- a/SpeedComparisons/GrayScott_HWIVector/hwi_vector.h +++ /dev/null @@ -1,432 +0,0 @@ -/* - - hwi_vector.h ^u 140 ^x f - -This is a hardware-independent vector library. It enables vector SIMD code to be written, compiled and run on non-vector -machines. Once the code is tested and working, a compile-time macro can be changed, and a recompile causes the program -to use actual vector instructions. - -In the simplest case, that is all that is needed. In real applications there are usually several more steps: - - * The application developer decides what base hardware platform to compile for. This might be the oldest CPU the - company will support, such as a Pentium 4 HT. - - * The compiler defines certain flags, such as __i386__ and __SSE2__ that can be tested by an #ifdef - - * The source code can set flags of its own, such as HWIV_EMULATE or HWIV_WANT_V4F4. This might be done in order to - create two versions of a calculation routine (one that uses vector instructions and one that does not) - - * The program is built from one or more compilations of the same source. This might be done in order to create a - "universal binary" capable of being copied to and run on a variety of computer products. A typical example is a - program file that contains both a 32-bit and a 64-bit version, and the operating system loads whichever one is - appropriate when the program is launched. - - * At run-time, the program tests for the presence of vector instructions using the CPUID instruction or its - equivalent on non Intel CPUs. - - * At run-time, based on the CPUID test, the program transfers control to one or another of the calculation - subroutines depending on which vector instructions are actually available. - -*/ - -// First, honor the user's request to use emulation -#ifdef HWIV_EMULATE -# define HWIV_V4F4_EMULATED -#endif - -// Next, find out if the compiler will give us SSE2 intrinsics -#ifndef HWIV_V4F4_EMULATED -# if defined(__SSE2__) -# define HWIV_V4F4_SSE2 -# endif - // Workaround because Visual Studio doesn't seem to set its _M_IX86_FP flag, - // or give us any indication what level of SSE support is available. - // So we just assume SSE2 is available -# if defined(_M_X64) || defined(_M_IX86) -# define HWIV_V4F4_SSE2 -# endif -#endif - - -// Finally, fall back to emulated if no hardware option is available -#ifndef HWIV_V4F4_SSE2 -# ifndef HWIV_V4F4_EMULATED -# define HWIV_V4F4_EMULATED -# endif -#endif - - -// See is client wants V4F4 -#ifdef HWIV_WANT_V4F4 - -// Okay, now see how we should create the macros for V4F4 - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// #include // -// /* */ /* */ // -// /* */ /* */ // -// int main() {char a[] ={ 'T', 'h','i', 's',32, 'p','r' ,'o','g' // -// ,'r','a','m', 32, 'j', 'u', 's', 't' ,32 ,'d', 'o', 'e' ,'s' ,32 ,'i' // -// ,'t' ,32 ,'t' ,'h' ,'e' ,32, 'h' ,97,'r','d' ,32, 'w',97,121,33, 32, 40, // -// 68, 'o', 'n', 39, 't' ,32 ,'y' ,'o', 117, 32 ,'t' ,'h' ,'i' // -// ,'n' /* Xy =a +3 +n ++ ;a= b- (* x/z ); if // -// (Xy-++n<(z+*x))z =b;a +b, z+= x*/,107 , 63,63 ,63,41,'\n' ,00}; puts(a);} /*.RPM.*/ // -// // -// Emulated versions of the V4F4 macros. (These also serve as documentation for what each macro does) // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -# ifdef HWIV_V4F4_EMULATED - -/* Scalar code to emulate the V4F4 vector model */ -#define HWIV_HAVE_V4F4 - -/* The vector register type */ -typedef float V4F4[4]; - -/* Aligned memory suitable for load to / store from the vector register */ -typedef float HWIV_4F4_ALIGNED[4]; - -/* LOAD_4F4: dst is a vector of 4 floats. src is a pointer to an array - of floats in memory. This opcode loads 4 consecutive floats from the - given location into the vector. The first float from memory will be loaded - into element 0 of the vector, the 2nd into element 1, and so on. */ -#define HWIV_LOAD_4F4(dst, src) { (dst)[0]=(src)[0]; (dst)[1]=(src)[1]; \ - (dst)[2]=(src)[2]; (dst)[3]=(src)[3]; } - -#define HWIV_LOADU_4F4(dst, src) { (dst)[0]=(src)[0]; (dst)[1]=(src)[1]; \ - (dst)[2]=(src)[2]; (dst)[3]=(src)[3]; } - -/* LOADO_4F4: dst is a vector of 4 floats. src is a pointer to an array - of floats in memory. offset is a byte offset, which must be a multiple - of sizeof(float). - This opcode loads 4 consecutive floats from the given location plus - offset into the vector. The first float from memory will be loaded into - element 0 of the vector, the 2nd into element 1, and so on. */ -#define HWIV_LOADO_4F4(dst, src, offset) { (dst)[0]=(src)[(offset)/4]; \ - (dst)[1]=(src)[(offset)/4+1]; \ - (dst)[2]=(src)[(offset)/4+2]; \ - (dst)[3]=(src)[(offset)/4+3]; } - -/* COPY_4F4: dst and src are each a vector of 4 floats. This opcode copies - the contents of the source vector into the destination vector. */ -#define HWIV_COPY_4F4(dst, src) HWIV_LOAD_4F4((dst), (src)) - -/* FILL_4F4: dst is a vector of 4 floats. sc0, sc1, sc2, and sc3 ("scalars") - are each a float. tmp is a (float *) pointing to memory of sufficient - size to hold four floats, and aligned to a 16-byte boundary. Use the - HWIV_4F4_ALIGNED typedef to declare a suitable float[4] which can - be passed as tmp to this and other similar macros. - This opcode loads the 4 scalar values into the vector. The sc0 will be - loaded into element 0 of the vector, sc1 into element 1, and so on. */ -#define HWIV_FILL_4F4(dst, sc0, sc1, sc2, sc3) { \ - (dst)[0]=(sc0); (dst)[1]=(sc1); (dst)[2]=(sc2); (dst)[3]=(sc3); } -#define HWIV_INIT_FILL /* nop */ - -#define HWIV_SPLAT_4F4(dst, s) HWIV_FILL_4F4((dst), (s), (s), (s), (s)) - -/* SAVE_4F4: dst is a pointer to an array of floats in memory. src is a - vector of 4 floats. - This opcode stores the 4 floats in the source vector into 4 consecutive - float-sized blocks of memory (i.e. 16 consecutive bytes, 4 bytes per - float) beginning at the given destination location. Element 0 of the - vector will be stored into the first 4 bytes in memory, element 1 into - the next 4 bytes of memory, and so on. */ -#define HWIV_SAVE_4F4(dst, src) memcpy((void *) (dst), (void *) (src), 16); - -/* SAVEO_4F4: dst is a pointer to an array of floats in memory. src is a - vector of 4 floats. offset is a byte offset, which must be a multiple - of sizeof(float). - This opcode stores the 4 floats in the source vector into 4 consecutive - float-sized blocks of memory (i.e. 16 consecutive bytes, 4 bytes per - float) beginning at the given destination location plus offset. Element - 0 of the vector will be stored into the first 4 bytes in memory, element - 1 into the next 4 bytes of memory, and so on. */ -#define HWIV_SAVEO_4F4(dst, offset, src) { (dst)[(offset)/4]=(src)[0]; \ - (dst)[(offset)/4+1]=(src)[1]; \ - (dst)[(offset)/4+2]=(src)[2]; \ - (dst)[(offset)/4+3]=(src)[3]; } - -/* ADD_4F4: dst, a, and b are each a vector of 4 floats. - This opcode adds each of the components of a to the corresponding - component of b and puts the result into dst. */ -#define HWIV_ADD_4F4(dst, a, b) { (dst)[0]=a[0]+b[0]; (dst)[1]=a[1]+b[1]; \ - (dst)[2]=a[2]+b[2]; (dst)[3]=a[3]+b[3]; } - -/* SUB_4F4: dst, a, and b are each a vector of 4 floats. - This opcode adds each of the components of a to the corresponding - component of b and puts the result into dst. */ -#define HWIV_SUB_4F4(dst, a, b) { (dst)[0]=a[0]-b[0]; (dst)[1]=a[1]-b[1]; \ - (dst)[2]=a[2]-b[2]; (dst)[3]=a[3]-b[3]; } - -#define HWIV_INIT_MUL0_4F4 /* nop */ - -/* MUL_4F4: dst, a, and b are each a vector of 4 floats. v0 is a variable - declared with the HWIV_INIT_MUL0_4F4 macro. - This opcode multiplies each of the components of a to the corresponding - component of b and puts the result into dst. The varible v0 is used on - hardware that has no 2-argument multiply operation. */ -#define HWIV_MUL_4F4(dst, a, b) \ - { (dst)[0]=a[0]*b[0]; (dst)[1]=a[1]*b[1]; \ - (dst)[2]=a[2]*b[2]; (dst)[3]=a[3]*b[3]; } - -#define HWIV_INIT_MTMP_4F4 /* nop */ - -/* MADD_4F4: dst, a, b, and c are each a vector of 4 floats. t is a variable - declared with the HWIV_INIT_MTMP_4F4 macro. - This opcode multiplies each of the components of a to the corresponding - component of b, then adds the corresponding component of c, and puts the - result into dst. The varible t is used on hardware that has no 3-argument - multiply-add operation. */ -#define HWIV_MADD_4F4(dst, a, b, c) { (dst)[0]=a[0]*b[0] + c[0]; \ - (dst)[1]=a[1]*b[1] + c[1]; \ - (dst)[2]=a[2]*b[2] + c[2]; \ - (dst)[3]=a[3]*b[3] + c[3]; } - -#define HWIV_MSUB_4F4(dst, a, b, c) { (dst)[0]=a[0]*b[0] - c[0]; \ - (dst)[1]=a[1]*b[1] - c[1]; \ - (dst)[2]=a[2]*b[2] - c[2]; \ - (dst)[3]=a[3]*b[3] - c[3]; } - -/* NMSUB_4F4: dst, a, b, and c are each a vector of 4 floats. t is a variable - declared with the HWIV_INIT_MTMP_4F4 macro. - This opcode multiplies each of the components of a to the corresponding - component of b, then subtracts that product from the corresponding - component of c, then puts the result into dst. The varible t is used on - hardware that has no 3-argument multiply-add operation. */ -#define HWIV_NMSUB_4F4(dst, a, b, c) { (dst)[0]=c[0] - a[0]*b[0]; \ - (dst)[1]=c[1] - a[1]*b[1]; \ - (dst)[2]=c[2] - a[2]*b[2]; \ - (dst)[3]=c[3] - a[3]*b[3]; } - -// Declare this if you are doing any raise or lower operation -#define HWIV_INIT_RLTMP_4F4 /* nop */ - -/* RAISE_4F4: dst, src, extra, and tmp are each a vector of 4 floats. - This opcode "raises" three of the values from src to the next-higher - element of dst. Element 0 of dst is filled with the value from element - 3 of "extra". The varible t is used on hardware that requires the - result to be computed in two pieces and then assembled via a blend - operation. ("VSHR_4F4" in old macros) */ -#define HWIV_RAISE_4F4(dst, src, extra) \ - { dst[3]=src[2]; dst[2]=src[1]; \ - dst[1]=src[0]; dst[0]=extra[3]; } - -/* LOWER_4F4: dst, src, extra, and tmp are each a vector of 4 floats. - This opcode "lowers" three of the values from src to the next-lower - element of dst. Element 3 of dst is filled with the value from element - 0 of "extra". The varible t is used on hardware that requires the - result to be computed in two pieces and then assembled via a blend - operation. ("VSHL_4F4" in old macros) */ -#define HWIV_LOWER_4F4(dst, src, extra) \ - { dst[0]=src[1]; dst[1]=src[2]; \ - dst[2]=src[3]; dst[3]=extra[0]; } - - -// We also define a small set of macros for FORTRAN-style code. Using these you can build up expressions like -// -// a = v4ADD(v4MUL(b,c),d); /* a = b*c + d; */ -// -// These macros do not form a complete solution, you still need things like LOAD and SAVE to do any real work. - -#define v4LOADU(a) {(a)[0],(a)[1],(a)[2],(a)[3]} -#define v4ADD(a,b) {(a)[0]+(b)[0],(a)[1]+(b)[1],(a)[2]+(b)[2],(a)[3]+(b)[3]} -#define v4SUB(a,b) {(a)[0]-(b)[0],(a)[1]-(b)[1],(a)[2]-(b)[2],(a)[3]-(b)[3]} -#define v4MUL(a,b) {(a)[0]*(b)[0],(a)[1]*(b)[1],(a)[2]*(b)[2],(a)[3]*(b)[3]} -#define v4SET(v0,v1,v2,v3) {(v0),(v1),(v2),(v3)} -#define v4SPLAT(a) {(a),(a),(a),(a)} -#define v4ROUP(a) {(a)[3],(a)[0],(a)[1],(a)[2]} -#define v4RODN(a) {(a)[1],(a)[2],(a)[3],(a)[0]} -#define v4RAISE(a, new) {(new)[3],(a)[0],(a)[1],(a)[2]} -#define v4LOWER(a, new) {(a)[1],(a)[2],(a)[3],(new)[0]} - -# endif -/* - - - - - - - - - - - - - - - - - - - - - End of the EMULATED section - - - - - - - - - - - - - - - - - - - - - - */ - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// @@@@ @@@@ @@@@ // -// @@@@ @@@@ @@@@ // -// """" @@@@ @@@@ // -// eeee eeee ,e@@e.. eee@@@@eee @@@@ // -// @@@@ @@@@@@@@@@@@@@. @@@@@@@@@@ @@@@ // -// @@@@ @@@@f' `@@@@ @@@@ @@@@ // -// @@@@ @@@@ @@@@ @@@@ ,e@@@e. @@@@ // -// @@@@ @@@@ @@@@ @@@@ e@@@@@@@@@@@e @@@@ // -// @@@@ @@@@ @@@@ @@@@ .@@@@' `@@@@i @@@@ // -// @@@@ @@@@ @@@@ @@@@kee@@@@eeeeeeeee@@@@ @@@@ // -// @@@@ @@@@ @@@@ `@@@@@@@@@@@@@@@@@@@@@@@@@@@@ (R) // -// @@@@. // -// `@@@@e. .eeee- // -// *@@@@@@@@@@@* // -// "*@@@@*" // -// // -// Versions of the V4F4 macros for the Intel SSE2 (or later) 128-bit vector instruction set // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -# ifdef HWIV_V4F4_SSE2 - -/* SSE3 implementation of the V4F4 vector model */ -#define HWIV_HAVE_V4F4 - -# if (defined(HWIV_USE_IMMINTRIN) || defined (__AVX__)) -# include -# else -# include -# include -# endif - - -typedef __m128 V4F4; - -#ifdef _WIN32 -# define ALIGNED_16 __declspec( align( 16 ) ) -#else -# define ALIGNED_16 __attribute__((aligned (16))) -#endif - -typedef float ALIGNED_16 HWIV_4F4_ALIGNED[4]; - -#define HWIV_LOAD_4F4(dst, src) (dst) = _mm_load_ps(src) -#define HWIV_LOADU_4F4(dst, src) (dst) = _mm_loadu_ps(src) -#define HWIV_LOADO_4F4(dst, src, offset) \ - (dst) = _mm_load_ps((src)+(offset)/4) -#define HWIV_COPY_4F4(dst, src) (dst) = (src) - -#define HWIV_FILL_4F4(dst, sc0, sc1, sc2, sc3) \ - { HWIV_fill_4F4[0]=(sc0); HWIV_fill_4F4[1]=(sc1); \ - HWIV_fill_4F4[2]=(sc2); HWIV_fill_4F4[3]=(sc3); \ - HWIV_LOAD_4F4(dst, HWIV_fill_4F4); } - -#define HWIV_INIT_FILL float ALIGNED_16 HWIV_fill_4F4[4]; - - -#define HWIV_SPLAT_4F4(dst, s) (dst) = _mm_set1_ps(s) - -#define HWIV_SAVE_4F4(dst, src) _mm_store_ps((dst), (src)) - -#define HWIV_SAVEO_4F4(dst, offset, src) \ - _mm_store_ps((dst)+(offset)/4, (src)) - -#define HWIV_ADD_4F4(dst, a, b) (dst) = _mm_add_ps((a), (b)) - -#define HWIV_SUB_4F4(dst, a, b) (dst) = _mm_sub_ps((a), (b)) - -// For INIT_MUL0, on Intel we do nothing because Intel actually has a -// 2-operand multiply operation. -#define HWIV_INIT_MUL0_4F4 /* nop */ - -// For INIT_MTMP, on Intel SSE2 we need to declare a variable because -// Intel SSE2 has no FMA (fused multiply-add) operations (this is -// expected to come wth AVX2 on Haskell in 2013) -#define HWIV_INIT_MTMP_4F4 V4F4 HWIV_mtmp_4F4 = _mm_setzero_ps() - -#define HWIV_MUL_4F4(dst, a, b) (dst) = _mm_mul_ps((a), (b)) - -#define HWIV_MADD_4F4(dst, a, b, c) { HWIV_mtmp_4F4 = _mm_mul_ps((a), (b)); \ - (dst) = _mm_add_ps(HWIV_mtmp_4F4, (c)); } -#define HWIV_MSUB_4F4(dst, a, b, c) { HWIV_mtmp_4F4 = _mm_mul_ps((a), (b)); \ - (dst) = _mm_sub_ps(HWIV_mtmp_4F4, (c)); } -#define HWIV_NMSUB_4F4(dst, a, b, c) { HWIV_mtmp_4F4 = _mm_mul_ps((a), (b)); \ - (dst) = _mm_sub_ps((c), HWIV_mtmp_4F4); } - -#define HWIV_INIT_RLTMP_4F4 /* nop */ - - -/* -HWIV_RODN_4F4 (ROtate DOwn) does a "downwards rotate" of a 4-element vector using the Intel VSHUFPS instruction -(intrinsic _mm_shuffle_ps). If the input is {a,b,c,d} (with a being element 0) the result of the downwards rotate is -{b,c,d,a} (with each element moving down tot he next-lower slot, except for a which rotates into the top position). - - SRC1 { x3 , x2 , x1 , x0 } - SRC2 { y3 , y2 , y1 , y0 } - DEST { y0 , y3 , x2 , x1 } - imm8: 00 11 10 01 = 0x39 - - */ -#define HWIV_RODN_4F4(dest, src) (dest) = _mm_shuffle_ps((src), (src), 0x39) - -/* -HWIV_ROUP_4F4 is an "upwards rotate": if the input is {a,b,c,d} (with a being element 0) the result is {d,a,b,c}. - - SRC1 { x3 , x2 , x1 , x0 } - SRC2 { y3 , y2 , y1 , y0 } - DEST { y2 , y1 , x0 , x3 } - imm8: 10 01 00 11 = 0x93 - - */ -#define HWIV_ROUP_4F4(dest, src) (dest) = _mm_shuffle_ps((src), (src), 0x93) - -/* -HWIV_RAISE_4F4 is an "upwards shift": if the input is {a,b,c,d} (with a being element 0) the result is {X,a,b,c} with -the new element X coming from element 3 of the "new" argument. - - new { x3 , x2 , x1 , x0 } - src { y3 , y2 , y1 , y0 } - dest { y0 , y0 , x3 , x3 } - imm8: 00 00 11 11 = 0x0F - src0 src0 new3 new3 - - dest { x3 , x2 , x1 , x0 } - src { y3 , y2 , y1 , y0 } - dest { y2 , y1 , x2 , x0 } - imm8: 10 01 10 00 = 0x98 - src2 src1 src0 new3 -*/ -#define HWIV_RAISE_4F4(dest, src, new) { (dest) = _mm_shuffle_ps((new), (src), 0x0f); \ - (dest) = _mm_shuffle_ps((dest), (src), 0x98); } - -/* -HWIV_LOWER_4F4 is an "downwards shift": if the input is {a,b,c,d} (with a being element 0) the result is {b,c,d,X} with -the new element X coming from element 0 of the "new" argument. - -To accomplish a downwards shift we can just use _mm_move_ss to move a single scalar into the bottom position and then do -a RODN (downwards rotate) - */ -#define HWIV_LOWER_4F4(dest, src, new) { (dest) = _mm_move_ss((src), (new)); \ - HWIV_RODN_4F4(dest, dest); } - -// Here is the subset for FORTRAN-style code -#define v4LOADU(a) _mm_loadu_ps(a) -#define v4ADD(a,b) _mm_add_ps((a), (b)) -#define v4SUB(a,b) _mm_sub_ps((a), (b)) -#define v4MUL(a,b) _mm_mul_ps((a), (b)) -// in v4SET, note the reversal of argument order -#define v4SET(v0,v1,v2,v3) _mm_set_ps((v3),(v2),(v1),(v0)) -#define v4SPLAT(a) _mm_set1_ps(a) -#define v4ROUP(src) _mm_shuffle_ps((src), (src), 0x93) -#define v4RODN(src) _mm_shuffle_ps((src), (src), 0x39) -#define v4RAISE(src, new) _mm_shuffle_ps(_mm_shuffle_ps((new), (src), 0x0f), (src), 0x98) -#define v4LOWER(src, new) _mm_shuffle_ps(_mm_move_ss((src),(new)), _mm_move_ss((src),(new)), 0x39) - -# endif -/* - - - - - - - - - - - - - - - - - - - - - - End of the INTEL section - - - - - - - - - - - - - - - - - - - - - - - */ - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// // -// /^^^^^\ /^^^^^\ _-^^^^^^/ TM // -// / ,- )_-----_ --- ---- ----..----. ---.---/ ,- )/ ,---/ // -// / /_) // __ )| |/ |/ _// .- )/ _// /_) // / // -// / // // / / /| / / (/___// .^ / // /( | // -// / / ^^^'( (/ / | /| _/ ( `----/ / / / ^^^' | `---/ // -// /__/ \____-' |__/ |__/ \____//___/ /__/ \_____/ // -// // -// // -// Versions of the V4F4 macros for the PowerPC AltiVec 128-bit vector instruction set // -// // -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/* - -The AltiVec instruction set first came with the "G4" (744x and 745x) processors from Motorola, then the "G5" (97x) -series from IBM, the "Cell" 8-core CPU used in the Sony Playstation 3, and in IBM's POWER6 (and later) server CPUs. - - Not yet implemented -- do we care about AltiVec? */ - -#endif diff --git a/SpeedComparisons/GrayScott_OpenCL/CMakeLists.txt b/SpeedComparisons/GrayScott_OpenCL/CMakeLists.txt deleted file mode 100644 index c62df02e1..000000000 --- a/SpeedComparisons/GrayScott_OpenCL/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -project(GrayScott_OpenCL) - -set(CMAKE_MODULE_PATH ${GrayScott_OpenCL_SOURCE_DIR}) -# (we include our own FindOpenCL.cmake until the time that CMake comes with its own) - -find_package(OpenCV REQUIRED) -include_directories( ${OPENCV_INCLUDE_DIR}) -link_libraries( ${OpenCV_LIBS} ) - -# only build the OpenCL version if OpenCL was found -find_package ( OpenCL ) -if(OPENCL_FOUND) - include_directories ( ${OPENCL_INCLUDE_DIRS} ) - link_libraries ( ${OPENCL_LIBRARIES} ) - - # tell the code where the .cl file will live - add_definitions(-DCL_SOURCE_DIR="${GrayScott_OpenCL_SOURCE_DIR}") - - INCLUDE_DIRECTORIES( "../Display" ) - - add_executable(GrayScott_OpenCL - gray_scott_opencl.cpp - grayscott_kernel.cl - ../Display/display.cpp - ../Display/display.h - ../Display/defs.h - ) -endif() - diff --git a/SpeedComparisons/GrayScott_OpenCL/FindOpenCL.cmake b/SpeedComparisons/GrayScott_OpenCL/FindOpenCL.cmake deleted file mode 100644 index fde90efae..000000000 --- a/SpeedComparisons/GrayScott_OpenCL/FindOpenCL.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# - Try to find OpenCL -# This module tries to find an OpenCL implementation on your system. It supports -# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too. -# -# Once done this will define -# OPENCL_FOUND - system has OpenCL -# OPENCL_INCLUDE_DIRS - the OpenCL include directory -# OPENCL_LIBRARIES - link these to use OpenCL -# -# WIN32 should work, but is untested - -FIND_PACKAGE( PackageHandleStandardArgs ) - -SET (OPENCL_VERSION_STRING "0.1.0") -SET (OPENCL_VERSION_MAJOR 0) -SET (OPENCL_VERSION_MINOR 1) -SET (OPENCL_VERSION_PATCH 0) - -IF (APPLE) - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") - FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") - -ELSE (APPLE) - - IF (WIN32) - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) - - # The AMD SDK currently installs both x86 and x86_64 libraries - # This is only a hack to find out architecture - IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") - ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") - ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR}) - - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # On Win32 search relative to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}") - - ELSE (WIN32) - - # Unix style platforms - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL - ENV LD_LIBRARY_PATH - ) - - GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # The AMD SDK currently does not place its headers - # in /usr/include, therefore also search relative - # to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - - ENDIF (WIN32) - -ENDIF (APPLE) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) - -IF( _OPENCL_CPP_INCLUDE_DIRS ) - SET( OPENCL_HAS_CPP_BINDINGS TRUE ) - LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) - # This is often the same, so clean up - LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) -ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) - -MARK_AS_ADVANCED( - OPENCL_INCLUDE_DIRS -) - diff --git a/SpeedComparisons/GrayScott_OpenCL/cl.hpp b/SpeedComparisons/GrayScott_OpenCL/cl.hpp deleted file mode 100644 index 99b86a665..000000000 --- a/SpeedComparisons/GrayScott_OpenCL/cl.hpp +++ /dev/null @@ -1,4011 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) - * \author Benedict R. Gaster and Laurent Morichetti - * - * Additions and fixes from Brian Cole, March 3rd 2010. - * - * \version 1.1 - * \date June 2010 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.1 (revision 04) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 -#include -#include -#if defined(USE_DX_INTEROP) -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -# include -#endif // linux - -#include - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -/*! \class Error - * \brief Exception class - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! Create a new CL error exception for a given error code - * and corresponding message. - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - const cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) - -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) - -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - */ -class string -{ -private: - ::size_t size_; - char * str_; -public: - string(void) : size_(0), str_(NULL) - { - } - - string(char * str, ::size_t size) : - size_(size), - str_(NULL) - { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - - string(char * str) : - str_(NULL) - { - size_= ::strlen(str); - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - size_ = 0; - str_ = NULL; - } - else { - size_ = rhs.size_; - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - string(const string& rhs) - { - *this = rhs; - } - - ~string() - { - if (str_ != NULL) { - delete[] str_; - } - } - - ::size_t size(void) const { return size_; } - ::size_t length(void) const { return size(); } - - const char * c_str(void) const { return (str_) ? str_ : "";} -}; - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -#include -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) -typedef cl::string STRING_CLASS; -#endif - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#include -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector -#endif - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * std::vector functionality. - */ -template -class vector -{ -private: - T data_[N]; - unsigned int size_; - bool empty_; -public: - vector() : - size_(-1), - empty_(true) - {} - - ~vector() {} - - unsigned int size(void) const - { - return size_ + 1; - } - - void clear() - { - size_ = -1; - empty_ = true; - } - - void push_back (const T& x) - { - if (size() < N) { - size_++; - data_[size_] = x; - empty_ = false; - } - } - - void pop_back(void) - { - if (!empty_) { - data_[size_].~T(); - size_--; - if (size_ == -1) { - empty_ = true; - } - } - } - - vector(const vector& vec) : - size_(vec.size_), - empty_(vec.empty_) - { - if (!empty_) { - memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); - } - } - - vector(unsigned int size, const T& val = T()) : - size_(-1), - empty_(true) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - size_ = rhs.size_; - empty_ = rhs.empty_; - - if (!empty_) { - memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); - } - - return *this; - } - - bool operator==(vector &vec) - { - if (empty_ && vec.empty_) { - return true; - } - - if (size() != vec.size()) { - return false; - } - - return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; - } - - operator T* () { return data_; } - operator const T* () const { return data_; } - - bool empty (void) const - { - return empty_; - } - - unsigned int max_size (void) const - { - return N; - } - - unsigned int capacity () const - { - return sizeof(T) * N; - } - - T& operator[](int index) - { - return data_[index]; - } - - T operator[](int index) const - { - return data_[index]; - } - - template - void assign(I start, I end) - { - clear(); - while(start < end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Iterator class for vectors - */ - class iterator - { - private: - vector vec_; - int index_; - bool initialized_; - public: - iterator(void) : - index_(-1), - initialized_(false) - { - index_ = -1; - initialized_ = false; - } - - ~iterator(void) {} - - static iterator begin(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = 0; - } - - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - static iterator end(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = vec.size(); - } - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_) && - (initialized_ == i.initialized_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - void operator++() - { - index_++; - } - - void operator++(int x) - { - index_ += x; - } - - void operator--() - { - index_--; - } - - void operator--(int x) - { - index_ -= x; - } - - T operator *() - { - return vec_[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_]; - } -}; - -/*! - * \brief size_t class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, who's - * size is known statically. - */ -template -struct size_t : public cl::vector< ::size_t, N> { }; - -namespace detail { - -// GetInfo help struct -template -struct GetInfoHelper -{ - static cl_int - get(Functor f, cl_uint name, T* param) - { - return f(name, sizeof(T), param, NULL); - } -}; - -// Specialized GetInfoHelper for VECTOR_CLASS params -template -struct GetInfoHelper > -{ - static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; - } -}; - -// Specialized for getInfo -template -struct GetInfoHelper > -{ - static cl_int - get(Func f, cl_uint name, VECTOR_CLASS* param) - { - cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; - } -}; - -// Specialized GetInfoHelper for STRING_CLASS params -template -struct GetInfoHelper -{ - static cl_int get(Func f, cl_uint name, STRING_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; - } -}; - -#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ -namespace detail { \ -template \ -struct GetInfoHelper \ -{ \ - static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ - { \ - cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ - if (err != CL_SUCCESS) { \ - return err; \ - } \ - \ - return ReferenceHandler::retain((*param)()); \ - } \ -}; \ -} - - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#undef __DECLARE_PARAM_TRAITS - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return GetInfoHelper::get(f, name, param); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -struct ReferenceHandler -{ }; - -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_INVALID_DEVICE; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_INVALID_DEVICE; } -}; - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_INVALID_PLATFORM; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_INVALID_PLATFORM; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { release(); } - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) throw(Error) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief ImageFormat interface fro cl_image_format. - */ -struct ImageFormat : public cl_image_format -{ - ImageFormat(){} - - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \class Device - * \brief Device interface for cl_device_id. - */ -class Device : public detail::Wrapper -{ -public: - Device(cl_device_id device) { object_ = device; } - - Device() : detail::Wrapper() { } - - Device(const Device& device) : detail::Wrapper(device) { } - - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif -}; - -/*! \class Platform - * \brief Platform interface. - */ -class Platform : public detail::Wrapper -{ -public: - static const Platform null(); - - Platform(cl_platform_id platform) { object_ = platform; } - - Platform() : detail::Wrapper() { } - - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -}; - -static inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} - -class Context : public detail::Wrapper -{ -public: - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContext( - properties, (cl_uint) devices.size(), - (cl_device_id*) &devices.front(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context() : detail::Wrapper() { } - - Context(const Context& context) : detail::Wrapper(context) { } - - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Context) - -/*! \class Event - * \brief Event interface for cl_event. - */ -class Event : public detail::Wrapper -{ -public: - Event() : detail::Wrapper() { } - - Event(const Event& event) : detail::Wrapper(event) { } - - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Event) - -#if defined(CL_VERSION_1_1) -/*! \class UserEvent - * \brief User event interface for cl_event. - */ -class UserEvent : public Event -{ -public: - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - UserEvent() : Event() { } - - UserEvent(const UserEvent& event) : Event(event) { } - - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \class Memory - * \brief Memory interface for cl_mem. - */ -class Memory : public detail::Wrapper -{ -public: - Memory() : detail::Wrapper() { } - - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) - -/*! \class Buffer - * \brief Memory buffer interface. - */ -class Buffer : public Memory -{ -public: - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Buffer() : Memory() { } - - Buffer(const Buffer& buffer) : Memory(buffer) { } - - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - -#if defined(CL_VERSION_1_1) - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferD3D10() : Buffer() { } - - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } -}; -#endif - -/*! \class BufferGL - * \brief Memory buffer interface for GL interop. - */ -class BufferGL : public Buffer -{ -public: - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferGL() : Buffer() { } - - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class BufferRenderGL - * \brief Memory buffer interface for GL interop with renderbuffer. - */ -class BufferRenderGL : public Buffer -{ -public: - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferRenderGL() : Buffer() { } - - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class Image - * \brief Base class interface for all images. - */ -class Image : public Memory -{ -protected: - Image() : Memory() { } - - Image(const Image& image) : Memory(image) { } - - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } -public: - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -/*! \class Image2D - * \brief Image interface for 2D images. - */ -class Image2D : public Image -{ -public: - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2D() { } - - Image2D(const Image2D& image2D) : Image(image2D) { } - - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image2DGL : public Image2D -{ -public: - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DGL() : Image2D() { } - - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image3D - * \brief Image interface for 3D images. - */ -class Image3D : public Image -{ -public: - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3D() { } - - Image3D(const Image3D& image3D) : Image(image3D) { } - - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image3DGL : public Image3D -{ -public: - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3DGL() : Image3D() { } - - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Sampler - * \brief Sampler interface for cl_sampler. - */ -class Sampler : public detail::Wrapper -{ -public: - Sampler() { } - - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) - -class Program; -class CommandQueue; -class Kernel; - -/*! \class NDRange - * \brief NDRange interface - */ -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - NDRange() - : dimensions_(0) - { } - - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_.push_back(size0); - } - - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - } - - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - sizes_.push_back(size2); - } - - operator const ::size_t*() const { return (const ::size_t*) sizes_; } - ::size_t dimensions() const { return dimensions_; } -}; - -static const NDRange NullRange; - -/*! - * \struct LocalSpaceArg - * \brief Local address raper for use with Kernel::setArg - */ -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -class KernelFunctor; - -/*! \class Kernel - * \brief Kernel interface that implements cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - Kernel() { } - - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local); - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local); -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - const ::size_t n = binaries.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); - - for (::size_t i = 0; i < n; ++i) { - images[i] = (const unsigned char*)binaries[(int)i].first; - lengths[i] = binaries[(int)i].second; - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - (cl_device_id*)&devices.front(), - lengths, images, binaryStatus != NULL - ? (cl_int*) &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - (cl_device_id*)&devices.front(), - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Program) - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -public: - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_ERR); - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_ERR); - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_RECT_ERR); - } - - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_RECT_ERR); - } -#endif - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_IMAGE_ERR); - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_IMAGE_ERR); - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_ERR); - } - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NDRANGE_KERNEL_ERR); - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_TASK_ERR); - } - - cl_int enqueueNativeKernel( - void (*userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - return detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NATIVE_KERNEL); - } - - cl_int enqueueMarker(Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } -#endif - - cl_int enqueueBarrier() const - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) - -/*! \class KernelFunctor - * \brief Kernel functor interface - * - * \note Currently only functors of zero to ten arguments are supported. It - * is straightforward to add more and a more general solution, similar to - * Boost.Lambda could be followed if required in the future. - */ -class KernelFunctor -{ -private: - Kernel kernel_; - CommandQueue queue_; - NDRange offset_; - NDRange global_; - NDRange local_; - - cl_int err_; -public: - KernelFunctor() { } - - KernelFunctor( - const Kernel& kernel, - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) : - kernel_(kernel), - queue_(queue), - offset_(offset), - global_(global), - local_(local), - err_(CL_SUCCESS) - {} - - KernelFunctor& operator=(const KernelFunctor& rhs); - - KernelFunctor(const KernelFunctor& rhs); - - cl_int getError() { return err_; } - - inline Event operator()(const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events = NULL); -}; - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,offset,global,local); -} - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,NullRange,global,local); -} - -inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) -{ - if (this == &rhs) { - return *this; - } - - kernel_ = rhs.kernel_; - queue_ = rhs.queue_; - offset_ = rhs.offset_; - global_ = rhs.global_; - local_ = rhs.local_; - - return *this; -} - -inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : - kernel_(rhs.kernel_), - queue_(rhs.queue_), - offset_(rhs.offset_), - global_(rhs.global_), - local_(rhs.local_) -{ -} - -Event KernelFunctor::operator()(const VECTOR_CLASS* events) -{ - Event event; - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - kernel_.setArg(14,a15); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __GET_INFO_HELPER_WITH_RETAIN - -// Extensions -#undef __INIT_CL_EXT_FCN_PTR -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -} // namespace cl - -#endif // CL_HPP_ diff --git a/SpeedComparisons/GrayScott_OpenCL/gray_scott_opencl.cpp b/SpeedComparisons/GrayScott_OpenCL/gray_scott_opencl.cpp deleted file mode 100644 index a7a70250c..000000000 --- a/SpeedComparisons/GrayScott_OpenCL/gray_scott_opencl.cpp +++ /dev/null @@ -1,271 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCL: -#define __NO_STD_VECTOR // Use cl::vector instead of STL version -#define __CL_ENABLE_EXCEPTIONS - -// cl.hpp is standard but doesn't come with most SDKs, so download it from here: -// http://www.khronos.org/registry/cl/api/1.1/cl.hpp -#ifdef __APPLE__ -# include "cl.hpp" -#else -# include -#endif - -using namespace cl; - -// STL: -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -static int g_opt_device = 0; -static int g_wrap = 1; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+1 platforms; - Platform::get(&platforms); - - // Select the default platform and create a context using this platform and the GPU - cl_context_properties cps[3] = { - CL_CONTEXT_PLATFORM, - (cl_context_properties)(platforms[0])(), - 0 - }; - Context context( CL_DEVICE_TYPE_GPU, cps); - - // Get a list of devices on this platform - vector devices = context.getInfo(); - - // range-check the user's selection - int maxdev = devices.size() - 1; - g_opt_device = (g_opt_device > maxdev) ? maxdev : - ((g_opt_device < 0) ? 0 : g_opt_device); - std::cout << (maxdev+1) << " device(s) available; using device " - << g_opt_device << ".\n"; - - Device &device = devices[g_opt_device]; - std::cout << "Global memory: " << device.getInfo() << " bytes\n"; - std::cout << "Local memory: " << device.getInfo() << " bytes\n"; - std::cout << "Local memory type: " << std::string((device.getInfo()==CL_LOCAL)?"local":"global") << " \n"; - std::cout << "CL_DEVICE_MAX_WORK_GROUP_SIZE: " << device.getInfo() << "\n"; - - // Create a command queue and use the selected device - if (maxdev < 0) { - std::cerr << "error -- need at least one OpenCL capable device.\n"; - exit(-1); - } - CommandQueue queue = CommandQueue(context, device); - Event event; - - // Read source file - std::string kfn = CL_SOURCE_DIR; // (defined in CMakeLists.txt to be the source folder) - kfn += "/grayscott_kernel.cl"; - std::ifstream sourceFile(kfn.c_str()); - std::string sourceCode( - std::istreambuf_iterator(sourceFile), - (std::istreambuf_iterator())); - Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); - - // Make program of the source code in the context - Program program = Program(context, source); - - // Build program for these specific devices - // If wrap (toroidal) option is selected, we define a preprocessor flag - // that controls how the xm1, xp1, etc. are computed. - program.build(devices, g_wrap ? "-D WRAP" : NULL, NULL, NULL); - - // Make kernel - Kernel kernel(program, "grayscott_compute"); - - std::cout << "CL_KERNEL_WORK_GROUP_SIZE: " << kernel.getWorkGroupInfo(device) << "\n"; - - // Create memory buffers - Buffer bufferU = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferV = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferU2 = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferV2 = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - - // Copy lists A and B to the memory buffers - queue.enqueueWriteBuffer(bufferU, CL_TRUE, 0, MEM_SIZE, u); - queue.enqueueWriteBuffer(bufferV, CL_TRUE, 0, MEM_SIZE, v); - - NDRange global(X,Y); - NDRange local(1,512); - - kernel.setArg(4, k); - kernel.setArg(5, f); - kernel.setArg(6, r_a); - kernel.setArg(7, r_b); - kernel.setArg(8, speed); - - int iteration = 0; - float fps_avg = 0.0; // decaying average of fps - const int N_FRAMES_PER_DISPLAY = 2000; // an even number, because of our double-buffering implementation - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elap; - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // run a few iterations (without copying the data back) - for(int it=0;it 0) - fps = ((float)N_FRAMES_PER_DISPLAY) / tod_elap; - // We display an exponential moving average of the fps measurement - fps_avg = (fps_avg == 0) ? fps : (((fps_avg * 10.0) + fps) / 11.0); - double Mcgs = (fps_avg * ((double)X) * ((double)Y)) / 1.0e6; - sprintf(msg,"GrayScott - %0.2f fps %0.2f Mcgs", fps_avg, Mcgs); - - // display: - { - int quitnow = display(u,u,u,iteration,false,200.0f,1,10,msg); - if (quitnow) - break; - } - } - } - catch(Error error) - { - std::cout << error.what() << "(" << error.err() << ")" << std::endl; - } -} - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - // start with a uniform field with an approximate circle in the middle - //if(hypot(i%20-10/*-X/2*/,j%20-10/*-Y/2*/)<=frand(2,5)) { - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) - { - a[i][j] = frand(0.0f,0.1f); - b[i][j] = frand(0.9f,1.0f); - } - else - { - a[i][j] = frand(0.9f,1.0f); - b[i][j] = frand(0.0f,0.1f); - } - /*float v = frand(0.0f,1.0f); - a[i][j] = v; - b[i][j] = 1.0f-v;*/ - } - } -} - diff --git a/SpeedComparisons/GrayScott_OpenCL/grayscott_kernel.cl b/SpeedComparisons/GrayScott_OpenCL/grayscott_kernel.cl deleted file mode 100644 index 6ce342a9d..000000000 --- a/SpeedComparisons/GrayScott_OpenCL/grayscott_kernel.cl +++ /dev/null @@ -1,68 +0,0 @@ -__kernel void grayscott_compute( - __global float *U,__global float *V, - __global float *U2, __global float *V2, - float k,float F,float D_u,float D_v,float delta_t) -{ - // Get the index of the current element. - const int x = get_global_id(0); - const int y = get_global_id(1); - const int X = get_global_size(0); - const int Y = get_global_size(1); - const int i = x*Y+y; - - const float u = U[i]; - const float v = V[i]; - - // compute the Laplacians of u and v -#ifdef WRAP - // speedy modulo operator for when X and Y are powers of 2 (we'll enforce this) - // http://forums.amd.com/devforum/messageview.cfm?catid=390&threadid=143648 - const int xm1 = ((x-1+X) & (X-1)); - const int xp1 = ((x+1) & (X-1)); - const int ym1 = ((y-1+Y) & (Y-1)); - const int yp1 = ((y+1) & (Y-1)); - //const int xm1 = ((x-1+X)%X); - //const int xp1 = ((x+1)%X); - //const int ym1 = ((y-1+Y)%Y); - //const int yp1 = ((y+1)%Y); -#else - const int xm1 = max(x-1,0); - const int xp1 = min(x+1,X-1); - const int ym1 = max(y-1,0); - const int yp1 = min(y+1,Y-1); -#endif - const int iLeft = xm1*Y + y; - const int iRight = xp1*Y + y; - const int iUp = x*Y + ym1; - const int iDown = x*Y + yp1; - - // Standard 5-point stencil - const float nabla_u = U[iLeft] + U[iRight] + U[iUp] + U[iDown] - 4*u; - const float nabla_v = V[iLeft] + V[iRight] + V[iUp] + V[iDown] - 4*v; - - // 9-point stencil of Arad et al. 1997 - // Arad, A Yakhot, G Ben-Dor. A Highly Accurate Numerical Solution - // of a Biharmonic Equation. Numer. Meth. PDE, 13, pp. 375-397, 1997. - // PDF at www.bgu.ac.il/~yakhot/homepage/publications/nmpde_4_97.pdf - // (see page 379) - // gives more correct results (no vertical/horizontal bias) but slows down - // the kernal a lot mainly because of the extra accesses to get neighboring - // U[] and V[] values. - // - // const int iUpLeft = xm1*Y + ym1; - // const int iUpRight = xp1*Y + ym1; - // const int iDownLeft = xm1*Y + yp1; - // const int iDownRight = xp1*Y + yp1; - // const float nabla_u = (U[iLeft]+U[iRight]+U[iUp]+U[iDown])*2.0f/3.0f - // +(U[iUpLeft]+U[iUpRight]+U[iDownLeft]+U[iDownRight])/6.0f - 10.0f*u/3.0f; - // const float nabla_v = (V[iLeft]+V[iRight]+V[iUp]+V[iDown])*2.0f/3.0f - // +(V[iUpLeft]+V[iUpRight]+V[iDownLeft]+V[iDownRight])/6.0f - 10.0f*v/3.0f; - - // compute the new rate of change - const float delta_u = D_u * nabla_u - u*v*v + F*(1.0f-u); - const float delta_v = D_v * nabla_v + u*v*v - (F+k)*v; - - // apply the change (to the new buffer) - U2[i] = u + delta_t * delta_u; - V2[i] = v + delta_t * delta_v; -} diff --git a/SpeedComparisons/GrayScott_OpenCL_2x2/CMakeLists.txt b/SpeedComparisons/GrayScott_OpenCL_2x2/CMakeLists.txt deleted file mode 100644 index 96a8439f9..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_2x2/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -project(GrayScott_OpenCL_2x2) - -set(CMAKE_MODULE_PATH ${GrayScott_OpenCL_2x2_SOURCE_DIR}) -# (we include our own FindOpenCL.cmake until the time that CMake comes with its own) - -find_package(OpenCV REQUIRED) -include_directories( ${OPENCV_INCLUDE_DIR}) -link_libraries( ${OpenCV_LIBS} ) - -# only build the OpenCL version if OpenCL was found -find_package ( OpenCL ) -if(OPENCL_FOUND) - include_directories ( ${OPENCL_INCLUDE_DIRS} ) - link_libraries ( ${OPENCL_LIBRARIES} ) - - # tell the code where the .cl file will live - add_definitions(-DCL_SOURCE_DIR="${GrayScott_OpenCL_2x2_SOURCE_DIR}") - - INCLUDE_DIRECTORIES( "../Display" ) - - add_executable(GrayScott_OpenCL_2x2 - gray_scott_opencl_2x2.cpp - grayscott_kernel_2x2.cl - ../Display/defs.h - ) -endif() - diff --git a/SpeedComparisons/GrayScott_OpenCL_2x2/FindOpenCL.cmake b/SpeedComparisons/GrayScott_OpenCL_2x2/FindOpenCL.cmake deleted file mode 100644 index fde90efae..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_2x2/FindOpenCL.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# - Try to find OpenCL -# This module tries to find an OpenCL implementation on your system. It supports -# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too. -# -# Once done this will define -# OPENCL_FOUND - system has OpenCL -# OPENCL_INCLUDE_DIRS - the OpenCL include directory -# OPENCL_LIBRARIES - link these to use OpenCL -# -# WIN32 should work, but is untested - -FIND_PACKAGE( PackageHandleStandardArgs ) - -SET (OPENCL_VERSION_STRING "0.1.0") -SET (OPENCL_VERSION_MAJOR 0) -SET (OPENCL_VERSION_MINOR 1) -SET (OPENCL_VERSION_PATCH 0) - -IF (APPLE) - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") - FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") - -ELSE (APPLE) - - IF (WIN32) - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) - - # The AMD SDK currently installs both x86 and x86_64 libraries - # This is only a hack to find out architecture - IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") - ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") - ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR}) - - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # On Win32 search relative to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}") - - ELSE (WIN32) - - # Unix style platforms - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL - ENV LD_LIBRARY_PATH - ) - - GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # The AMD SDK currently does not place its headers - # in /usr/include, therefore also search relative - # to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - - ENDIF (WIN32) - -ENDIF (APPLE) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) - -IF( _OPENCL_CPP_INCLUDE_DIRS ) - SET( OPENCL_HAS_CPP_BINDINGS TRUE ) - LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) - # This is often the same, so clean up - LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) -ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) - -MARK_AS_ADVANCED( - OPENCL_INCLUDE_DIRS -) - diff --git a/SpeedComparisons/GrayScott_OpenCL_2x2/cl.hpp b/SpeedComparisons/GrayScott_OpenCL_2x2/cl.hpp deleted file mode 100644 index 99b86a665..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_2x2/cl.hpp +++ /dev/null @@ -1,4011 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) - * \author Benedict R. Gaster and Laurent Morichetti - * - * Additions and fixes from Brian Cole, March 3rd 2010. - * - * \version 1.1 - * \date June 2010 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.1 (revision 04) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 -#include -#include -#if defined(USE_DX_INTEROP) -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -# include -#endif // linux - -#include - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -/*! \class Error - * \brief Exception class - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! Create a new CL error exception for a given error code - * and corresponding message. - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - const cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) - -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) - -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - */ -class string -{ -private: - ::size_t size_; - char * str_; -public: - string(void) : size_(0), str_(NULL) - { - } - - string(char * str, ::size_t size) : - size_(size), - str_(NULL) - { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - - string(char * str) : - str_(NULL) - { - size_= ::strlen(str); - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - size_ = 0; - str_ = NULL; - } - else { - size_ = rhs.size_; - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - string(const string& rhs) - { - *this = rhs; - } - - ~string() - { - if (str_ != NULL) { - delete[] str_; - } - } - - ::size_t size(void) const { return size_; } - ::size_t length(void) const { return size(); } - - const char * c_str(void) const { return (str_) ? str_ : "";} -}; - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -#include -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) -typedef cl::string STRING_CLASS; -#endif - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#include -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector -#endif - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * std::vector functionality. - */ -template -class vector -{ -private: - T data_[N]; - unsigned int size_; - bool empty_; -public: - vector() : - size_(-1), - empty_(true) - {} - - ~vector() {} - - unsigned int size(void) const - { - return size_ + 1; - } - - void clear() - { - size_ = -1; - empty_ = true; - } - - void push_back (const T& x) - { - if (size() < N) { - size_++; - data_[size_] = x; - empty_ = false; - } - } - - void pop_back(void) - { - if (!empty_) { - data_[size_].~T(); - size_--; - if (size_ == -1) { - empty_ = true; - } - } - } - - vector(const vector& vec) : - size_(vec.size_), - empty_(vec.empty_) - { - if (!empty_) { - memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); - } - } - - vector(unsigned int size, const T& val = T()) : - size_(-1), - empty_(true) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - size_ = rhs.size_; - empty_ = rhs.empty_; - - if (!empty_) { - memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); - } - - return *this; - } - - bool operator==(vector &vec) - { - if (empty_ && vec.empty_) { - return true; - } - - if (size() != vec.size()) { - return false; - } - - return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; - } - - operator T* () { return data_; } - operator const T* () const { return data_; } - - bool empty (void) const - { - return empty_; - } - - unsigned int max_size (void) const - { - return N; - } - - unsigned int capacity () const - { - return sizeof(T) * N; - } - - T& operator[](int index) - { - return data_[index]; - } - - T operator[](int index) const - { - return data_[index]; - } - - template - void assign(I start, I end) - { - clear(); - while(start < end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Iterator class for vectors - */ - class iterator - { - private: - vector vec_; - int index_; - bool initialized_; - public: - iterator(void) : - index_(-1), - initialized_(false) - { - index_ = -1; - initialized_ = false; - } - - ~iterator(void) {} - - static iterator begin(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = 0; - } - - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - static iterator end(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = vec.size(); - } - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_) && - (initialized_ == i.initialized_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - void operator++() - { - index_++; - } - - void operator++(int x) - { - index_ += x; - } - - void operator--() - { - index_--; - } - - void operator--(int x) - { - index_ -= x; - } - - T operator *() - { - return vec_[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_]; - } -}; - -/*! - * \brief size_t class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, who's - * size is known statically. - */ -template -struct size_t : public cl::vector< ::size_t, N> { }; - -namespace detail { - -// GetInfo help struct -template -struct GetInfoHelper -{ - static cl_int - get(Functor f, cl_uint name, T* param) - { - return f(name, sizeof(T), param, NULL); - } -}; - -// Specialized GetInfoHelper for VECTOR_CLASS params -template -struct GetInfoHelper > -{ - static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; - } -}; - -// Specialized for getInfo -template -struct GetInfoHelper > -{ - static cl_int - get(Func f, cl_uint name, VECTOR_CLASS* param) - { - cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; - } -}; - -// Specialized GetInfoHelper for STRING_CLASS params -template -struct GetInfoHelper -{ - static cl_int get(Func f, cl_uint name, STRING_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; - } -}; - -#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ -namespace detail { \ -template \ -struct GetInfoHelper \ -{ \ - static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ - { \ - cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ - if (err != CL_SUCCESS) { \ - return err; \ - } \ - \ - return ReferenceHandler::retain((*param)()); \ - } \ -}; \ -} - - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#undef __DECLARE_PARAM_TRAITS - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return GetInfoHelper::get(f, name, param); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -struct ReferenceHandler -{ }; - -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_INVALID_DEVICE; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_INVALID_DEVICE; } -}; - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_INVALID_PLATFORM; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_INVALID_PLATFORM; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { release(); } - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) throw(Error) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief ImageFormat interface fro cl_image_format. - */ -struct ImageFormat : public cl_image_format -{ - ImageFormat(){} - - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \class Device - * \brief Device interface for cl_device_id. - */ -class Device : public detail::Wrapper -{ -public: - Device(cl_device_id device) { object_ = device; } - - Device() : detail::Wrapper() { } - - Device(const Device& device) : detail::Wrapper(device) { } - - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif -}; - -/*! \class Platform - * \brief Platform interface. - */ -class Platform : public detail::Wrapper -{ -public: - static const Platform null(); - - Platform(cl_platform_id platform) { object_ = platform; } - - Platform() : detail::Wrapper() { } - - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -}; - -static inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} - -class Context : public detail::Wrapper -{ -public: - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContext( - properties, (cl_uint) devices.size(), - (cl_device_id*) &devices.front(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context() : detail::Wrapper() { } - - Context(const Context& context) : detail::Wrapper(context) { } - - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Context) - -/*! \class Event - * \brief Event interface for cl_event. - */ -class Event : public detail::Wrapper -{ -public: - Event() : detail::Wrapper() { } - - Event(const Event& event) : detail::Wrapper(event) { } - - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Event) - -#if defined(CL_VERSION_1_1) -/*! \class UserEvent - * \brief User event interface for cl_event. - */ -class UserEvent : public Event -{ -public: - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - UserEvent() : Event() { } - - UserEvent(const UserEvent& event) : Event(event) { } - - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \class Memory - * \brief Memory interface for cl_mem. - */ -class Memory : public detail::Wrapper -{ -public: - Memory() : detail::Wrapper() { } - - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) - -/*! \class Buffer - * \brief Memory buffer interface. - */ -class Buffer : public Memory -{ -public: - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Buffer() : Memory() { } - - Buffer(const Buffer& buffer) : Memory(buffer) { } - - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - -#if defined(CL_VERSION_1_1) - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferD3D10() : Buffer() { } - - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } -}; -#endif - -/*! \class BufferGL - * \brief Memory buffer interface for GL interop. - */ -class BufferGL : public Buffer -{ -public: - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferGL() : Buffer() { } - - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class BufferRenderGL - * \brief Memory buffer interface for GL interop with renderbuffer. - */ -class BufferRenderGL : public Buffer -{ -public: - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferRenderGL() : Buffer() { } - - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class Image - * \brief Base class interface for all images. - */ -class Image : public Memory -{ -protected: - Image() : Memory() { } - - Image(const Image& image) : Memory(image) { } - - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } -public: - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -/*! \class Image2D - * \brief Image interface for 2D images. - */ -class Image2D : public Image -{ -public: - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2D() { } - - Image2D(const Image2D& image2D) : Image(image2D) { } - - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image2DGL : public Image2D -{ -public: - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DGL() : Image2D() { } - - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image3D - * \brief Image interface for 3D images. - */ -class Image3D : public Image -{ -public: - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3D() { } - - Image3D(const Image3D& image3D) : Image(image3D) { } - - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image3DGL : public Image3D -{ -public: - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3DGL() : Image3D() { } - - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Sampler - * \brief Sampler interface for cl_sampler. - */ -class Sampler : public detail::Wrapper -{ -public: - Sampler() { } - - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) - -class Program; -class CommandQueue; -class Kernel; - -/*! \class NDRange - * \brief NDRange interface - */ -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - NDRange() - : dimensions_(0) - { } - - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_.push_back(size0); - } - - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - } - - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - sizes_.push_back(size2); - } - - operator const ::size_t*() const { return (const ::size_t*) sizes_; } - ::size_t dimensions() const { return dimensions_; } -}; - -static const NDRange NullRange; - -/*! - * \struct LocalSpaceArg - * \brief Local address raper for use with Kernel::setArg - */ -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -class KernelFunctor; - -/*! \class Kernel - * \brief Kernel interface that implements cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - Kernel() { } - - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local); - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local); -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - const ::size_t n = binaries.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); - - for (::size_t i = 0; i < n; ++i) { - images[i] = (const unsigned char*)binaries[(int)i].first; - lengths[i] = binaries[(int)i].second; - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - (cl_device_id*)&devices.front(), - lengths, images, binaryStatus != NULL - ? (cl_int*) &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - (cl_device_id*)&devices.front(), - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Program) - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -public: - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_ERR); - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_ERR); - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_RECT_ERR); - } - - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_RECT_ERR); - } -#endif - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_IMAGE_ERR); - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_IMAGE_ERR); - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_ERR); - } - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NDRANGE_KERNEL_ERR); - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_TASK_ERR); - } - - cl_int enqueueNativeKernel( - void (*userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - return detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NATIVE_KERNEL); - } - - cl_int enqueueMarker(Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } -#endif - - cl_int enqueueBarrier() const - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) - -/*! \class KernelFunctor - * \brief Kernel functor interface - * - * \note Currently only functors of zero to ten arguments are supported. It - * is straightforward to add more and a more general solution, similar to - * Boost.Lambda could be followed if required in the future. - */ -class KernelFunctor -{ -private: - Kernel kernel_; - CommandQueue queue_; - NDRange offset_; - NDRange global_; - NDRange local_; - - cl_int err_; -public: - KernelFunctor() { } - - KernelFunctor( - const Kernel& kernel, - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) : - kernel_(kernel), - queue_(queue), - offset_(offset), - global_(global), - local_(local), - err_(CL_SUCCESS) - {} - - KernelFunctor& operator=(const KernelFunctor& rhs); - - KernelFunctor(const KernelFunctor& rhs); - - cl_int getError() { return err_; } - - inline Event operator()(const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events = NULL); -}; - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,offset,global,local); -} - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,NullRange,global,local); -} - -inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) -{ - if (this == &rhs) { - return *this; - } - - kernel_ = rhs.kernel_; - queue_ = rhs.queue_; - offset_ = rhs.offset_; - global_ = rhs.global_; - local_ = rhs.local_; - - return *this; -} - -inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : - kernel_(rhs.kernel_), - queue_(rhs.queue_), - offset_(rhs.offset_), - global_(rhs.global_), - local_(rhs.local_) -{ -} - -Event KernelFunctor::operator()(const VECTOR_CLASS* events) -{ - Event event; - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - kernel_.setArg(14,a15); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __GET_INFO_HELPER_WITH_RETAIN - -// Extensions -#undef __INIT_CL_EXT_FCN_PTR -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -} // namespace cl - -#endif // CL_HPP_ diff --git a/SpeedComparisons/GrayScott_OpenCL_2x2/gray_scott_opencl_2x2.cpp b/SpeedComparisons/GrayScott_OpenCL_2x2/gray_scott_opencl_2x2.cpp deleted file mode 100644 index ecb7f0e07..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_2x2/gray_scott_opencl_2x2.cpp +++ /dev/null @@ -1,402 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// OpenCV: -#include -#include - -// stdlib: -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCL: -#define __NO_STD_VECTOR // Use cl::vector instead of STL version -#define __CL_ENABLE_EXCEPTIONS - -// cl.hpp is standard but doesn't come with most SDKs, so download it from here: -// http://www.khronos.org/registry/cl/api/1.1/cl.hpp -#ifdef __APPLE__ -# include "cl.hpp" -#else -# include -#endif - -using namespace cl; - -// STL: -#include -#include - -// local: -#include "defs.h" - -void init(float a[X][Y],float b[X][Y]); -bool display(float r[X][Y],float g[X][Y],float b[X][Y], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); - -// we pack the values in 2x2 blocks: x y -// z w -float& float_at(float* arr,int x,int y) -{ - return arr[ ( (x/2)*(Y/2) + y/2 ) * 4 + (y%2)*2 + x%2 ]; -} - -static int g_opt_device = 0; -static int g_wrap = 1; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+1 platforms; - Platform::get(&platforms); - - // Select the default platform and create a context using this platform and the GPU - cl_context_properties cps[3] = { - CL_CONTEXT_PLATFORM, - (cl_context_properties)(platforms[0])(), - 0 - }; - Context context( CL_DEVICE_TYPE_GPU, cps); - - // Get a list of devices on this platform - cl::vector devices = context.getInfo(); - - int maxdev = devices.size() - 1; - g_opt_device = (g_opt_device > maxdev) ? maxdev : - ((g_opt_device < 0) ? 0 : g_opt_device); - std::cout << (maxdev+1) << " device(s) available; using device " - << g_opt_device << ".\n"; - Device &device = devices[g_opt_device]; - - CommandQueue queue = CommandQueue(context, device); - Event event; - - // Read source file - // (CL_SOURCE_DIR is defined in CMakeLists.txt to be the folder - // containing the source files, including this file 'gray_scott_opencl_2x2.cpp' - // and the kernel source 'gs_wrap_kernel_2x2.cl'.) - std::string kfn = CL_SOURCE_DIR; - kfn += "/grayscott_kernel_2x2.cl"; - std::ifstream sourceFile(kfn.c_str()); - std::string sourceCode( - std::istreambuf_iterator(sourceFile), - (std::istreambuf_iterator())); - Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); - - - // enable this code to display kernel compilation error if you get clBuildProgram(-11) - #if 0 - const ::size_t n = (::size_t)source.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - for (::size_t i = 0; i < n; ++i) { - strings[i] = source[(int)i].first; - lengths[i] = source[(int)i].second; - } - cl_int err; - cl_program myprog = clCreateProgramWithSource(context(), (cl_uint)n, strings, lengths, &err); - err = clBuildProgram(myprog, (cl_uint)devices.size(), (cl_device_id*)&devices.front(), NULL, NULL, NULL); - char proglog[1024]; - clGetProgramBuildInfo(myprog, device(), CL_PROGRAM_BUILD_LOG, 1024, proglog, 0); - printf("err=%d log=%s\n", err, proglog); - return 0; - #endif - - - // Make program of the source code in the context - Program program = Program(context, source); - - // Build program for these specific devices - // If wrap (toroidal) option is selected, we define a preprocessor flag - // that controls how the xm1, xp1, etc. are computed. - program.build(devices, g_wrap ? "-D WRAP" : NULL, NULL, NULL); - - // Make kernel - Kernel kernel(program, "grayscott_compute"); - - // Create memory buffers - Buffer bufferU = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferV = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferU2 = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferV2 = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - - // Copy lists A and B to the memory buffers - queue.enqueueWriteBuffer(bufferU, CL_TRUE, 0, MEM_SIZE, a); - queue.enqueueWriteBuffer(bufferV, CL_TRUE, 0, MEM_SIZE, b); - - NDRange global(X/2,Y/2); - NDRange local(1,128); - - kernel.setArg(4, k); - kernel.setArg(5, f); - kernel.setArg(6, r_a); - kernel.setArg(7, r_b); - kernel.setArg(8, speed); - - int iteration = 0; - float fps_avg = 0.0; // decaying average of fps - const int N_FRAMES_PER_DISPLAY = 2000; // an even number, because of our double-buffering implementation - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elap; - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // run a few iterations (without copying the data back) - for(int it=0;it 0) - fps = ((float)N_FRAMES_PER_DISPLAY) / tod_elap; - // We display an exponential moving average of the fps measurement - fps_avg = (fps_avg == 0) ? fps : (((fps_avg * 10.0) + fps) / 11.0); - double Mcgs = (fps_avg * ((double)X) * ((double)Y)) / 1.0e6; - sprintf(msg,"GrayScott - %0.2f fps %0.2f Mcgs", fps_avg, Mcgs); - - // display: - if(display(a,a,a,iteration,false,200.0f,2,10,msg)) - break; - } - } - catch(Error error) - { - std::cout << error.what() << "(" << error.err() << ")" << std::endl; - } -} - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - // start with a uniform field with an approximate circle in the middle - //if(hypot(i%20-10/*-X/2*/,j%20-10/*-Y/2*/)<=frand(2,5)) { - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) - { - float_at((float*)a,i,j) = frand(0.0f,0.1f); - float_at((float*)b,i,j) = frand(0.9f,1.0f); - } - else - { - float_at((float*)a,i,j) = frand(0.9f,1.0f); - float_at((float*)b,i,j) = frand(0.0f,0.1f); - } - } - } -} - -bool display(float r[X][Y],float g[X][Y],float b[X][Y], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message) -{ - static bool need_init = true; - static bool write_video = false; - - static IplImage *im,*im2; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(X,Y),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(X*scale,Y*scale),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(write_video) - { - video = cvCreateVideoWriter(title,CV_FOURCC('D','I','V','X'),25.0,cvGetSize(im2),1); - } - } - - // convert float arrays to IplImage for OpenCV to display - float val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - if(auto_brighten) - { - for(int i=0;imaxR) maxR=val; - } - if(g) { - val = float_at((float*)g,i,j); - if(valmaxG) maxG=val; - } - if(b) { - val = float_at((float*)b,i,j); - if(valmaxB) maxB=val; - } - } - } - } - #pragma omp parallel for - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 2] = (uchar)val; - } - if(g) { - float val = float_at((float*)g,i,j);//Y-j-1); - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 1] = (uchar)val; - } - if(b) { - float val = float_at((float*)b,i,j);//Y-j-1); - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - - char txt[100]; - if(!write_video) - { - sprintf(txt,"%d",iteration); - cvPutText(im2,txt,cvPoint(20,20),&font,white); - } - - cvPutText(im2,message,cvPoint(20,40),&font,white); - - if(write_video) - cvWriteFrame(video,im2); - - cvShowImage(title,im2); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - if(write_video) - cvReleaseVideoWriter(&video); - return true; - } - return false; -} diff --git a/SpeedComparisons/GrayScott_OpenCL_2x2/grayscott_kernel_2x2.cl b/SpeedComparisons/GrayScott_OpenCL_2x2/grayscott_kernel_2x2.cl deleted file mode 100644 index 7b6cadd2c..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_2x2/grayscott_kernel_2x2.cl +++ /dev/null @@ -1,59 +0,0 @@ -__kernel void grayscott_compute( - __global float4 *U,__global float4 *V, - __global float4 *U2, __global float4 *V2, - float k,float F,float D_u,float D_v,float delta_t) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - const int X = get_global_size(0); - const int Y = get_global_size(1); - const int i = x*Y+y; - - const float4 u = U[i]; - const float4 v = V[i]; - - // compute the Laplacians of a and b -#ifdef WRAP - const int xm1 = ((x-1+X)%X); - const int xp1 = ((x+1)%X); - const int ym1 = ((y-1+Y)%Y); - const int yp1 = ((y+1)%Y); -#else - const int xm1 = max(x-1,0); - const int xp1 = min(x+1,X-1); - const int ym1 = max(y-1,0); - const int yp1 = min(y+1,Y-1); -#endif - const int iLeft = xm1*Y + y; - const int iRight = xp1*Y + y; - const int iUp = x*Y + ym1; - const int iDown = x*Y + yp1; - - const float4 u_left = U[iLeft]; - const float4 u_right = U[iRight]; - const float4 u_up = U[iUp]; - const float4 u_down = U[iDown]; - const float4 v_left = V[iLeft]; - const float4 v_right = V[iRight]; - const float4 v_up = V[iUp]; - const float4 v_down = V[iDown]; - - const float4 nabla_u = (float4)( - u_left.y + u_up.z + u.y + u.z, - u.x + u_up.w + u_right.x + u.w, - u_left.w + u.x + u.w + u_down.x, - u.z + u.y + u_right.z + u_down.y) - 4.0f*u; - const float4 nabla_v = (float4)( - v_left.y + v_up.z + v.y + v.z, - v.x + v_up.w + v_right.x + v.w, - v_left.w + v.x + v.w + v_down.x, - v.z + v.y + v_right.z + v_down.y) - 4.0f*v; - - // compute the new rate of change - const float4 delta_u = D_u * nabla_u - u*v*v + F*(1.0f-u); - const float4 delta_v = D_v * nabla_v + u*v*v - (F+k)*v; - - // apply the change (to the new buffer) - U2[i] = u + delta_t * delta_u; - V2[i] = v + delta_t * delta_v; -} diff --git a/SpeedComparisons/GrayScott_OpenCL_Image/CMakeLists.txt b/SpeedComparisons/GrayScott_OpenCL_Image/CMakeLists.txt deleted file mode 100644 index 1ed3d9a95..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -project(GrayScott_OpenCL_Image) - -set(CMAKE_MODULE_PATH ${GrayScott_OpenCL_SOURCE_DIR}) -# (we include our own FindOpenCL.cmake until the time that CMake comes with its own) - -find_package(OpenCV REQUIRED) -include_directories( ${OPENCV_INCLUDE_DIR}) -link_libraries( ${OpenCV_LIBS} ) - -# only build the OpenCL version if OpenCL was found -find_package ( OpenCL ) -if(OPENCL_FOUND) - include_directories ( ${OPENCL_INCLUDE_DIRS} ) - link_libraries ( ${OPENCL_LIBRARIES} ) - - # tell the code where the .cl file will live - add_definitions(-DCL_SOURCE_DIR="${GrayScott_OpenCL_Image_SOURCE_DIR}") - - include_directories(../Display) - - add_executable(GrayScott_OpenCL_Image - gray_scott_opencl_image.cpp - grayscott_kernel_image.cl - ../Display/defs.h - ) -endif() - diff --git a/SpeedComparisons/GrayScott_OpenCL_Image/FindOpenCL.cmake b/SpeedComparisons/GrayScott_OpenCL_Image/FindOpenCL.cmake deleted file mode 100644 index fde90efae..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image/FindOpenCL.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# - Try to find OpenCL -# This module tries to find an OpenCL implementation on your system. It supports -# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too. -# -# Once done this will define -# OPENCL_FOUND - system has OpenCL -# OPENCL_INCLUDE_DIRS - the OpenCL include directory -# OPENCL_LIBRARIES - link these to use OpenCL -# -# WIN32 should work, but is untested - -FIND_PACKAGE( PackageHandleStandardArgs ) - -SET (OPENCL_VERSION_STRING "0.1.0") -SET (OPENCL_VERSION_MAJOR 0) -SET (OPENCL_VERSION_MINOR 1) -SET (OPENCL_VERSION_PATCH 0) - -IF (APPLE) - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") - FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") - -ELSE (APPLE) - - IF (WIN32) - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) - - # The AMD SDK currently installs both x86 and x86_64 libraries - # This is only a hack to find out architecture - IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") - ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") - ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR}) - - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # On Win32 search relative to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}") - - ELSE (WIN32) - - # Unix style platforms - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL - ENV LD_LIBRARY_PATH - ) - - GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # The AMD SDK currently does not place its headers - # in /usr/include, therefore also search relative - # to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - - ENDIF (WIN32) - -ENDIF (APPLE) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) - -IF( _OPENCL_CPP_INCLUDE_DIRS ) - SET( OPENCL_HAS_CPP_BINDINGS TRUE ) - LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) - # This is often the same, so clean up - LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) -ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) - -MARK_AS_ADVANCED( - OPENCL_INCLUDE_DIRS -) - diff --git a/SpeedComparisons/GrayScott_OpenCL_Image/cl.hpp b/SpeedComparisons/GrayScott_OpenCL_Image/cl.hpp deleted file mode 100644 index 99b86a665..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image/cl.hpp +++ /dev/null @@ -1,4011 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) - * \author Benedict R. Gaster and Laurent Morichetti - * - * Additions and fixes from Brian Cole, March 3rd 2010. - * - * \version 1.1 - * \date June 2010 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.1 (revision 04) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 -#include -#include -#if defined(USE_DX_INTEROP) -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -# include -#endif // linux - -#include - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -/*! \class Error - * \brief Exception class - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! Create a new CL error exception for a given error code - * and corresponding message. - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - const cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) - -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) - -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - */ -class string -{ -private: - ::size_t size_; - char * str_; -public: - string(void) : size_(0), str_(NULL) - { - } - - string(char * str, ::size_t size) : - size_(size), - str_(NULL) - { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - - string(char * str) : - str_(NULL) - { - size_= ::strlen(str); - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - size_ = 0; - str_ = NULL; - } - else { - size_ = rhs.size_; - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - string(const string& rhs) - { - *this = rhs; - } - - ~string() - { - if (str_ != NULL) { - delete[] str_; - } - } - - ::size_t size(void) const { return size_; } - ::size_t length(void) const { return size(); } - - const char * c_str(void) const { return (str_) ? str_ : "";} -}; - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -#include -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) -typedef cl::string STRING_CLASS; -#endif - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#include -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector -#endif - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * std::vector functionality. - */ -template -class vector -{ -private: - T data_[N]; - unsigned int size_; - bool empty_; -public: - vector() : - size_(-1), - empty_(true) - {} - - ~vector() {} - - unsigned int size(void) const - { - return size_ + 1; - } - - void clear() - { - size_ = -1; - empty_ = true; - } - - void push_back (const T& x) - { - if (size() < N) { - size_++; - data_[size_] = x; - empty_ = false; - } - } - - void pop_back(void) - { - if (!empty_) { - data_[size_].~T(); - size_--; - if (size_ == -1) { - empty_ = true; - } - } - } - - vector(const vector& vec) : - size_(vec.size_), - empty_(vec.empty_) - { - if (!empty_) { - memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); - } - } - - vector(unsigned int size, const T& val = T()) : - size_(-1), - empty_(true) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - size_ = rhs.size_; - empty_ = rhs.empty_; - - if (!empty_) { - memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); - } - - return *this; - } - - bool operator==(vector &vec) - { - if (empty_ && vec.empty_) { - return true; - } - - if (size() != vec.size()) { - return false; - } - - return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; - } - - operator T* () { return data_; } - operator const T* () const { return data_; } - - bool empty (void) const - { - return empty_; - } - - unsigned int max_size (void) const - { - return N; - } - - unsigned int capacity () const - { - return sizeof(T) * N; - } - - T& operator[](int index) - { - return data_[index]; - } - - T operator[](int index) const - { - return data_[index]; - } - - template - void assign(I start, I end) - { - clear(); - while(start < end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Iterator class for vectors - */ - class iterator - { - private: - vector vec_; - int index_; - bool initialized_; - public: - iterator(void) : - index_(-1), - initialized_(false) - { - index_ = -1; - initialized_ = false; - } - - ~iterator(void) {} - - static iterator begin(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = 0; - } - - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - static iterator end(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = vec.size(); - } - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_) && - (initialized_ == i.initialized_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - void operator++() - { - index_++; - } - - void operator++(int x) - { - index_ += x; - } - - void operator--() - { - index_--; - } - - void operator--(int x) - { - index_ -= x; - } - - T operator *() - { - return vec_[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_]; - } -}; - -/*! - * \brief size_t class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, who's - * size is known statically. - */ -template -struct size_t : public cl::vector< ::size_t, N> { }; - -namespace detail { - -// GetInfo help struct -template -struct GetInfoHelper -{ - static cl_int - get(Functor f, cl_uint name, T* param) - { - return f(name, sizeof(T), param, NULL); - } -}; - -// Specialized GetInfoHelper for VECTOR_CLASS params -template -struct GetInfoHelper > -{ - static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; - } -}; - -// Specialized for getInfo -template -struct GetInfoHelper > -{ - static cl_int - get(Func f, cl_uint name, VECTOR_CLASS* param) - { - cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; - } -}; - -// Specialized GetInfoHelper for STRING_CLASS params -template -struct GetInfoHelper -{ - static cl_int get(Func f, cl_uint name, STRING_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; - } -}; - -#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ -namespace detail { \ -template \ -struct GetInfoHelper \ -{ \ - static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ - { \ - cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ - if (err != CL_SUCCESS) { \ - return err; \ - } \ - \ - return ReferenceHandler::retain((*param)()); \ - } \ -}; \ -} - - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#undef __DECLARE_PARAM_TRAITS - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return GetInfoHelper::get(f, name, param); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -struct ReferenceHandler -{ }; - -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_INVALID_DEVICE; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_INVALID_DEVICE; } -}; - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_INVALID_PLATFORM; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_INVALID_PLATFORM; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { release(); } - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) throw(Error) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief ImageFormat interface fro cl_image_format. - */ -struct ImageFormat : public cl_image_format -{ - ImageFormat(){} - - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \class Device - * \brief Device interface for cl_device_id. - */ -class Device : public detail::Wrapper -{ -public: - Device(cl_device_id device) { object_ = device; } - - Device() : detail::Wrapper() { } - - Device(const Device& device) : detail::Wrapper(device) { } - - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif -}; - -/*! \class Platform - * \brief Platform interface. - */ -class Platform : public detail::Wrapper -{ -public: - static const Platform null(); - - Platform(cl_platform_id platform) { object_ = platform; } - - Platform() : detail::Wrapper() { } - - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -}; - -static inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} - -class Context : public detail::Wrapper -{ -public: - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContext( - properties, (cl_uint) devices.size(), - (cl_device_id*) &devices.front(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context() : detail::Wrapper() { } - - Context(const Context& context) : detail::Wrapper(context) { } - - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Context) - -/*! \class Event - * \brief Event interface for cl_event. - */ -class Event : public detail::Wrapper -{ -public: - Event() : detail::Wrapper() { } - - Event(const Event& event) : detail::Wrapper(event) { } - - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Event) - -#if defined(CL_VERSION_1_1) -/*! \class UserEvent - * \brief User event interface for cl_event. - */ -class UserEvent : public Event -{ -public: - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - UserEvent() : Event() { } - - UserEvent(const UserEvent& event) : Event(event) { } - - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \class Memory - * \brief Memory interface for cl_mem. - */ -class Memory : public detail::Wrapper -{ -public: - Memory() : detail::Wrapper() { } - - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) - -/*! \class Buffer - * \brief Memory buffer interface. - */ -class Buffer : public Memory -{ -public: - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Buffer() : Memory() { } - - Buffer(const Buffer& buffer) : Memory(buffer) { } - - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - -#if defined(CL_VERSION_1_1) - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferD3D10() : Buffer() { } - - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } -}; -#endif - -/*! \class BufferGL - * \brief Memory buffer interface for GL interop. - */ -class BufferGL : public Buffer -{ -public: - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferGL() : Buffer() { } - - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class BufferRenderGL - * \brief Memory buffer interface for GL interop with renderbuffer. - */ -class BufferRenderGL : public Buffer -{ -public: - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferRenderGL() : Buffer() { } - - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class Image - * \brief Base class interface for all images. - */ -class Image : public Memory -{ -protected: - Image() : Memory() { } - - Image(const Image& image) : Memory(image) { } - - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } -public: - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -/*! \class Image2D - * \brief Image interface for 2D images. - */ -class Image2D : public Image -{ -public: - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2D() { } - - Image2D(const Image2D& image2D) : Image(image2D) { } - - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image2DGL : public Image2D -{ -public: - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DGL() : Image2D() { } - - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image3D - * \brief Image interface for 3D images. - */ -class Image3D : public Image -{ -public: - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3D() { } - - Image3D(const Image3D& image3D) : Image(image3D) { } - - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image3DGL : public Image3D -{ -public: - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3DGL() : Image3D() { } - - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Sampler - * \brief Sampler interface for cl_sampler. - */ -class Sampler : public detail::Wrapper -{ -public: - Sampler() { } - - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) - -class Program; -class CommandQueue; -class Kernel; - -/*! \class NDRange - * \brief NDRange interface - */ -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - NDRange() - : dimensions_(0) - { } - - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_.push_back(size0); - } - - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - } - - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - sizes_.push_back(size2); - } - - operator const ::size_t*() const { return (const ::size_t*) sizes_; } - ::size_t dimensions() const { return dimensions_; } -}; - -static const NDRange NullRange; - -/*! - * \struct LocalSpaceArg - * \brief Local address raper for use with Kernel::setArg - */ -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -class KernelFunctor; - -/*! \class Kernel - * \brief Kernel interface that implements cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - Kernel() { } - - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local); - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local); -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - const ::size_t n = binaries.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); - - for (::size_t i = 0; i < n; ++i) { - images[i] = (const unsigned char*)binaries[(int)i].first; - lengths[i] = binaries[(int)i].second; - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - (cl_device_id*)&devices.front(), - lengths, images, binaryStatus != NULL - ? (cl_int*) &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - (cl_device_id*)&devices.front(), - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Program) - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -public: - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_ERR); - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_ERR); - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_RECT_ERR); - } - - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_RECT_ERR); - } -#endif - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_IMAGE_ERR); - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_IMAGE_ERR); - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_ERR); - } - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NDRANGE_KERNEL_ERR); - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_TASK_ERR); - } - - cl_int enqueueNativeKernel( - void (*userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - return detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NATIVE_KERNEL); - } - - cl_int enqueueMarker(Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } -#endif - - cl_int enqueueBarrier() const - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) - -/*! \class KernelFunctor - * \brief Kernel functor interface - * - * \note Currently only functors of zero to ten arguments are supported. It - * is straightforward to add more and a more general solution, similar to - * Boost.Lambda could be followed if required in the future. - */ -class KernelFunctor -{ -private: - Kernel kernel_; - CommandQueue queue_; - NDRange offset_; - NDRange global_; - NDRange local_; - - cl_int err_; -public: - KernelFunctor() { } - - KernelFunctor( - const Kernel& kernel, - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) : - kernel_(kernel), - queue_(queue), - offset_(offset), - global_(global), - local_(local), - err_(CL_SUCCESS) - {} - - KernelFunctor& operator=(const KernelFunctor& rhs); - - KernelFunctor(const KernelFunctor& rhs); - - cl_int getError() { return err_; } - - inline Event operator()(const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events = NULL); -}; - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,offset,global,local); -} - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,NullRange,global,local); -} - -inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) -{ - if (this == &rhs) { - return *this; - } - - kernel_ = rhs.kernel_; - queue_ = rhs.queue_; - offset_ = rhs.offset_; - global_ = rhs.global_; - local_ = rhs.local_; - - return *this; -} - -inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : - kernel_(rhs.kernel_), - queue_(rhs.queue_), - offset_(rhs.offset_), - global_(rhs.global_), - local_(rhs.local_) -{ -} - -Event KernelFunctor::operator()(const VECTOR_CLASS* events) -{ - Event event; - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - kernel_.setArg(14,a15); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __GET_INFO_HELPER_WITH_RETAIN - -// Extensions -#undef __INIT_CL_EXT_FCN_PTR -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -} // namespace cl - -#endif // CL_HPP_ diff --git a/SpeedComparisons/GrayScott_OpenCL_Image/gray_scott_opencl_image.cpp b/SpeedComparisons/GrayScott_OpenCL_Image/gray_scott_opencl_image.cpp deleted file mode 100644 index 40d305ad0..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image/gray_scott_opencl_image.cpp +++ /dev/null @@ -1,363 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// OpenCV: -#include -#include - -// stdlib: -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCL: -#define __NO_STD_VECTOR // Use cl::vector instead of STL version -#define __CL_ENABLE_EXCEPTIONS - -// cl.hpp is standard but doesn't come with most SDKs, so download it from here: -// http://www.khronos.org/registry/cl/api/1.1/cl.hpp -#ifdef __APPLE__ -# include "cl.hpp" -#else -# include -#endif - -using namespace cl; - -// STL: -#include -#include - -// local: -#include "defs.h" - -void init(float *chemicals); -bool display(float *chemicals, - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); - -static int g_opt_device = 0; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+1 platforms; - Platform::get(&platforms); - - // Select the default platform and create a context using this platform and the GPU - cl_context_properties cps[3] = { - CL_CONTEXT_PLATFORM, - (cl_context_properties)(platforms[0])(), - 0 - }; - Context context( CL_DEVICE_TYPE_GPU, cps); - - float *chem1 = new float[X*Y*4]; // we store up to 4 chemicals in an RGBA image - float *chem2 = new float[X*Y*4]; - init(chem1); - - // Get a list of devices on this platform - cl::vector devices = context.getInfo(); - - // range-check the user's selection - int maxdev = devices.size() - 1; - g_opt_device = (g_opt_device > maxdev) ? maxdev : - ((g_opt_device < 0) ? 0 : g_opt_device); - std::cout << (maxdev+1) << " device(s) available; using device " - << g_opt_device << ".\n"; - - Device &device = devices[g_opt_device]; - cl::vector ourdevices = cl::vector(1, device); - - - bool is_ImageSupported = device.getInfo(); - if(!is_ImageSupported) - { - printf("Images not supported on this device.\n"); - exit(-1); - } - - // we make two images and swap between them - cl::Image2D chemicals1(context,CL_MEM_READ_WRITE,cl::ImageFormat(CL_RGBA,CL_FLOAT),X,Y); - cl::Image2D chemicals2(context,CL_MEM_READ_WRITE,cl::ImageFormat(CL_RGBA,CL_FLOAT),X,Y); - - // Create a command queue and use the selected device - CommandQueue queue = CommandQueue(context, device); - Event event; - - // Copy to the memory buffers - cl::size_t<3> origin; - origin.push_back(0); - origin.push_back(0); - origin.push_back(0); - cl::size_t<3> region; - region.push_back(X); - region.push_back(Y); - region.push_back(1); - queue.enqueueWriteImage(chemicals1,true,origin,region,0,0,chem1); - queue.enqueueWriteImage(chemicals2,true,origin,region,0,0,chem2); - - // Read source file - std::string kfn = CL_SOURCE_DIR; // (defined in CMakeLists.txt to be the source folder) - kfn += "/grayscott_kernel_image.cl"; - std::ifstream sourceFile(kfn.c_str()); - std::string sourceCode( - std::istreambuf_iterator(sourceFile), - (std::istreambuf_iterator())); - Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); - - // enable this code to display kernel compilation error if you get clBuildProgram(-11) - #if 0 - const ::size_t n = (::size_t)source.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - for (::size_t i = 0; i < n; ++i) { - strings[i] = source[(int)i].first; - lengths[i] = source[(int)i].second; - } - cl_int err; - cl_program myprog = clCreateProgramWithSource(context(), (cl_uint)n, strings, lengths, &err); - err = clBuildProgram(myprog, (cl_uint)ourdevices.size(), (cl_device_id*)&ourdevices.front(), NULL, NULL, NULL); - char proglog[1024]; - clGetProgramBuildInfo(myprog, device(), CL_PROGRAM_BUILD_LOG, 1024, proglog, 0); - printf("err=%d log=%s\n", err, proglog); - return 0; - #endif - - // Make program of the source code in the context - Program program = Program(context, source); - - // Build program for the specific device we are using - // IMPORTANT: If this program is running on a system that has multiple - // graphics cards, and if ANY of those cards does not support Images, - // then "program.build(devices);" will fail. Thus, we must build only - // on the device that is actually being used for the command queue. - program.build(ourdevices); - - // Make kernel - Kernel kernel(program, "grayscott_compute"); - - NDRange global(X,Y); - NDRange local(16,16); - - kernel.setArg(2, f); - kernel.setArg(3, f+k); - kernel.setArg(4, r_a); - kernel.setArg(5, r_b); - kernel.setArg(6, speed); - - int iteration = 0; - float fps_avg = 0.0; // decaying average of fps - const int N_FRAMES_PER_DISPLAY = 5000; // an even number, because of our double-buffering implementation - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elap; - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // run a few iterations (without copying the data back) - for(int it=0;it 0) - fps = N_FRAMES_PER_DISPLAY / (float)tod_elap; - // We display an exponential moving average of the fps measurement - fps_avg = (fps_avg == 0) ? fps : (((fps_avg * 10.0f) + fps) / 11.0f); - sprintf(msg,"GrayScott - %0.2f fps (%.2f Mcgs)", fps_avg,fps_avg*X*Y/1e6); - - // display: - { - int quitnow = display(chem1,iteration,false,200.0f,2,10,msg); - if (quitnow) - break; - } - } - } - catch(Error error) - { - std::cout << error.what() << "(" << error.err() << ")" << std::endl; - } -} - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} - -void init(float* chemicals) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - int index = (j*X+i)*4; - // start with a uniform field with an approximate circle in the middle - if(hypot(i-X/3,(j-Y/4)/1.5)<=frand(2,5)) - { - chemicals[index+0] = frand(0.0f,0.1f); - chemicals[index+1] = frand(0.9f,1.0f); - } - else - { - chemicals[index+0] = frand(0.9f,1.0f); - chemicals[index+1] = frand(0.0f,0.1f); - } - - } - } -} - - -bool display(float *chemicals, - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message) -{ - static bool need_init = true; - static bool write_video = false; - - static IplImage *im,*im2,*im3; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(X,Y),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(X*scale,Y*scale),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - } - - // convert float arrays to IplImage for OpenCV to display - for(int col=0;col255) val=255; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 2] = (uchar)val; - val = chemicals[(row*X+col)*4 + 0]; - val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 1] = (uchar)val; - val = chemicals[(row*X+col)*4 + 0]; - val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 0] = (uchar)val; - } - } - - cvResize(im,im2); - - { - char txt[100]; - sprintf(txt,"%d",iteration); - cvPutText(im2,txt,cvPoint(20,20),&font,white); - cvPutText(im2,message,cvPoint(20,40),&font,white); - } - - cvShowImage(title,im2); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - return true; - } - return false; -} diff --git a/SpeedComparisons/GrayScott_OpenCL_Image/grayscott_kernel_image.cl b/SpeedComparisons/GrayScott_OpenCL_Image/grayscott_kernel_image.cl deleted file mode 100644 index 386704898..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image/grayscott_kernel_image.cl +++ /dev/null @@ -1,32 +0,0 @@ -__kernel void grayscott_compute ( - read_only image2d_t input, - write_only image2d_t output, - float f,float f_plus_k, - float r_a,float r_b, - float speed) -{ - const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_REPEAT | CLK_FILTER_NEAREST; - - const int x = get_global_id(0); - const int y = get_global_id(1); - - const float4 pixel = read_imagef(input, smp, (int2)(x, y)); - const float4 left = read_imagef(input, smp, (int2)(x-1, y)); - const float4 right = read_imagef(input, smp, (int2)(x+1, y)); - const float4 up = read_imagef(input, smp, (int2)(x, y-1)); - const float4 down = read_imagef(input, smp, (int2)(x, y+1)); - - const float4 laplacian = left + right + up + down - 4.0f*pixel; - - { - // for Gray-Scott we only use the first two components: - - const float da = r_a * laplacian.x - pixel.x*pixel.y*pixel.y + f*(1-pixel.x); - const float db = r_b * laplacian.y + pixel.x*pixel.y*pixel.y - f_plus_k*pixel.y; - - pixel.x += speed * da; - pixel.y += speed * db; - } - - write_imagef(output, (int2)(x, y), pixel); -} \ No newline at end of file diff --git a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/CMakeLists.txt b/SpeedComparisons/GrayScott_OpenCL_Image_2x2/CMakeLists.txt deleted file mode 100644 index 8bf3ec0d8..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -project(GrayScott_OpenCL_Image_2x2) - -set(CMAKE_MODULE_PATH ${GrayScott_OpenCL_Image_2x2_SOURCE_DIR}) -# (we include our own FindOpenCL.cmake until the time that CMake comes with its own) - -find_package(OpenCV REQUIRED) -include_directories( ${OPENCV_INCLUDE_DIR}) -link_libraries( ${OpenCV_LIBS} ) - -# only build the OpenCL version if OpenCL was found -find_package ( OpenCL ) -if(OPENCL_FOUND) - include_directories ( ${OPENCL_INCLUDE_DIRS} ) - link_libraries ( ${OPENCL_LIBRARIES} ) - - # tell the code where the .cl file will live - add_definitions(-DCL_SOURCE_DIR="${GrayScott_OpenCL_Image_2x2_SOURCE_DIR}") - - include_directories(../Display) - - add_executable(GrayScott_OpenCL_Image_2x2 - gray_scott_opencl_image_2x2.cpp - grayscott_kernel_image_2x2.cl - ../Display/defs.h - ) -endif() - diff --git a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/FindOpenCL.cmake b/SpeedComparisons/GrayScott_OpenCL_Image_2x2/FindOpenCL.cmake deleted file mode 100644 index fde90efae..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/FindOpenCL.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# - Try to find OpenCL -# This module tries to find an OpenCL implementation on your system. It supports -# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too. -# -# Once done this will define -# OPENCL_FOUND - system has OpenCL -# OPENCL_INCLUDE_DIRS - the OpenCL include directory -# OPENCL_LIBRARIES - link these to use OpenCL -# -# WIN32 should work, but is untested - -FIND_PACKAGE( PackageHandleStandardArgs ) - -SET (OPENCL_VERSION_STRING "0.1.0") -SET (OPENCL_VERSION_MAJOR 0) -SET (OPENCL_VERSION_MINOR 1) -SET (OPENCL_VERSION_PATCH 0) - -IF (APPLE) - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") - FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") - -ELSE (APPLE) - - IF (WIN32) - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) - - # The AMD SDK currently installs both x86 and x86_64 libraries - # This is only a hack to find out architecture - IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") - ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") - ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR}) - - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # On Win32 search relative to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}") - - ELSE (WIN32) - - # Unix style platforms - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL - ENV LD_LIBRARY_PATH - ) - - GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # The AMD SDK currently does not place its headers - # in /usr/include, therefore also search relative - # to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - - ENDIF (WIN32) - -ENDIF (APPLE) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) - -IF( _OPENCL_CPP_INCLUDE_DIRS ) - SET( OPENCL_HAS_CPP_BINDINGS TRUE ) - LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) - # This is often the same, so clean up - LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) -ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) - -MARK_AS_ADVANCED( - OPENCL_INCLUDE_DIRS -) - diff --git a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/cl.hpp b/SpeedComparisons/GrayScott_OpenCL_Image_2x2/cl.hpp deleted file mode 100644 index 99b86a665..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/cl.hpp +++ /dev/null @@ -1,4011 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) - * \author Benedict R. Gaster and Laurent Morichetti - * - * Additions and fixes from Brian Cole, March 3rd 2010. - * - * \version 1.1 - * \date June 2010 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.1 (revision 04) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 -#include -#include -#if defined(USE_DX_INTEROP) -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -# include -#endif // linux - -#include - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -/*! \class Error - * \brief Exception class - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! Create a new CL error exception for a given error code - * and corresponding message. - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - const cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) - -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) - -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - */ -class string -{ -private: - ::size_t size_; - char * str_; -public: - string(void) : size_(0), str_(NULL) - { - } - - string(char * str, ::size_t size) : - size_(size), - str_(NULL) - { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - - string(char * str) : - str_(NULL) - { - size_= ::strlen(str); - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - size_ = 0; - str_ = NULL; - } - else { - size_ = rhs.size_; - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - string(const string& rhs) - { - *this = rhs; - } - - ~string() - { - if (str_ != NULL) { - delete[] str_; - } - } - - ::size_t size(void) const { return size_; } - ::size_t length(void) const { return size(); } - - const char * c_str(void) const { return (str_) ? str_ : "";} -}; - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -#include -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) -typedef cl::string STRING_CLASS; -#endif - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#include -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector -#endif - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * std::vector functionality. - */ -template -class vector -{ -private: - T data_[N]; - unsigned int size_; - bool empty_; -public: - vector() : - size_(-1), - empty_(true) - {} - - ~vector() {} - - unsigned int size(void) const - { - return size_ + 1; - } - - void clear() - { - size_ = -1; - empty_ = true; - } - - void push_back (const T& x) - { - if (size() < N) { - size_++; - data_[size_] = x; - empty_ = false; - } - } - - void pop_back(void) - { - if (!empty_) { - data_[size_].~T(); - size_--; - if (size_ == -1) { - empty_ = true; - } - } - } - - vector(const vector& vec) : - size_(vec.size_), - empty_(vec.empty_) - { - if (!empty_) { - memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); - } - } - - vector(unsigned int size, const T& val = T()) : - size_(-1), - empty_(true) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - size_ = rhs.size_; - empty_ = rhs.empty_; - - if (!empty_) { - memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); - } - - return *this; - } - - bool operator==(vector &vec) - { - if (empty_ && vec.empty_) { - return true; - } - - if (size() != vec.size()) { - return false; - } - - return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; - } - - operator T* () { return data_; } - operator const T* () const { return data_; } - - bool empty (void) const - { - return empty_; - } - - unsigned int max_size (void) const - { - return N; - } - - unsigned int capacity () const - { - return sizeof(T) * N; - } - - T& operator[](int index) - { - return data_[index]; - } - - T operator[](int index) const - { - return data_[index]; - } - - template - void assign(I start, I end) - { - clear(); - while(start < end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Iterator class for vectors - */ - class iterator - { - private: - vector vec_; - int index_; - bool initialized_; - public: - iterator(void) : - index_(-1), - initialized_(false) - { - index_ = -1; - initialized_ = false; - } - - ~iterator(void) {} - - static iterator begin(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = 0; - } - - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - static iterator end(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = vec.size(); - } - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_) && - (initialized_ == i.initialized_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - void operator++() - { - index_++; - } - - void operator++(int x) - { - index_ += x; - } - - void operator--() - { - index_--; - } - - void operator--(int x) - { - index_ -= x; - } - - T operator *() - { - return vec_[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_]; - } -}; - -/*! - * \brief size_t class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, who's - * size is known statically. - */ -template -struct size_t : public cl::vector< ::size_t, N> { }; - -namespace detail { - -// GetInfo help struct -template -struct GetInfoHelper -{ - static cl_int - get(Functor f, cl_uint name, T* param) - { - return f(name, sizeof(T), param, NULL); - } -}; - -// Specialized GetInfoHelper for VECTOR_CLASS params -template -struct GetInfoHelper > -{ - static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; - } -}; - -// Specialized for getInfo -template -struct GetInfoHelper > -{ - static cl_int - get(Func f, cl_uint name, VECTOR_CLASS* param) - { - cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; - } -}; - -// Specialized GetInfoHelper for STRING_CLASS params -template -struct GetInfoHelper -{ - static cl_int get(Func f, cl_uint name, STRING_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; - } -}; - -#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ -namespace detail { \ -template \ -struct GetInfoHelper \ -{ \ - static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ - { \ - cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ - if (err != CL_SUCCESS) { \ - return err; \ - } \ - \ - return ReferenceHandler::retain((*param)()); \ - } \ -}; \ -} - - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#undef __DECLARE_PARAM_TRAITS - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return GetInfoHelper::get(f, name, param); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -struct ReferenceHandler -{ }; - -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_INVALID_DEVICE; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_INVALID_DEVICE; } -}; - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_INVALID_PLATFORM; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_INVALID_PLATFORM; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { release(); } - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) throw(Error) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief ImageFormat interface fro cl_image_format. - */ -struct ImageFormat : public cl_image_format -{ - ImageFormat(){} - - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \class Device - * \brief Device interface for cl_device_id. - */ -class Device : public detail::Wrapper -{ -public: - Device(cl_device_id device) { object_ = device; } - - Device() : detail::Wrapper() { } - - Device(const Device& device) : detail::Wrapper(device) { } - - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif -}; - -/*! \class Platform - * \brief Platform interface. - */ -class Platform : public detail::Wrapper -{ -public: - static const Platform null(); - - Platform(cl_platform_id platform) { object_ = platform; } - - Platform() : detail::Wrapper() { } - - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -}; - -static inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} - -class Context : public detail::Wrapper -{ -public: - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContext( - properties, (cl_uint) devices.size(), - (cl_device_id*) &devices.front(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context() : detail::Wrapper() { } - - Context(const Context& context) : detail::Wrapper(context) { } - - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Context) - -/*! \class Event - * \brief Event interface for cl_event. - */ -class Event : public detail::Wrapper -{ -public: - Event() : detail::Wrapper() { } - - Event(const Event& event) : detail::Wrapper(event) { } - - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Event) - -#if defined(CL_VERSION_1_1) -/*! \class UserEvent - * \brief User event interface for cl_event. - */ -class UserEvent : public Event -{ -public: - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - UserEvent() : Event() { } - - UserEvent(const UserEvent& event) : Event(event) { } - - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \class Memory - * \brief Memory interface for cl_mem. - */ -class Memory : public detail::Wrapper -{ -public: - Memory() : detail::Wrapper() { } - - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) - -/*! \class Buffer - * \brief Memory buffer interface. - */ -class Buffer : public Memory -{ -public: - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Buffer() : Memory() { } - - Buffer(const Buffer& buffer) : Memory(buffer) { } - - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - -#if defined(CL_VERSION_1_1) - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferD3D10() : Buffer() { } - - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } -}; -#endif - -/*! \class BufferGL - * \brief Memory buffer interface for GL interop. - */ -class BufferGL : public Buffer -{ -public: - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferGL() : Buffer() { } - - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class BufferRenderGL - * \brief Memory buffer interface for GL interop with renderbuffer. - */ -class BufferRenderGL : public Buffer -{ -public: - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferRenderGL() : Buffer() { } - - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class Image - * \brief Base class interface for all images. - */ -class Image : public Memory -{ -protected: - Image() : Memory() { } - - Image(const Image& image) : Memory(image) { } - - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } -public: - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -/*! \class Image2D - * \brief Image interface for 2D images. - */ -class Image2D : public Image -{ -public: - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2D() { } - - Image2D(const Image2D& image2D) : Image(image2D) { } - - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image2DGL : public Image2D -{ -public: - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DGL() : Image2D() { } - - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image3D - * \brief Image interface for 3D images. - */ -class Image3D : public Image -{ -public: - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3D() { } - - Image3D(const Image3D& image3D) : Image(image3D) { } - - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image3DGL : public Image3D -{ -public: - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3DGL() : Image3D() { } - - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Sampler - * \brief Sampler interface for cl_sampler. - */ -class Sampler : public detail::Wrapper -{ -public: - Sampler() { } - - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) - -class Program; -class CommandQueue; -class Kernel; - -/*! \class NDRange - * \brief NDRange interface - */ -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - NDRange() - : dimensions_(0) - { } - - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_.push_back(size0); - } - - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - } - - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - sizes_.push_back(size2); - } - - operator const ::size_t*() const { return (const ::size_t*) sizes_; } - ::size_t dimensions() const { return dimensions_; } -}; - -static const NDRange NullRange; - -/*! - * \struct LocalSpaceArg - * \brief Local address raper for use with Kernel::setArg - */ -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -class KernelFunctor; - -/*! \class Kernel - * \brief Kernel interface that implements cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - Kernel() { } - - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local); - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local); -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - const ::size_t n = binaries.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); - - for (::size_t i = 0; i < n; ++i) { - images[i] = (const unsigned char*)binaries[(int)i].first; - lengths[i] = binaries[(int)i].second; - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - (cl_device_id*)&devices.front(), - lengths, images, binaryStatus != NULL - ? (cl_int*) &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - (cl_device_id*)&devices.front(), - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Program) - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -public: - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_ERR); - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_ERR); - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_RECT_ERR); - } - - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_RECT_ERR); - } -#endif - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_IMAGE_ERR); - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_IMAGE_ERR); - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_ERR); - } - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NDRANGE_KERNEL_ERR); - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_TASK_ERR); - } - - cl_int enqueueNativeKernel( - void (*userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - return detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NATIVE_KERNEL); - } - - cl_int enqueueMarker(Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } -#endif - - cl_int enqueueBarrier() const - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) - -/*! \class KernelFunctor - * \brief Kernel functor interface - * - * \note Currently only functors of zero to ten arguments are supported. It - * is straightforward to add more and a more general solution, similar to - * Boost.Lambda could be followed if required in the future. - */ -class KernelFunctor -{ -private: - Kernel kernel_; - CommandQueue queue_; - NDRange offset_; - NDRange global_; - NDRange local_; - - cl_int err_; -public: - KernelFunctor() { } - - KernelFunctor( - const Kernel& kernel, - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) : - kernel_(kernel), - queue_(queue), - offset_(offset), - global_(global), - local_(local), - err_(CL_SUCCESS) - {} - - KernelFunctor& operator=(const KernelFunctor& rhs); - - KernelFunctor(const KernelFunctor& rhs); - - cl_int getError() { return err_; } - - inline Event operator()(const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events = NULL); -}; - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,offset,global,local); -} - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,NullRange,global,local); -} - -inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) -{ - if (this == &rhs) { - return *this; - } - - kernel_ = rhs.kernel_; - queue_ = rhs.queue_; - offset_ = rhs.offset_; - global_ = rhs.global_; - local_ = rhs.local_; - - return *this; -} - -inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : - kernel_(rhs.kernel_), - queue_(rhs.queue_), - offset_(rhs.offset_), - global_(rhs.global_), - local_(rhs.local_) -{ -} - -Event KernelFunctor::operator()(const VECTOR_CLASS* events) -{ - Event event; - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - kernel_.setArg(14,a15); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __GET_INFO_HELPER_WITH_RETAIN - -// Extensions -#undef __INIT_CL_EXT_FCN_PTR -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -} // namespace cl - -#endif // CL_HPP_ diff --git a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/gray_scott_opencl_image_2x2.cpp b/SpeedComparisons/GrayScott_OpenCL_Image_2x2/gray_scott_opencl_image_2x2.cpp deleted file mode 100644 index 5e609f40e..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/gray_scott_opencl_image_2x2.cpp +++ /dev/null @@ -1,365 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// OpenCV: -#include -#include - -// stdlib: -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCL: -#define __NO_STD_VECTOR // Use cl::vector instead of STL version -#define __CL_ENABLE_EXCEPTIONS - -// cl.hpp is standard but doesn't come with most SDKs, so download it from here: -// http://www.khronos.org/registry/cl/api/1.1/cl.hpp -#ifdef __APPLE__ -# include "cl.hpp" -#else -# include -#endif - -using namespace cl; - -// STL: -#include -#include - -// local: -#include "defs.h" - -void init(float *a,float *b); -bool display(float *a, - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); - -// we pack the values in 2x2 blocks: x y -// z w -float& float_at(float* arr,int x,int y) -{ - return arr[ ( (y/2)*(X/2) + x/2 ) * 4 + (y%2)*2 + x%2 ]; -} - -static int g_opt_device = 0; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+1 platforms; - Platform::get(&platforms); - - // Select the default platform and create a context using this platform and the GPU - cl_context_properties cps[3] = { - CL_CONTEXT_PLATFORM, - (cl_context_properties)(platforms[0])(), - 0 - }; - Context context( CL_DEVICE_TYPE_GPU, cps); - - float *a = new float[X*Y]; - float *b = new float[X*Y]; - init(a,b); - - // Get a list of devices on this platform - cl::vector devices = context.getInfo(); - - // range-check the user's selection - int maxdev = devices.size() - 1; - g_opt_device = (g_opt_device > maxdev) ? maxdev : - ((g_opt_device < 0) ? 0 : g_opt_device); - std::cout << (maxdev+1) << " device(s) available; using device " - << g_opt_device << ".\n"; - - Device &device = devices[g_opt_device]; - cl::vector ourdevices = cl::vector(1, device); - - bool is_ImageSupported = device.getInfo(); - if(!is_ImageSupported) - { - printf("Images not supported on this device.\n"); - exit(-1); - } - - // we make two images and swap between them - cl::Image2D a1(context,CL_MEM_READ_WRITE,cl::ImageFormat(CL_RGBA,CL_FLOAT),X/2,Y/2); - cl::Image2D b1(context,CL_MEM_READ_WRITE,cl::ImageFormat(CL_RGBA,CL_FLOAT),X/2,Y/2); - cl::Image2D a2(context,CL_MEM_READ_WRITE,cl::ImageFormat(CL_RGBA,CL_FLOAT),X/2,Y/2); - cl::Image2D b2(context,CL_MEM_READ_WRITE,cl::ImageFormat(CL_RGBA,CL_FLOAT),X/2,Y/2); - - // Create a command queue and use the selected device - CommandQueue queue = CommandQueue(context, device); - Event event; - - // Copy to the memory buffers - cl::size_t<3> origin; - origin.push_back(0); - origin.push_back(0); - origin.push_back(0); - cl::size_t<3> region; - region.push_back(X/2); - region.push_back(Y/2); - region.push_back(1); - queue.enqueueWriteImage(a1,true,origin,region,0,0,a); - queue.enqueueWriteImage(b1,true,origin,region,0,0,b); - - // Read source file - std::string kfn = CL_SOURCE_DIR; // (defined in CMakeLists.txt to be the source folder) - kfn += "/grayscott_kernel_image_2x2.cl"; - std::ifstream sourceFile(kfn.c_str()); - std::string sourceCode( - std::istreambuf_iterator(sourceFile), - (std::istreambuf_iterator())); - Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); - - // enable this code to display kernel compilation error if you get clBuildProgram(-11) - #if 0 - const ::size_t n = (::size_t)source.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - for (::size_t i = 0; i < n; ++i) { - strings[i] = source[(int)i].first; - lengths[i] = source[(int)i].second; - } - cl_int err; - cl_program myprog = clCreateProgramWithSource(context(), (cl_uint)n, strings, lengths, &err); - err = clBuildProgram(myprog, (cl_uint)ourdevices.size(), (cl_device_id*)&ourdevices.front(), NULL, NULL, NULL); - char proglog[1024]; - clGetProgramBuildInfo(myprog, device(), CL_PROGRAM_BUILD_LOG, 1024, proglog, 0); - printf("err=%d log=%s\n", err, proglog); - return 0; - #endif - - // Make program of the source code in the context - Program program = Program(context, source); - - // Build program for the specific device we are using - // IMPORTANT: If this program is running on a system that has multiple - // graphics cards, and if ANY of those cards does not support Images, - // then "program.build(devices);" will fail. Thus, we must build only - // on the device that is actually being used for the command queue. - program.build(ourdevices); - - // Make kernel - Kernel kernel(program, "grayscott_compute_2x2"); - - NDRange global(X/2,Y/2); - NDRange local(16,16); - - kernel.setArg(4, f); - kernel.setArg(5, f+k); - kernel.setArg(6, r_a); - kernel.setArg(7, r_b); - kernel.setArg(8, speed); - - int iteration = 0; - const int N_FRAMES_PER_DISPLAY = 5000; // an even number, because of our double-buffering implementation - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elap; - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // run a few iterations (without copying the data back) - for(int it=0;it 0) - fps = N_FRAMES_PER_DISPLAY / (float)tod_elap; - sprintf(msg,"GrayScott - %0.2f fps (%.2f Mcgs)",fps,fps*X*Y/1e6); - - // display: - { - int quitnow = display(a,iteration,false,200.0f,2,10,msg); - if (quitnow) - break; - } - } - } - catch(Error error) - { - std::cout << error.what() << "(" << error.err() << ")" << std::endl; - } -} - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} - -void init(float* a,float* b) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - // start with a uniform field with an approximate circle in the middle - if(hypot(i-X/3,(j-Y/4)/1.5)<=frand(2,5)) - { - float_at(a,i,j) = frand(0.0f,0.1f); - float_at(b,i,j) = frand(0.9f,1.0f); - } - else - { - float_at(a,i,j) = frand(0.9f,1.0f); - float_at(b,i,j) = frand(0.0f,0.1f); - } - - } - } -} - - -bool display(float *a, - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message) -{ - static bool need_init = true; - static bool write_video = false; - - static IplImage *im,*im2,*im3; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(X,Y),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(X*scale,Y*scale),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - } - - // convert float arrays to IplImage for OpenCV to display - for(int col=0;col255) val=255; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 2] = (uchar)val; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 1] = (uchar)val; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 0] = (uchar)val; - } - } - - cvResize(im,im2); - - { - char txt[100]; - sprintf(txt,"%d",iteration); - cvPutText(im2,txt,cvPoint(20,20),&font,white); - cvPutText(im2,message,cvPoint(20,40),&font,white); - } - - cvShowImage(title,im2); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - return true; - } - return false; -} diff --git a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/grayscott_kernel_image_2x2.cl b/SpeedComparisons/GrayScott_OpenCL_Image_2x2/grayscott_kernel_image_2x2.cl deleted file mode 100644 index 938493c22..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Image_2x2/grayscott_kernel_image_2x2.cl +++ /dev/null @@ -1,42 +0,0 @@ -__kernel void grayscott_compute_2x2( - read_only image2d_t a, - read_only image2d_t b, - write_only image2d_t a2, - write_only image2d_t b2, - float f,float f_plus_k, - float r_a,float r_b, - float speed) -{ - const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_REPEAT | CLK_FILTER_NEAREST; - - const int x = get_global_id(0); - const int y = get_global_id(1); - - const float4 a_pixel = read_imagef(a, smp, (int2)(x, y)); - const float4 a_left = read_imagef(a, smp, (int2)(x-1, y)); - const float4 a_right = read_imagef(a, smp, (int2)(x+1, y)); - const float4 a_up = read_imagef(a, smp, (int2)(x, y-1)); - const float4 a_down = read_imagef(a, smp, (int2)(x, y+1)); - const float4 b_pixel = read_imagef(b, smp, (int2)(x, y)); - const float4 b_left = read_imagef(b, smp, (int2)(x-1, y)); - const float4 b_right = read_imagef(b, smp, (int2)(x+1, y)); - const float4 b_up = read_imagef(b, smp, (int2)(x, y-1)); - const float4 b_down = read_imagef(b, smp, (int2)(x, y+1)); - - // we pack the values in 2x2 blocks: x y - // z w - - write_imagef( a2, (int2)(x, y), a_pixel + speed * (r_a * ((float4)( - a_left.y + a_up.z + a_pixel.y + a_pixel.z, - a_pixel.x + a_up.w + a_right.x + a_pixel.w, - a_left.w + a_pixel.x + a_pixel.w + a_down.x, - a_pixel.z + a_pixel.y + a_right.z + a_down.y) - 4.0f*a_pixel) - - a_pixel * b_pixel * b_pixel + f*(1.0f-a_pixel)) ); - write_imagef( b2, (int2)(x, y), b_pixel + speed * (r_b * ((float4)( - b_left.y + b_up.z + b_pixel.y + b_pixel.z, - b_pixel.x + b_up.w + b_right.x + b_pixel.w, - b_left.w + b_pixel.x + b_pixel.w + b_down.x, - b_pixel.z + b_pixel.y + b_right.z + b_down.y) - 4.0f*b_pixel) - + a_pixel * b_pixel * b_pixel - f_plus_k*b_pixel) ); - // (it's faster than using local const floats to split up the computation, annoyingly) -} diff --git a/SpeedComparisons/GrayScott_OpenCL_Local/CMakeLists.txt b/SpeedComparisons/GrayScott_OpenCL_Local/CMakeLists.txt deleted file mode 100644 index 69b345140..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Local/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -project(GrayScott_OpenCL_Local) - -set(CMAKE_MODULE_PATH ${GrayScott_OpenCL_Local_SOURCE_DIR}) -# (we include our own FindOpenCL.cmake until the time that CMake comes with its own) - -find_package(OpenCV REQUIRED) -include_directories( ${OPENCV_INCLUDE_DIR}) -link_libraries( ${OpenCV_LIBS} ) - -# only build the OpenCL version if OpenCL was found -find_package ( OpenCL ) -if(OPENCL_FOUND) - include_directories ( ${OPENCL_INCLUDE_DIRS} ) - link_libraries ( ${OPENCL_LIBRARIES} ) - - # tell the code where the .cl file will live - add_definitions(-DCL_SOURCE_DIR="${GrayScott_OpenCL_Local_SOURCE_DIR}") - - INCLUDE_DIRECTORIES( "../Display" ) - - add_executable(GrayScott_OpenCL_Local - gray_scott_opencl_local.cpp - grayscott_kernel_local.cl - ../Display/display.cpp - ../Display/display.h - ../Display/defs.h - ) -endif() - diff --git a/SpeedComparisons/GrayScott_OpenCL_Local/FindOpenCL.cmake b/SpeedComparisons/GrayScott_OpenCL_Local/FindOpenCL.cmake deleted file mode 100644 index fde90efae..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Local/FindOpenCL.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# - Try to find OpenCL -# This module tries to find an OpenCL implementation on your system. It supports -# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too. -# -# Once done this will define -# OPENCL_FOUND - system has OpenCL -# OPENCL_INCLUDE_DIRS - the OpenCL include directory -# OPENCL_LIBRARIES - link these to use OpenCL -# -# WIN32 should work, but is untested - -FIND_PACKAGE( PackageHandleStandardArgs ) - -SET (OPENCL_VERSION_STRING "0.1.0") -SET (OPENCL_VERSION_MAJOR 0) -SET (OPENCL_VERSION_MINOR 1) -SET (OPENCL_VERSION_PATCH 0) - -IF (APPLE) - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") - FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") - -ELSE (APPLE) - - IF (WIN32) - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h) - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp) - - # The AMD SDK currently installs both x86 and x86_64 libraries - # This is only a hack to find out architecture - IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64") - ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") - SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86") - ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" ) - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR}) - - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # On Win32 search relative to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}") - - ELSE (WIN32) - - # Unix style platforms - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL - ENV LD_LIBRARY_PATH - ) - - GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - # The AMD SDK currently does not place its headers - # in /usr/include, therefore also search relative - # to the library - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include") - - ENDIF (WIN32) - -ENDIF (APPLE) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) - -IF( _OPENCL_CPP_INCLUDE_DIRS ) - SET( OPENCL_HAS_CPP_BINDINGS TRUE ) - LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) - # This is often the same, so clean up - LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) -ENDIF( _OPENCL_CPP_INCLUDE_DIRS ) - -MARK_AS_ADVANCED( - OPENCL_INCLUDE_DIRS -) - diff --git a/SpeedComparisons/GrayScott_OpenCL_Local/cl.hpp b/SpeedComparisons/GrayScott_OpenCL_Local/cl.hpp deleted file mode 100644 index 99b86a665..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Local/cl.hpp +++ /dev/null @@ -1,4011 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) - * \author Benedict R. Gaster and Laurent Morichetti - * - * Additions and fixes from Brian Cole, March 3rd 2010. - * - * \version 1.1 - * \date June 2010 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.1 (revision 04) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 -#include -#include -#if defined(USE_DX_INTEROP) -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -# include -#endif // linux - -#include - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -/*! \class Error - * \brief Exception class - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! Create a new CL error exception for a given error code - * and corresponding message. - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - const cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) - -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) - -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - */ -class string -{ -private: - ::size_t size_; - char * str_; -public: - string(void) : size_(0), str_(NULL) - { - } - - string(char * str, ::size_t size) : - size_(size), - str_(NULL) - { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - - string(char * str) : - str_(NULL) - { - size_= ::strlen(str); - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - size_ = 0; - str_ = NULL; - } - else { - size_ = rhs.size_; - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - string(const string& rhs) - { - *this = rhs; - } - - ~string() - { - if (str_ != NULL) { - delete[] str_; - } - } - - ::size_t size(void) const { return size_; } - ::size_t length(void) const { return size(); } - - const char * c_str(void) const { return (str_) ? str_ : "";} -}; - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -#include -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) -typedef cl::string STRING_CLASS; -#endif - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#include -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector -#endif - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * std::vector functionality. - */ -template -class vector -{ -private: - T data_[N]; - unsigned int size_; - bool empty_; -public: - vector() : - size_(-1), - empty_(true) - {} - - ~vector() {} - - unsigned int size(void) const - { - return size_ + 1; - } - - void clear() - { - size_ = -1; - empty_ = true; - } - - void push_back (const T& x) - { - if (size() < N) { - size_++; - data_[size_] = x; - empty_ = false; - } - } - - void pop_back(void) - { - if (!empty_) { - data_[size_].~T(); - size_--; - if (size_ == -1) { - empty_ = true; - } - } - } - - vector(const vector& vec) : - size_(vec.size_), - empty_(vec.empty_) - { - if (!empty_) { - memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); - } - } - - vector(unsigned int size, const T& val = T()) : - size_(-1), - empty_(true) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - size_ = rhs.size_; - empty_ = rhs.empty_; - - if (!empty_) { - memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); - } - - return *this; - } - - bool operator==(vector &vec) - { - if (empty_ && vec.empty_) { - return true; - } - - if (size() != vec.size()) { - return false; - } - - return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; - } - - operator T* () { return data_; } - operator const T* () const { return data_; } - - bool empty (void) const - { - return empty_; - } - - unsigned int max_size (void) const - { - return N; - } - - unsigned int capacity () const - { - return sizeof(T) * N; - } - - T& operator[](int index) - { - return data_[index]; - } - - T operator[](int index) const - { - return data_[index]; - } - - template - void assign(I start, I end) - { - clear(); - while(start < end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Iterator class for vectors - */ - class iterator - { - private: - vector vec_; - int index_; - bool initialized_; - public: - iterator(void) : - index_(-1), - initialized_(false) - { - index_ = -1; - initialized_ = false; - } - - ~iterator(void) {} - - static iterator begin(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = 0; - } - - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - static iterator end(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = vec.size(); - } - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_) && - (initialized_ == i.initialized_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - void operator++() - { - index_++; - } - - void operator++(int x) - { - index_ += x; - } - - void operator--() - { - index_--; - } - - void operator--(int x) - { - index_ -= x; - } - - T operator *() - { - return vec_[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_]; - } -}; - -/*! - * \brief size_t class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, who's - * size is known statically. - */ -template -struct size_t : public cl::vector< ::size_t, N> { }; - -namespace detail { - -// GetInfo help struct -template -struct GetInfoHelper -{ - static cl_int - get(Functor f, cl_uint name, T* param) - { - return f(name, sizeof(T), param, NULL); - } -}; - -// Specialized GetInfoHelper for VECTOR_CLASS params -template -struct GetInfoHelper > -{ - static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; - } -}; - -// Specialized for getInfo -template -struct GetInfoHelper > -{ - static cl_int - get(Func f, cl_uint name, VECTOR_CLASS* param) - { - cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; - } -}; - -// Specialized GetInfoHelper for STRING_CLASS params -template -struct GetInfoHelper -{ - static cl_int get(Func f, cl_uint name, STRING_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; - } -}; - -#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ -namespace detail { \ -template \ -struct GetInfoHelper \ -{ \ - static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ - { \ - cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ - if (err != CL_SUCCESS) { \ - return err; \ - } \ - \ - return ReferenceHandler::retain((*param)()); \ - } \ -}; \ -} - - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#undef __DECLARE_PARAM_TRAITS - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return GetInfoHelper::get(f, name, param); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -struct ReferenceHandler -{ }; - -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_INVALID_DEVICE; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_INVALID_DEVICE; } -}; - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_INVALID_PLATFORM; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_INVALID_PLATFORM; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { release(); } - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) throw(Error) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief ImageFormat interface fro cl_image_format. - */ -struct ImageFormat : public cl_image_format -{ - ImageFormat(){} - - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \class Device - * \brief Device interface for cl_device_id. - */ -class Device : public detail::Wrapper -{ -public: - Device(cl_device_id device) { object_ = device; } - - Device() : detail::Wrapper() { } - - Device(const Device& device) : detail::Wrapper(device) { } - - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif -}; - -/*! \class Platform - * \brief Platform interface. - */ -class Platform : public detail::Wrapper -{ -public: - static const Platform null(); - - Platform(cl_platform_id platform) { object_ = platform; } - - Platform() : detail::Wrapper() { } - - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -}; - -static inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} - -class Context : public detail::Wrapper -{ -public: - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContext( - properties, (cl_uint) devices.size(), - (cl_device_id*) &devices.front(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context() : detail::Wrapper() { } - - Context(const Context& context) : detail::Wrapper(context) { } - - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Context) - -/*! \class Event - * \brief Event interface for cl_event. - */ -class Event : public detail::Wrapper -{ -public: - Event() : detail::Wrapper() { } - - Event(const Event& event) : detail::Wrapper(event) { } - - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Event) - -#if defined(CL_VERSION_1_1) -/*! \class UserEvent - * \brief User event interface for cl_event. - */ -class UserEvent : public Event -{ -public: - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - UserEvent() : Event() { } - - UserEvent(const UserEvent& event) : Event(event) { } - - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \class Memory - * \brief Memory interface for cl_mem. - */ -class Memory : public detail::Wrapper -{ -public: - Memory() : detail::Wrapper() { } - - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) - -/*! \class Buffer - * \brief Memory buffer interface. - */ -class Buffer : public Memory -{ -public: - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Buffer() : Memory() { } - - Buffer(const Buffer& buffer) : Memory(buffer) { } - - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - -#if defined(CL_VERSION_1_1) - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferD3D10() : Buffer() { } - - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } -}; -#endif - -/*! \class BufferGL - * \brief Memory buffer interface for GL interop. - */ -class BufferGL : public Buffer -{ -public: - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferGL() : Buffer() { } - - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class BufferRenderGL - * \brief Memory buffer interface for GL interop with renderbuffer. - */ -class BufferRenderGL : public Buffer -{ -public: - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferRenderGL() : Buffer() { } - - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class Image - * \brief Base class interface for all images. - */ -class Image : public Memory -{ -protected: - Image() : Memory() { } - - Image(const Image& image) : Memory(image) { } - - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } -public: - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -/*! \class Image2D - * \brief Image interface for 2D images. - */ -class Image2D : public Image -{ -public: - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2D() { } - - Image2D(const Image2D& image2D) : Image(image2D) { } - - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image2DGL : public Image2D -{ -public: - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DGL() : Image2D() { } - - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image3D - * \brief Image interface for 3D images. - */ -class Image3D : public Image -{ -public: - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3D() { } - - Image3D(const Image3D& image3D) : Image(image3D) { } - - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image3DGL : public Image3D -{ -public: - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3DGL() : Image3D() { } - - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Sampler - * \brief Sampler interface for cl_sampler. - */ -class Sampler : public detail::Wrapper -{ -public: - Sampler() { } - - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) - -class Program; -class CommandQueue; -class Kernel; - -/*! \class NDRange - * \brief NDRange interface - */ -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - NDRange() - : dimensions_(0) - { } - - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_.push_back(size0); - } - - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - } - - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - sizes_.push_back(size2); - } - - operator const ::size_t*() const { return (const ::size_t*) sizes_; } - ::size_t dimensions() const { return dimensions_; } -}; - -static const NDRange NullRange; - -/*! - * \struct LocalSpaceArg - * \brief Local address raper for use with Kernel::setArg - */ -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -class KernelFunctor; - -/*! \class Kernel - * \brief Kernel interface that implements cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - Kernel() { } - - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local); - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local); -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - const ::size_t n = binaries.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); - - for (::size_t i = 0; i < n; ++i) { - images[i] = (const unsigned char*)binaries[(int)i].first; - lengths[i] = binaries[(int)i].second; - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - (cl_device_id*)&devices.front(), - lengths, images, binaryStatus != NULL - ? (cl_int*) &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - (cl_device_id*)&devices.front(), - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Program) - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -public: - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_ERR); - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_ERR); - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_RECT_ERR); - } - - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_RECT_ERR); - } -#endif - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_IMAGE_ERR); - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_IMAGE_ERR); - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_ERR); - } - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NDRANGE_KERNEL_ERR); - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_TASK_ERR); - } - - cl_int enqueueNativeKernel( - void (*userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - return detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NATIVE_KERNEL); - } - - cl_int enqueueMarker(Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } -#endif - - cl_int enqueueBarrier() const - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) - -/*! \class KernelFunctor - * \brief Kernel functor interface - * - * \note Currently only functors of zero to ten arguments are supported. It - * is straightforward to add more and a more general solution, similar to - * Boost.Lambda could be followed if required in the future. - */ -class KernelFunctor -{ -private: - Kernel kernel_; - CommandQueue queue_; - NDRange offset_; - NDRange global_; - NDRange local_; - - cl_int err_; -public: - KernelFunctor() { } - - KernelFunctor( - const Kernel& kernel, - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) : - kernel_(kernel), - queue_(queue), - offset_(offset), - global_(global), - local_(local), - err_(CL_SUCCESS) - {} - - KernelFunctor& operator=(const KernelFunctor& rhs); - - KernelFunctor(const KernelFunctor& rhs); - - cl_int getError() { return err_; } - - inline Event operator()(const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events = NULL); -}; - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,offset,global,local); -} - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,NullRange,global,local); -} - -inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) -{ - if (this == &rhs) { - return *this; - } - - kernel_ = rhs.kernel_; - queue_ = rhs.queue_; - offset_ = rhs.offset_; - global_ = rhs.global_; - local_ = rhs.local_; - - return *this; -} - -inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : - kernel_(rhs.kernel_), - queue_(rhs.queue_), - offset_(rhs.offset_), - global_(rhs.global_), - local_(rhs.local_) -{ -} - -Event KernelFunctor::operator()(const VECTOR_CLASS* events) -{ - Event event; - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - kernel_.setArg(14,a15); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __GET_INFO_HELPER_WITH_RETAIN - -// Extensions -#undef __INIT_CL_EXT_FCN_PTR -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -} // namespace cl - -#endif // CL_HPP_ diff --git a/SpeedComparisons/GrayScott_OpenCL_Local/gray_scott_opencl_local.cpp b/SpeedComparisons/GrayScott_OpenCL_Local/gray_scott_opencl_local.cpp deleted file mode 100644 index 47fc811de..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Local/gray_scott_opencl_local.cpp +++ /dev/null @@ -1,273 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCL: -#define __NO_STD_VECTOR // Use cl::vector instead of STL version -#define __CL_ENABLE_EXCEPTIONS - -// cl.hpp is standard but doesn't come with most SDKs, so download it from here: -// http://www.khronos.org/registry/cl/api/1.1/cl.hpp -#ifdef __APPLE__ -# include "cl.hpp" -#else -# include -#endif - -using namespace cl; - -// STL: -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -static int g_opt_device = 0; -static int g_wrap = 1; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+1 platforms; - Platform::get(&platforms); - - // Select the default platform and create a context using this platform and the GPU - cl_context_properties cps[3] = { - CL_CONTEXT_PLATFORM, - (cl_context_properties)(platforms[0])(), - 0 - }; - Context context( CL_DEVICE_TYPE_GPU, cps); - - // Get a list of devices on this platform - vector devices = context.getInfo(); - - // range-check the user's selection - int maxdev = devices.size() - 1; - g_opt_device = (g_opt_device > maxdev) ? maxdev : - ((g_opt_device < 0) ? 0 : g_opt_device); - std::cout << (maxdev+1) << " device(s) available; using device " - << g_opt_device << ".\n"; - - Device &device = devices[g_opt_device]; - std::cout << "Global memory: " << device.getInfo() << " bytes\n"; - std::cout << "Local memory: " << device.getInfo() << " bytes\n"; - std::cout << "Local memory type: " << std::string((device.getInfo()==CL_LOCAL)?"local":"global") << " \n"; - - // Create a command queue and use the selected device - if (maxdev < 0) { - std::cerr << "error -- need at least one OpenCL capable device.\n"; - exit(-1); - } - CommandQueue queue = CommandQueue(context, device); - Event event; - - // Read source file - std::string kfn = CL_SOURCE_DIR; // (defined in CMakeLists.txt to be the source folder) - kfn += "/grayscott_kernel_local.cl"; - std::ifstream sourceFile(kfn.c_str()); - std::string sourceCode( - std::istreambuf_iterator(sourceFile), - (std::istreambuf_iterator())); - Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); - - // Make program of the source code in the context - Program program = Program(context, source); - - const int LOCAL_X=1; - const int LOCAL_Y=256; - - // Build program for these specific devices - std::ostringstream oss; - oss << "-D LOCAL_X=" << LOCAL_X << " -D LOCAL_Y=" << LOCAL_Y; - if(g_wrap) oss << " -D WRAP"; - program.build(devices, oss.str().c_str(), NULL, NULL); - - // Make kernel - Kernel kernel(program, "grayscott_compute"); - - // Create memory buffers - Buffer bufferU = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferV = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferU2 = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferV2 = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - - // Copy lists A and B to the memory buffers - queue.enqueueWriteBuffer(bufferU, CL_TRUE, 0, MEM_SIZE, a); - queue.enqueueWriteBuffer(bufferV, CL_TRUE, 0, MEM_SIZE, b); - - NDRange global(X,Y); - NDRange local(LOCAL_X,LOCAL_Y); - - kernel.setArg(4, k); - kernel.setArg(5, f); - kernel.setArg(6, r_a); - kernel.setArg(7, r_b); - kernel.setArg(8, speed); - - int iteration = 0; - float fps_avg = 0.0; // decaying average of fps - const int N_FRAMES_PER_DISPLAY = 2000; // an even number, because of our double-buffering implementation - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elap; - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // run a few iterations (without copying the data back) - for(int it=0;it 0) - fps = ((float)N_FRAMES_PER_DISPLAY) / tod_elap; - // We display an exponential moving average of the fps measurement - fps_avg = (fps_avg == 0) ? fps : (((fps_avg * 10.0) + fps) / 11.0); - double Mcgs = (fps_avg * ((double)X) * ((double)Y)) / 1.0e6; - sprintf(msg,"GrayScott - %0.2f fps %0.2f Mcgs", fps_avg, Mcgs); - - // display: - { - int quitnow = display(a,a,a,iteration,false,200.0f,2,10,msg); - if (quitnow) - break; - } - } - } - catch(Error error) - { - std::cout << error.what() << "(" << error.err() << ")" << std::endl; - } -} - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} - -void init(float a[X][Y],float b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - // start with a uniform field with an approximate circle in the middle - //if(hypot(i%20-10/*-X/2*/,j%20-10/*-Y/2*/)<=frand(2,5)) { - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) - { - a[i][j] = frand(0.0f,0.1f); - b[i][j] = frand(0.9f,1.0f); - } - else - { - a[i][j] = frand(0.9f,1.0f); - b[i][j] = frand(0.0f,0.1f); - } - /*float v = frand(0.0f,1.0f); - a[i][j] = v; - b[i][j] = 1.0f-v;*/ - } - } -} - diff --git a/SpeedComparisons/GrayScott_OpenCL_Local/grayscott_kernel_local.cl b/SpeedComparisons/GrayScott_OpenCL_Local/grayscott_kernel_local.cl deleted file mode 100644 index b5e4a3388..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_Local/grayscott_kernel_local.cl +++ /dev/null @@ -1,80 +0,0 @@ -// This version loads data into a local cache, see http://www.khronos.org/message_boards/viewtopic.php?p=11081#p11081 - -__kernel void grayscott_compute( - __global float *U,__global float *V, - __global float *U2, __global float *V2, - float k,float F,float D_u,float D_v,float delta_t) -{ - // Get the index of the current element. X and Y are oriented like - // in high school math class, with the origin (0,0) in the bottom-left - // corner. - const int x = get_global_id(0); // column (0=leftmost) - const int y = get_global_id(1); // row (0=bottom-most) - const int X = get_global_size(0); // number of columns - const int Y = get_global_size(1); // number of rows - const int i = x*Y+y; // column * number_of_rows + row - - // make a local cache of nearby data, for speed - const int local_x = get_local_id(0); - const int local_y = get_local_id(1); - __local float local_U[LOCAL_X][LOCAL_Y]; - __local float local_V[LOCAL_X][LOCAL_Y]; - local_U[local_x][local_y] = U[i]; - local_V[local_x][local_y] = V[i]; - barrier(CLK_LOCAL_MEM_FENCE); // wait until all the local threads have copied their data into the local cache - - const float u = U[i]; // get U - const float v = V[i]; // get V - - if(local_x>0 && local_y>0 && local_x - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 -#include -#include -#if defined(USE_DX_INTEROP) -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -# include -#endif // linux - -#include - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -/*! \class Error - * \brief Exception class - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! Create a new CL error exception for a given error code - * and corresponding message. - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - const cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) - -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) - -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - */ -class string -{ -private: - ::size_t size_; - char * str_; -public: - string(void) : size_(0), str_(NULL) - { - } - - string(char * str, ::size_t size) : - size_(size), - str_(NULL) - { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - - string(char * str) : - str_(NULL) - { - size_= ::strlen(str); - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - size_ = 0; - str_ = NULL; - } - else { - size_ = rhs.size_; - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - string(const string& rhs) - { - *this = rhs; - } - - ~string() - { - if (str_ != NULL) { - delete[] str_; - } - } - - ::size_t size(void) const { return size_; } - ::size_t length(void) const { return size(); } - - const char * c_str(void) const { return (str_) ? str_ : "";} -}; - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -#include -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) -typedef cl::string STRING_CLASS; -#endif - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#include -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector -#endif - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * std::vector functionality. - */ -template -class vector -{ -private: - T data_[N]; - unsigned int size_; - bool empty_; -public: - vector() : - size_(-1), - empty_(true) - {} - - ~vector() {} - - unsigned int size(void) const - { - return size_ + 1; - } - - void clear() - { - size_ = -1; - empty_ = true; - } - - void push_back (const T& x) - { - if (size() < N) { - size_++; - data_[size_] = x; - empty_ = false; - } - } - - void pop_back(void) - { - if (!empty_) { - data_[size_].~T(); - size_--; - if (size_ == -1) { - empty_ = true; - } - } - } - - vector(const vector& vec) : - size_(vec.size_), - empty_(vec.empty_) - { - if (!empty_) { - memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); - } - } - - vector(unsigned int size, const T& val = T()) : - size_(-1), - empty_(true) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - size_ = rhs.size_; - empty_ = rhs.empty_; - - if (!empty_) { - memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); - } - - return *this; - } - - bool operator==(vector &vec) - { - if (empty_ && vec.empty_) { - return true; - } - - if (size() != vec.size()) { - return false; - } - - return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; - } - - operator T* () { return data_; } - operator const T* () const { return data_; } - - bool empty (void) const - { - return empty_; - } - - unsigned int max_size (void) const - { - return N; - } - - unsigned int capacity () const - { - return sizeof(T) * N; - } - - T& operator[](int index) - { - return data_[index]; - } - - T operator[](int index) const - { - return data_[index]; - } - - template - void assign(I start, I end) - { - clear(); - while(start < end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Iterator class for vectors - */ - class iterator - { - private: - vector vec_; - int index_; - bool initialized_; - public: - iterator(void) : - index_(-1), - initialized_(false) - { - index_ = -1; - initialized_ = false; - } - - ~iterator(void) {} - - static iterator begin(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = 0; - } - - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - static iterator end(vector &vec) - { - iterator i; - - if (!vec.empty()) { - i.index_ = vec.size(); - } - i.vec_ = vec; - i.initialized_ = true; - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_) && - (initialized_ == i.initialized_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - void operator++() - { - index_++; - } - - void operator++(int x) - { - index_ += x; - } - - void operator--() - { - index_--; - } - - void operator--(int x) - { - index_ -= x; - } - - T operator *() - { - return vec_[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_]; - } -}; - -/*! - * \brief size_t class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, who's - * size is known statically. - */ -template -struct size_t : public cl::vector< ::size_t, N> { }; - -namespace detail { - -// GetInfo help struct -template -struct GetInfoHelper -{ - static cl_int - get(Functor f, cl_uint name, T* param) - { - return f(name, sizeof(T), param, NULL); - } -}; - -// Specialized GetInfoHelper for VECTOR_CLASS params -template -struct GetInfoHelper > -{ - static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; - } -}; - -// Specialized for getInfo -template -struct GetInfoHelper > -{ - static cl_int - get(Func f, cl_uint name, VECTOR_CLASS* param) - { - cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; - } -}; - -// Specialized GetInfoHelper for STRING_CLASS params -template -struct GetInfoHelper -{ - static cl_int get(Func f, cl_uint name, STRING_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; - } -}; - -#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ -namespace detail { \ -template \ -struct GetInfoHelper \ -{ \ - static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ - { \ - cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ - if (err != CL_SUCCESS) { \ - return err; \ - } \ - \ - return ReferenceHandler::retain((*param)()); \ - } \ -}; \ -} - - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS); -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS); -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#undef __DECLARE_PARAM_TRAITS - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return GetInfoHelper::get(f, name, param); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return GetInfoHelper, T> - ::get(f0, name, param); -} - -template -struct ReferenceHandler -{ }; - -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_INVALID_DEVICE; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_INVALID_DEVICE; } -}; - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_INVALID_PLATFORM; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_INVALID_PLATFORM; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { release(); } - object_ = rhs.object_; - if (object_ != NULL) { retain(); } - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) throw(Error) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief ImageFormat interface fro cl_image_format. - */ -struct ImageFormat : public cl_image_format -{ - ImageFormat(){} - - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \class Device - * \brief Device interface for cl_device_id. - */ -class Device : public detail::Wrapper -{ -public: - Device(cl_device_id device) { object_ = device; } - - Device() : detail::Wrapper() { } - - Device(const Device& device) : detail::Wrapper(device) { } - - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif -}; - -/*! \class Platform - * \brief Platform interface. - */ -class Platform : public detail::Wrapper -{ -public: - static const Platform null(); - - Platform(cl_platform_id platform) { object_ = platform; } - - Platform() : detail::Wrapper() { } - - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -}; - -static inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} - -class Context : public detail::Wrapper -{ -public: - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContext( - properties, (cl_uint) devices.size(), - (cl_device_id*) &devices.front(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - Context() : detail::Wrapper() { } - - Context(const Context& context) : detail::Wrapper(context) { } - - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Context) - -/*! \class Event - * \brief Event interface for cl_event. - */ -class Event : public detail::Wrapper -{ -public: - Event() : detail::Wrapper() { } - - Event(const Event& event) : detail::Wrapper(event) { } - - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Event) - -#if defined(CL_VERSION_1_1) -/*! \class UserEvent - * \brief User event interface for cl_event. - */ -class UserEvent : public Event -{ -public: - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - UserEvent() : Event() { } - - UserEvent(const UserEvent& event) : Event(event) { } - - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \class Memory - * \brief Memory interface for cl_mem. - */ -class Memory : public detail::Wrapper -{ -public: - Memory() : detail::Wrapper() { } - - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) - -/*! \class Buffer - * \brief Memory buffer interface. - */ -class Buffer : public Memory -{ -public: - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Buffer() : Memory() { } - - Buffer(const Buffer& buffer) : Memory(buffer) { } - - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - -#if defined(CL_VERSION_1_1) - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferD3D10() : Buffer() { } - - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } -}; -#endif - -/*! \class BufferGL - * \brief Memory buffer interface for GL interop. - */ -class BufferGL : public Buffer -{ -public: - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferGL() : Buffer() { } - - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class BufferRenderGL - * \brief Memory buffer interface for GL interop with renderbuffer. - */ -class BufferRenderGL : public Buffer -{ -public: - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - BufferRenderGL() : Buffer() { } - - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \class Image - * \brief Base class interface for all images. - */ -class Image : public Memory -{ -protected: - Image() : Memory() { } - - Image(const Image& image) : Memory(image) { } - - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } -public: - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -/*! \class Image2D - * \brief Image interface for 2D images. - */ -class Image2D : public Image -{ -public: - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2D() { } - - Image2D(const Image2D& image2D) : Image(image2D) { } - - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image2DGL : public Image2D -{ -public: - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DGL() : Image2D() { } - - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image3D - * \brief Image interface for 3D images. - */ -class Image3D : public Image -{ -public: - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3D() { } - - Image3D(const Image3D& image3D) : Image(image3D) { } - - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } -}; - -/*! \class Image2DGL - * \brief 2D image interface for GL interop. - */ -class Image3DGL : public Image3D -{ -public: - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - Image3DGL() : Image3D() { } - - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } -}; - -/*! \class Sampler - * \brief Sampler interface for cl_sampler. - */ -class Sampler : public detail::Wrapper -{ -public: - Sampler() { } - - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) - -class Program; -class CommandQueue; -class Kernel; - -/*! \class NDRange - * \brief NDRange interface - */ -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - NDRange() - : dimensions_(0) - { } - - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_.push_back(size0); - } - - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - } - - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_.push_back(size0); - sizes_.push_back(size1); - sizes_.push_back(size2); - } - - operator const ::size_t*() const { return (const ::size_t*) sizes_; } - ::size_t dimensions() const { return dimensions_; } -}; - -static const NDRange NullRange; - -/*! - * \struct LocalSpaceArg - * \brief Local address raper for use with Kernel::setArg - */ -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -class KernelFunctor; - -/*! \class Kernel - * \brief Kernel interface that implements cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - Kernel() { } - - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local); - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local); -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - const ::size_t n = binaries.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); - - for (::size_t i = 0; i < n; ++i) { - images[i] = (const unsigned char*)binaries[(int)i].first; - lengths[i] = binaries[(int)i].second; - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - (cl_device_id*)&devices.front(), - lengths, images, binaryStatus != NULL - ? (cl_int*) &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - (cl_device_id*)&devices.front(), - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::Program) - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -public: - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_ERR); - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_ERR); - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_ERR); - } - -#if defined(CL_VERSION_1_1) - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_BUFFER_RECT_ERR); - } - - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQEUE_COPY_BUFFER_RECT_ERR); - } -#endif - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_READ_IMAGE_ERR); - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_WRITE_IMAGE_ERR); - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_ERR); - } - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - } - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NDRANGE_KERNEL_ERR); - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_TASK_ERR); - } - - cl_int enqueueNativeKernel( - void (*userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - return detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_NATIVE_KERNEL); - } - - cl_int enqueueMarker(Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - return detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_ACQUIRE_GL_ERR); - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); - - return detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), - __ENQUEUE_RELEASE_GL_ERR); - } -#endif - - cl_int enqueueBarrier() const - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) - -/*! \class KernelFunctor - * \brief Kernel functor interface - * - * \note Currently only functors of zero to ten arguments are supported. It - * is straightforward to add more and a more general solution, similar to - * Boost.Lambda could be followed if required in the future. - */ -class KernelFunctor -{ -private: - Kernel kernel_; - CommandQueue queue_; - NDRange offset_; - NDRange global_; - NDRange local_; - - cl_int err_; -public: - KernelFunctor() { } - - KernelFunctor( - const Kernel& kernel, - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) : - kernel_(kernel), - queue_(queue), - offset_(offset), - global_(global), - local_(local), - err_(CL_SUCCESS) - {} - - KernelFunctor& operator=(const KernelFunctor& rhs); - - KernelFunctor(const KernelFunctor& rhs); - - cl_int getError() { return err_; } - - inline Event operator()(const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events = NULL); -}; - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,offset,global,local); -} - -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,NullRange,global,local); -} - -inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) -{ - if (this == &rhs) { - return *this; - } - - kernel_ = rhs.kernel_; - queue_ = rhs.queue_; - offset_ = rhs.offset_; - global_ = rhs.global_; - local_ = rhs.local_; - - return *this; -} - -inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : - kernel_(rhs.kernel_), - queue_(rhs.queue_), - offset_(rhs.offset_), - global_(rhs.global_), - local_(rhs.local_) -{ -} - -Event KernelFunctor::operator()(const VECTOR_CLASS* events) -{ - Event event; - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - kernel_.setArg(14,a15); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __GET_INFO_HELPER_WITH_RETAIN - -// Extensions -#undef __INIT_CL_EXT_FCN_PTR -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -} // namespace cl - -#endif // CL_HPP_ diff --git a/SpeedComparisons/GrayScott_OpenCL_float2/gray_scott_opencl_float2.cpp b/SpeedComparisons/GrayScott_OpenCL_float2/gray_scott_opencl_float2.cpp deleted file mode 100644 index 567fefc02..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_float2/gray_scott_opencl_float2.cpp +++ /dev/null @@ -1,383 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -// OpenCV: -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCL: -#define __NO_STD_VECTOR // Use cl::vector instead of STL version -#define __CL_ENABLE_EXCEPTIONS - -// cl.hpp is standard but doesn't come with most SDKs, so download it from here: -// http://www.khronos.org/registry/cl/api/1.1/cl.hpp -#ifdef __APPLE__ -# include "cl.hpp" -#else -# include -#endif - -using namespace cl; - -// STL: -#include -#include - -// local: -#include "defs.h" - -void init(float ab[X][Y][2]); -bool display(float r[X][Y][2],float g[X][Y][2],float b[X][Y][2], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); - -static int g_opt_device = 0; -static int g_wrap = 1; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+1 platforms; - Platform::get(&platforms); - - // Select the default platform and create a context using this platform and the GPU - cl_context_properties cps[3] = { - CL_CONTEXT_PLATFORM, - (cl_context_properties)(platforms[0])(), - 0 - }; - Context context( CL_DEVICE_TYPE_GPU, cps); - - // Get a list of devices on this platform - cl::vector devices = context.getInfo(); - - // range-check the user's selection - int maxdev = devices.size() - 1; - g_opt_device = (g_opt_device > maxdev) ? maxdev : - ((g_opt_device < 0) ? 0 : g_opt_device); - std::cout << (maxdev+1) << " device(s) available; using device " - << g_opt_device << ".\n"; - - Device &device = devices[g_opt_device]; - std::cout << "Global memory: " << device.getInfo() << " bytes\n"; - std::cout << "Local memory: " << device.getInfo() << " bytes\n"; - std::cout << "Local memory type: " << std::string((device.getInfo()==CL_LOCAL)?"local":"global") << " \n"; - std::cout << "CL_DEVICE_MAX_WORK_GROUP_SIZE: " << device.getInfo() << "\n"; - - // Create a command queue and use the selected device - if (maxdev < 0) { - std::cerr << "error -- need at least one OpenCL capable device.\n"; - exit(-1); - } - CommandQueue queue = CommandQueue(context, device); - Event event; - - // Read source file - std::string kfn = CL_SOURCE_DIR; // (defined in CMakeLists.txt to be the source folder) - kfn += "/grayscott_kernel_float2.cl"; - std::ifstream sourceFile(kfn.c_str()); - std::string sourceCode( - std::istreambuf_iterator(sourceFile), - (std::istreambuf_iterator())); - Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1)); - - // Make program of the source code in the context - Program program = Program(context, source); - - // Build program for these specific devices - // If wrap (toroidal) option is selected, we define a preprocessor flag - // that controls how the xm1, xp1, etc. are computed. - program.build(devices, g_wrap ? "-D WRAP" : NULL, NULL, NULL); - - // Make kernel - Kernel kernel(program, "grayscott_compute"); - - std::cout << "CL_KERNEL_WORK_GROUP_SIZE: " << kernel.getWorkGroupInfo(device) << "\n"; - - // Create memory buffers - Buffer bufferUV = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - Buffer bufferUV2 = Buffer(context, CL_MEM_READ_ONLY, MEM_SIZE); - - // Copy lists A and B to the memory buffers - queue.enqueueWriteBuffer(bufferUV, CL_TRUE, 0, MEM_SIZE, ab); - - NDRange global(X,Y); - NDRange local(1,256); - - kernel.setArg(2, k); - kernel.setArg(3, f); - kernel.setArg(4, r_a); - kernel.setArg(5, r_b); - kernel.setArg(6, speed); - - int iteration = 0; - float fps_avg = 0.0; // decaying average of fps - const int N_FRAMES_PER_DISPLAY = 2000; // an even number, because of our double-buffering implementation - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elap; - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // run a few iterations (without copying the data back) - for(int it=0;it 0) - fps = ((float)N_FRAMES_PER_DISPLAY) / tod_elap; - // We display an exponential moving average of the fps measurement - fps_avg = (fps_avg == 0) ? fps : (((fps_avg * 10.0) + fps) / 11.0); - double Mcgs = (fps_avg * ((double)X) * ((double)Y)) / 1.0e6; - sprintf(msg,"GrayScott - %0.2f fps %0.2f Mcgs", fps_avg, Mcgs); - - // display: - { - int quitnow = display(ab,ab,ab,iteration,false,200.0f,2,10,msg); - if (quitnow) - break; - } - } - } - catch(Error error) - { - std::cout << error.what() << "(" << error.err() << ")" << std::endl; - } -} - -// return a random value between lower and upper -float frand(float lower,float upper) -{ - return lower + rand()*(upper-lower)/RAND_MAX; -} - -void init(float ab[X][Y][2]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - // start with a uniform field with an approximate circle in the middle - //if(hypot(i%20-10/*-X/2*/,j%20-10/*-Y/2*/)<=frand(2,5)) { - if(hypot(i-X/3,(j-Y/2)/1.5)<=frand(2,5)) - { - ab[i][j][0] = frand(0.0f,0.1f); - ab[i][j][1] = frand(0.9f,1.0f); - } - else - { - ab[i][j][0] = frand(0.9f,1.0f); - ab[i][j][1] = frand(0.0f,0.1f); - } - } - } -} - -bool display(float r[X][Y][2],float g[X][Y][2],float b[X][Y][2], - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message) -{ - static bool need_init = true; - static bool write_video = false; - - static IplImage *im,*im2; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(X,Y),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(X*scale,Y*scale),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(write_video) - { - video = cvCreateVideoWriter(title,CV_FOURCC('D','I','V','X'),25.0,cvGetSize(im2),1); - border = 20; - } - } - - // convert float arrays to IplImage for OpenCV to display - float val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - if(auto_brighten) - { - for(int i=0;imaxR) maxR=val; - } - if(g) { - val = g[i][j][0]; - if(valmaxG) maxG=val; - } - if(b) { - val = b[i][j][0]; - if(valmaxB) maxB=val; - } - } - } - } - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 2] = (uchar)val; - } - if(g) { - float val = g[i][Y-j-1][0]; - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 1] = (uchar)val; - } - if(b) { - float val = b[i][Y-j-1][0]; - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - - char txt[100]; - if(!write_video) - { - sprintf(txt,"%d",iteration); - cvPutText(im2,txt,cvPoint(20,20),&font,white); - } - - cvPutText(im2,message,cvPoint(20,40),&font,white); - - if(write_video) - cvWriteFrame(video,im2); - - cvShowImage(title,im2); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - if(write_video) - cvReleaseVideoWriter(&video); - return true; - } - return false; -} - diff --git a/SpeedComparisons/GrayScott_OpenCL_float2/grayscott_kernel_float2.cl b/SpeedComparisons/GrayScott_OpenCL_float2/grayscott_kernel_float2.cl deleted file mode 100644 index 364749c31..000000000 --- a/SpeedComparisons/GrayScott_OpenCL_float2/grayscott_kernel_float2.cl +++ /dev/null @@ -1,38 +0,0 @@ -__kernel void grayscott_compute( - __global float2 *UV, - __global float2 *UV2, - float k,float F,float D_u,float D_v,float delta_t) -{ - // Get the index of the current element - const int x = get_global_id(0); - const int y = get_global_id(1); - const int X = get_global_size(0); - const int Y = get_global_size(1); - const int i = x*Y+y; - - const float2 uv = UV[i]; - - // compute the Laplacians of a and b -#ifdef WRAP - // speedy modulo operator for when X and Y are powers of 2 - // http://forums.amd.com/devforum/messageview.cfm?catid=390&threadid=143648 - const int xm1 = ((x-1+X) & (X-1)); // = ((x-1+X)%X); - const int xp1 = ((x+1) & (X-1)); // = ((x+1)%X); - const int ym1 = ((y-1+Y) & (Y-1)); // = ((y-1+Y)%Y); - const int yp1 = ((y+1) & (Y-1)); // = ((y+1)%Y); -#else - const int xm1 = max(x-1,0); - const int xp1 = min(x+1,X-1); - const int ym1 = max(y-1,0); - const int yp1 = min(y+1,Y-1); -#endif - const int iLeft = xm1*Y + y; - const int iRight = xp1*Y + y; - const int iUp = x*Y + ym1; - const int iDown = x*Y + yp1; - // Standard 5-point stencil - const float2 nabla_uv = UV[iLeft] + UV[iRight] + UV[iUp] + UV[iDown] - 4.0f*uv; - - // compute the new value - UV2[i] = uv + delta_t * (float2)(D_u * nabla_uv.x - uv.x*uv.y*uv.y + F*(1.0f-uv.x), D_v * nabla_uv.y + uv.x*uv.y*uv.y - (F+k)*uv.y); -} diff --git a/SpeedComparisons/GrayScott_OpenCV/CMakeLists.txt b/SpeedComparisons/GrayScott_OpenCV/CMakeLists.txt deleted file mode 100644 index 9a34c1102..000000000 --- a/SpeedComparisons/GrayScott_OpenCV/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -project(GrayScott_OpenCV) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(GrayScott_OpenCV - gray_scott_OpenCV.cpp - ../Display/defs.h -) - -TARGET_LINK_LIBRARIES(GrayScott_OpenCV ${OpenCV_LIBS} ) diff --git a/SpeedComparisons/GrayScott_OpenCV/gray_scott_OpenCV.cpp b/SpeedComparisons/GrayScott_OpenCV/gray_scott_OpenCV.cpp deleted file mode 100644 index 75808efff..000000000 --- a/SpeedComparisons/GrayScott_OpenCV/gray_scott_OpenCV.cpp +++ /dev/null @@ -1,268 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCV: -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(IplImage *a,IplImage *b); - -void compute(IplImage *a,IplImage *b, - IplImage *t1,IplImage *t2, - IplImage *abb, - float r_a,float r_b,float f,float k, - float speed); - -bool display(IplImage *r,IplImage *g,IplImage *b, - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); - -static int g_wrap = 0; -static bool g_paramspace = 0; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if (strcmp(argv[i],"-wrap")==0) { - // patterns wrap around ("torus", also called "continuous boundary - // condition") - g_wrap = 1; - } else { - fprintf(stderr, "Unrecognized argument: '%s'\n", argv[i]); - exit(-1); - } - } - - // Here we implement the Gray-Scott model, as described here: - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // http://arxiv.org/abs/patt-sol/9304003 - - // -- parameters -- - float r_a = 0.082f; - float r_b = 0.041f; - - // for spots: - float k = 0.064f; - float f = 0.035f; - // for stripes: - //float k = 0.06f; - //float f = 0.035f; - // for long stripes - //float k = 0.065f; - //float f = 0.056f; - // for dots and stripes - //float k = 0.064f; - //float f = 0.04f; - // for spiral waves: - //float k = 0.0475f; - //float f = 0.0118f; - float speed = 1.0f; - // ---------------- - - // these arrays store the chemical concentrations: - IplImage *a,*b; - - // temp images: - IplImage *t1,*t2,*abb; - - a = cvCreateImage(cvSize(X,Y),IPL_DEPTH_32F,1); - b = cvCreateImage(cvSize(X,Y),IPL_DEPTH_32F,1); - t1 = cvCreateImage(cvSize(X,Y),IPL_DEPTH_32F,1); - t2 = cvCreateImage(cvSize(X,Y),IPL_DEPTH_32F,1); - abb = cvCreateImage(cvSize(X,Y),IPL_DEPTH_32F,1); - - // put the initial conditions into each cell - init(a,b); - - const int N_FRAMES_PER_DISPLAY = 200; - int iteration = 0; - double fps_avg = 0.0; // decaying average of fps - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elapsed; - double fps = 0.0; // frames per second - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // compute: - for(int it=0;it -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenMP: -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y]); - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float r_a,float r_b,float f,float k, - float speed, bool parameter_space); - -static int g_nthreads; -static int g_wrap = 1; -static bool g_paramspace = 0; - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if (strcmp(argv[i],"-paramspace")==0) { - // do a parameter space plot, like in the Pearson paper - g_paramspace = true; - } else if (strcmp(argv[i],"-wrap")==0) { - // patterns wrap around ("torus", also called "continuous boundary - // condition") - g_wrap = 1; - } else { - fprintf(stderr, "Unrecognized argument: '%s'\n", argv[i]); - exit(-1); - } - } - - // Here we implement the Gray-Scott model, as described here: - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // http://arxiv.org/abs/patt-sol/9304003 - - // -- parameters -- - float r_a = 0.082f; - float r_b = 0.041f; - - // for spots: - float k = 0.064f; - float f = 0.035f; - // for stripes: - //float k = 0.06f; - //float f = 0.035f; - // for long stripes - //float k = 0.065f; - //float f = 0.056f; - // for dots and stripes - //float k = 0.064f; - //float f = 0.04f; - // for spiral waves: - //float k = 0.0475f; - //float f = 0.0118f; - float speed = 1.0f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - const int N_FRAMES_PER_DISPLAY = 1000; - int iteration = 0; - double fps_avg = 0.0; // decaying average of fps - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elap; - double fps = 0.0; // frames per second - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void init(float a[X][Y],float b[X][Y]) -{ - #pragma omp parallel default(shared) - { - g_nthreads = omp_get_num_threads(); - } - - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - // start with a uniform field with an approximate circle in the middle - //if(hypot(i%20-10/*-X/2*/,j%20-10/*-Y/2*/)<=frand(2,5)) { - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) - { - a[i][j] = 0.0f; - b[i][j] = 1.0f; - } - else { - a[i][j] = 1; - b[i][j] = 0; - } - /*float v = frand(0.0f,1.0f); - a[i][j] = v; - b[i][j] = 1.0f-v;*/ - } - } -} - -void compute(float a[X][Y],float b[X][Y], - float da[X][Y],float db[X][Y], - float r_a,float r_b,float par_f,float par_k,float speed, - bool parameter_space) -{ - // compute change in each cell - #pragma omp parallel for - for(int i = 0; i < X; i++) - { - int iprev,inext; - float f = par_f; - float k = par_k; - if (g_wrap) { - iprev = (i + X - 1) % X; - inext = (i + 1) % X; - } else { - iprev = max(0,i-1); - inext = min(X-1,i+1); - } - - for(int j = 0; j < Y; j++) - { - int jprev,jnext; - if (g_wrap) { - // toroidal - jprev = (j + Y - 1) % Y; - jnext = (j + 1) % Y; - } else { - jprev = max(0,j-1); - jnext = min(Y-1,j+1); - } - - float aval = a[i][j]; - float bval = b[i][j]; - - if (parameter_space) { - const float kmin=0.045f,kmax=0.07f,fmin=0.01f,fmax=0.09f; - // set f and k for this location (ignore the provided values of f and k) - k = kmin + i*(kmax-kmin)/X; - f = fmin + j*(fmax-fmin)/Y; - } - - // compute the Laplacians of a and b - float dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - float ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = r_a * dda - aval*bval*bval + f*(1-aval); - db[i][j] = r_b * ddb + aval*bval*bval - (f+k)*bval; - } - } - - // effect change - #pragma omp parallel for - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - a[i][j] += speed * da[i][j]; - b[i][j] += speed * db[i][j]; - // kill denormals by adding a teeny tiny something (http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.20.1348&rank=4) - a[i][j] += 1e-10f; - b[i][j] += 1e-10f; - } - } -} - diff --git a/SpeedComparisons/GrayScott_SSE/CMakeLists.txt b/SpeedComparisons/GrayScott_SSE/CMakeLists.txt deleted file mode 100644 index 01b08353f..000000000 --- a/SpeedComparisons/GrayScott_SSE/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -project(GrayScott_SSE) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -if(MSVC) - add_definitions(/arch:SSE2) -else() - add_definitions(-msse2) -endif() - -add_executable(GrayScott_SSE - gray_scott_SSE.cpp - ../Display/defs.h -) - -TARGET_LINK_LIBRARIES(GrayScott_SSE ${OpenCV_LIBS} ) diff --git a/SpeedComparisons/GrayScott_SSE/gray_scott_SSE.cpp b/SpeedComparisons/GrayScott_SSE/gray_scott_SSE.cpp deleted file mode 100644 index 63d3ab750..000000000 --- a/SpeedComparisons/GrayScott_SSE/gray_scott_SSE.cpp +++ /dev/null @@ -1,496 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// SSE: -#include - -// OpenCV: -#include -#include - -// If SIZE_OPTIONS is declared, the user can specify an image size with -width N and -height N options -//#define SIZE_OPTIONS - -#ifndef SIZE_OPTIONS -// local: -#include "defs.h" -#endif - -// consecutive horizontal SSE blocks lie end to end, to enable easy use of _mm_loadu_ps() for left and right - -const int SSE_BITS_PER_BLOCK = 128; -const int FLOATS_PER_BLOCK = (SSE_BITS_PER_BLOCK/8) / sizeof(float); - -#ifndef SIZE_OPTIONS -const int PADDED_X = X + 2*FLOATS_PER_BLOCK; // our toroidal wrap-around scheme uses a border that is copied from the other side each time -const int PADDED_Y = Y + 2; -const int X_BLOCKS = PADDED_X / FLOATS_PER_BLOCK; -const int Y_BLOCKS = PADDED_Y; -const int TOTAL_BLOCKS = X_BLOCKS * Y_BLOCKS; -#else -static int X = 256; -static int Y = 256; -static int PADDED_X = X + 2*FLOATS_PER_BLOCK; // our toroidal wrap-around scheme uses a border that is copied from the other side each time -static int PADDED_Y = Y + 2; -static int X_BLOCKS = PADDED_X / FLOATS_PER_BLOCK; -static int Y_BLOCKS = PADDED_Y; -static int TOTAL_BLOCKS = X_BLOCKS * Y_BLOCKS; -#endif - -inline int at(int x,int y) { return y*PADDED_X+x; } -inline int block_at(int x,int y) { return y*X_BLOCKS+x; } - -void init(float *a,float *b,float *da,float *db); -void compute(float *a,float *b,float *da,float *db, - const float r_a,const float r_b,const float f,const float k, - const float speed); -bool display(float *r,float *g,float *b, - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); - -int main(int argc, char * * argv) -{ -#ifdef SIZE_OPTIONS - for (int i = 1; i < argc; i++) { - if (0) { - } else if ((i+1maxR) maxR=val; - } - if(g) { - val = g[at(i,j)]; - if(valmaxG) maxG=val; - } - if(b) { - val = b[at(i,j)]; - if(valmaxB) maxB=val; - } - } - } - } - for(int col=0;col255) val=255; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 2] = (uchar)val; - } - if(g) { - float val = g[at(col,row)]; - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 1] = (uchar)val; - } - if(b) { - float val = b[at(col,row)]; - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + row*im->widthStep))[col*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - - { - char txt[100]; - sprintf(txt,"%d",iteration); - cvPutText(im2,txt,cvPoint(20,20),&font,white); - cvPutText(im2,message,cvPoint(20,40),&font,white); - } - - cvShowImage(title,im2); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - return true; - } - return false; -} diff --git a/SpeedComparisons/GrayScott_SSE_OpenMP/CMakeLists.txt b/SpeedComparisons/GrayScott_SSE_OpenMP/CMakeLists.txt deleted file mode 100644 index a03fbb916..000000000 --- a/SpeedComparisons/GrayScott_SSE_OpenMP/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -project(GrayScott_SSE_OpenMP) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -if(MSVC) - add_definitions(/arch:SSE2) -else() - add_definitions(-msse) -endif() - -# only build the OpenMP version if OpenMP was found -include(FindOpenMP) -if(OPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") - - add_executable(GrayScott_SSE_OpenMP - gray_scott_SSE_OpenMP.cpp - ../Display/display.cpp - ../Display/display.h - ../Display/defs.h - ) - - TARGET_LINK_LIBRARIES(GrayScott_SSE_OpenMP ${OpenCV_LIBS} ) -endif() diff --git a/SpeedComparisons/GrayScott_SSE_OpenMP/gray_scott_SSE_OpenMP.cpp b/SpeedComparisons/GrayScott_SSE_OpenMP/gray_scott_SSE_OpenMP.cpp deleted file mode 100644 index 9387e6dfd..000000000 --- a/SpeedComparisons/GrayScott_SSE_OpenMP/gray_scott_SSE_OpenMP.cpp +++ /dev/null @@ -1,388 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// SSE: -#include - -// OpenCV: -#include -#include - -// local: -#include "defs.h" -//#include "display.h" - -// OpenMP: -#include - -// consecutive horizontal SSE blocks lie end to end, to enable easy use of _mm_loadu_ps() for left and right - -const int SSE_BITS_PER_BLOCK = 128; -const int FLOATS_PER_BLOCK = (SSE_BITS_PER_BLOCK/8) / sizeof(float); -const int PADDED_X = X + 2*FLOATS_PER_BLOCK; // our toroidal wrap-around scheme uses a border that is copied from the other side each time -const int PADDED_Y = Y + 2; -const int X_BLOCKS = PADDED_X / FLOATS_PER_BLOCK; -const int Y_BLOCKS = PADDED_Y; -const int TOTAL_BLOCKS = X_BLOCKS * Y_BLOCKS; - -inline int at(int x,int y) { return y*PADDED_X+x; } -inline int block_at(int x,int y) { return y*X_BLOCKS+x; } - -void init(float *a,float *b,float *da,float *db); -void compute(float *a,float *b,float *da,float *db, - const float r_a,const float r_b,const float f,const float k, - const float speed); -bool display(float *r,float *g,float *b, - int iteration,bool auto_brighten,float manual_brighten, - int scale,int delay_ms,const char* message); - -int main() -{ - // Here we implement the Gray-Scott model, as described here: - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // http://arxiv.org/abs/patt-sol/9304003 - - // -- parameters -- - float r_a = 0.082f; - float r_b = 0.041f; - - // for spots: - float k = 0.064f; - float f = 0.035f; - // for stripes: - //float k = 0.06f; - //float f = 0.035f; - // for long stripes - //float k = 0.065f; - //float f = 0.056f; - // for dots and stripes - //float k = 0.064f; - //float f = 0.04f; - // for spiral waves: - //float k = 0.0475f; - //float f = 0.0118f; - float speed = 1.0f; - // ---------------- - - const int n_cells = PADDED_X*PADDED_Y; - float *a = (float*)_mm_malloc(n_cells*sizeof(float),16); - float *b = (float*)_mm_malloc(n_cells*sizeof(float),16); - float *da = (float*)_mm_malloc(n_cells*sizeof(float),16); - float *db = (float*)_mm_malloc(n_cells*sizeof(float),16); - - init(a,b,da,db); - - const int N_FRAMES_PER_DISPLAY = 1000; - int iteration = 0; - double fps_avg = 0.0; // decaying average of fps - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elapsed; - double fps = 0.0; // frames per second - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // compute: - for(int it=0;itmaxR) maxR=val; - } - if(g) { - val = g[at(i,j)]; - if(valmaxG) maxG=val; - } - if(b) { - val = b[at(i,j)]; - if(valmaxB) maxB=val; - } - } - } - } - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 2] = (uchar)val; - } - if(g) { - float val = g[at(i,PADDED_Y-j-1)]; - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 1] = (uchar)val; - } - if(b) { - float val = b[at(i,PADDED_Y-j-1)]; - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - cvCopyMakeBorder(im2,im3,cvPoint(border*2,0),IPL_BORDER_CONSTANT); - - char txt[100]; - if(!write_video) - { - sprintf(txt,"%d",iteration); - cvPutText(im3,txt,cvPoint(20,20),&font,white); - } - - cvPutText(im3,message,cvPoint(20,40),&font,white); - - if(write_video) - cvWriteFrame(video,im3); - - cvShowImage(title,im3); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - if(write_video) - cvReleaseVideoWriter(&video); - return true; - } - return false; -} diff --git a/SpeedComparisons/GrayScott_double/CMakeLists.txt b/SpeedComparisons/GrayScott_double/CMakeLists.txt deleted file mode 100644 index 8a6e0449d..000000000 --- a/SpeedComparisons/GrayScott_double/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -project(GrayScott_double) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(GrayScott_double - gray_scott_double.cpp - ../Display/defs.h -) - -TARGET_LINK_LIBRARIES(GrayScott_double ${OpenCV_LIBS} ) diff --git a/SpeedComparisons/GrayScott_double/gray_scott_double.cpp b/SpeedComparisons/GrayScott_double/gray_scott_double.cpp deleted file mode 100644 index 7dc5d884c..000000000 --- a/SpeedComparisons/GrayScott_double/gray_scott_double.cpp +++ /dev/null @@ -1,375 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -*/ - -// stdlib: -#include -#include -#include -#include -#include - -#ifdef _WIN32 - #include - #include - #include - // http://www.linuxjournal.com/article/5574 - void gettimeofday(struct timeval* t,void* timezone) - { struct _timeb timebuffer; - _ftime( &timebuffer ); - t->tv_sec=timebuffer.time; - t->tv_usec=1000*timebuffer.millitm; - } -#else - #include -#endif - -// OpenCV: -#include -#include - -// stdlib -#include - -// local: -#include "defs.h" - -static int g_wrap = 1; -static bool g_paramspace = 0; - -bool display_gsd(double r[X][Y],double g[X][Y],double b[X][Y], - int iteration,bool auto_brighten,double manual_brighten, - int scale,int delay_ms,const char* message) -{ - static bool need_init = true; - static bool write_video = false; - - static IplImage *im,*im2,*im3; - static int border = 0; - static CvFont font; - static CvVideoWriter *video; - static const CvScalar white = cvScalar(255,255,255); - - const char *title = "Press ESC to quit"; - - if(need_init) - { - need_init = false; - - im = cvCreateImage(cvSize(X,Y),IPL_DEPTH_8U,3); - cvSet(im,cvScalar(0,0,0)); - im2 = cvCreateImage(cvSize(X*scale,Y*scale),IPL_DEPTH_8U,3); - im3 = cvCreateImage(cvSize(X*scale+border*2,Y*scale+border),IPL_DEPTH_8U,3); - - cvNamedWindow(title,CV_WINDOW_AUTOSIZE); - - double hScale=0.4; - double vScale=0.4; - int lineWidth=1; - cvInitFont(&font,CV_FONT_HERSHEY_COMPLEX,hScale,vScale,0,lineWidth,CV_AA); - - if(write_video) - { - video = cvCreateVideoWriter(title,CV_FOURCC('D','I','V','X'),25.0,cvGetSize(im3),1); - border = 20; - } - } - - // convert double arrays to IplImage for OpenCV to display - double val,minR=FLT_MAX,maxR=-FLT_MAX,minG=FLT_MAX,maxG=-FLT_MAX,minB=FLT_MAX,maxB=-FLT_MAX; - if(auto_brighten) - { - for(int i=0;imaxR) maxR=val; - } - if(g) { - val = g[i][j]; - if(valmaxG) maxG=val; - } - if(b) { - val = b[i][j]; - if(valmaxB) maxB=val; - } - } - } - } - #pragma omp parallel for - for(int i=0;i255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 2] = (uchar)val; - } - if(g) { - double val = g[i][Y-j-1]; - if(auto_brighten) val = 255.0f * (val-minG) / (maxG-minG); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 1] = (uchar)val; - } - if(b) { - double val = b[i][Y-j-1]; - if(auto_brighten) val = 255.0f * (val-minB) / (maxB-minB); - else val *= manual_brighten; - if(val<0) val=0; if(val>255) val=255; - ((uchar *)(im->imageData + j*im->widthStep))[i*im->nChannels + 0] = (uchar)val; - } - } - } - - cvResize(im,im2); - cvCopyMakeBorder(im2,im3,cvPoint(border*2,0),IPL_BORDER_CONSTANT); - - char txt[100]; - if(!write_video) - { - sprintf(txt,"%d",iteration); - cvPutText(im3,txt,cvPoint(20,20),&font,white); - - // DEBUG: - sprintf(txt,"%.4f,%.4f,%.4f",r[0][0],g[0][0],b[0][0]); - //cvPutText(im3,txt,cvPoint(20,40),&font,white); - } - - // DEBUG: - if(write_video) - { - cvPutText(im3,"0.06",cvPoint(5,15),&font,white); - cvPutText(im3,"F",cvPoint(5,im2->height/2),&font,white); - cvPutText(im3,"0.00",cvPoint(5,im2->height),&font,white); - cvPutText(im3,"0.03",cvPoint(border*2-10,im2->height+15),&font,white); - cvPutText(im3,"K",cvPoint(border*2+im2->width/2,im2->height+15),&font,white); - cvPutText(im3,"0.07",cvPoint(im3->width-35,im2->height+15),&font,white); - } - - cvPutText(im3,message,cvPoint(20,40),&font,white); - - if(write_video) - cvWriteFrame(video,im3); - - cvShowImage(title,im3); - - int key = cvWaitKey(delay_ms); // allow time for the image to be drawn - if(key==27) // did user ask to quit? - { - cvDestroyWindow(title); - cvReleaseImage(&im); - cvReleaseImage(&im2); - if(write_video) - cvReleaseVideoWriter(&video); - return true; - } - return false; -} - -void init(double a[X][Y],double b[X][Y]); - -void compute(double a[X][Y],double b[X][Y], - double da[X][Y],double db[X][Y], - double r_a,double r_b,double f,double k, - double speed, - bool parameter_space); - -int main(int argc, char * * argv) -{ - for (int i = 1; i < argc; i++) { - if (0) { - } else if (strcmp(argv[i],"-paramspace")==0) { - // do a parameter space plot, like in the Pearson paper - g_paramspace = true; - } else if (strcmp(argv[i],"-wrap")==0) { - // patterns wrap around ("torus", also called "continuous boundary - // condition") - g_wrap = 1; - } else { - fprintf(stderr, "Unrecognized argument: '%s'\n", argv[i]); - exit(-1); - } - } - - // Here we implement the Gray-Scott model, as described here: - // http://www.cc.gatech.edu/~turk/bio_sim/hw3.html - // http://arxiv.org/abs/patt-sol/9304003 - - // -- parameters -- - double r_a = 0.082f; - double r_b = 0.041f; - - // for spots: - double k = 0.064f; - double f = 0.035f; - // for stripes: - //double k = 0.06f; - //double f = 0.035f; - // for long stripes - //double k = 0.065f; - //double f = 0.056f; - // for dots and stripes - //double k = 0.064f; - //double f = 0.04f; - // for spiral waves: - //double k = 0.0475f; - //double f = 0.0118f; - double speed = 1.0f; - // ---------------- - - // these arrays store the chemical concentrations: - double a[X][Y], b[X][Y]; - // these arrays store the rate of change of those chemicals: - double da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b); - - const int N_FRAMES_PER_DISPLAY = 200; - int iteration = 0; - double fps_avg = 0.0; // decaying average of fps - while(true) - { - struct timeval tod_record; - double tod_before, tod_after, tod_elapsed; - double fps = 0.0; // frames per second - - gettimeofday(&tod_record, 0); - tod_before = ((double) (tod_record.tv_sec)) - + ((double) (tod_record.tv_usec)) / 1.0e6; - - // compute: - for(int it=0;it (b)) ? (a) : (b)) -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -void init(double a[X][Y],double b[X][Y]) -{ - srand((unsigned int)time(NULL)); - - // figure the values - for(int i = 0; i < X; i++) { - for(int j = 0; j < Y; j++) { - - //if(hypot(i%10-5/*-X/2*/,j%10-5/*-Y/2*/)<=2.0f)//frand(2,3)) - if(hypot(i-X/2,(j-Y/2)/1.5)<=frand(2,5)) // start with a uniform field with an approximate circle in the middle - { - a[i][j] = 0.0f; - b[i][j] = 1.0f; - } - else { - a[i][j] = 1; - b[i][j] = 0; - } - //double v = frand(0.0f,1.0f); - //a[i][j] = v; - //b[i][j] = 1.0f-v; - //a[i][j] += frand(-0.01f,0.01f); - //b[i][j] += frand(-0.01f,0.01f); - } - } -} - -void compute(double a[X][Y],double b[X][Y], - double da[X][Y],double db[X][Y], - double r_a,double r_b,double f,double k,double speed, - bool parameter_space) -{ - // compute change in each cell - for(int i = 0; i < X; i++) { - int iprev,inext; - if (g_wrap) { - iprev = (i + X - 1) % X; - inext = (i + 1) % X; - } else { - iprev = max(0,i-1); - inext = min(X-1,i+1); - } - - for(int j = 0; j < Y; j++) { - int jprev,jnext; - if (g_wrap) { - jprev = (j + Y - 1) % Y; - jnext = (j + 1) % Y; - } else { - jprev = max(0,j-1); - jnext = min(Y-1,j+1); - } - - double aval = a[i][j]; - double bval = b[i][j]; - - if (parameter_space) { - const double kmin=0.045f,kmax=0.07f,fmin=0.01f,fmax=0.09f; - // set f and k for this location (ignore the provided values of f and k) - k = kmin + i*(kmax-kmin)/X; - f = fmin + j*(fmax-fmin)/Y; - } - - // compute the Laplacians of a and b - double dda = a[i][jprev] + a[i][jnext] + a[iprev][j] + a[inext][j] - 4*aval; - double ddb = b[i][jprev] + b[i][jnext] + b[iprev][j] + b[inext][j] - 4*bval; - - // compute the new rate of change of a and b - da[i][j] = r_a * dda - aval*bval*bval + f*(1-aval); - db[i][j] = r_b * ddb + aval*bval*bval - (f+k)*bval; - } - } - - // effect change - for(int i = 0; i < X; i++) - { - for(int j = 0; j < Y; j++) - { - a[i][j] += speed * da[i][j]; - b[i][j] += speed * db[i][j]; - // kill denormals by adding a teeny tiny something (http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.20.1348&rank=4) - a[i][j] += 1e-20; - b[i][j] += 1e-20; - } - } -} - diff --git a/Ready/TODO.txt b/TODO.txt similarity index 100% rename from Ready/TODO.txt rename to TODO.txt diff --git a/TuringSpots/CMakeLists.txt b/TuringSpots/CMakeLists.txt deleted file mode 100644 index bc1ceaa79..000000000 --- a/TuringSpots/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -project(TuringSpots) - -FIND_PACKAGE(OpenCV REQUIRED) -INCLUDE_DIRECTORIES( ${OPENCV_INCLUDE_DIR}) - -INCLUDE_DIRECTORIES( "../Display" ) - -add_executable(TuringSpots - turing_spots.cpp - ../Display/display.cpp - ../Display/display.h -) - -TARGET_LINK_LIBRARIES(TuringSpots ${OpenCV_LIBS} ) diff --git a/TuringSpots/turing_spots.cpp b/TuringSpots/turing_spots.cpp deleted file mode 100644 index 0c6a6a53e..000000000 --- a/TuringSpots/turing_spots.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - -A port of part of Greg Turk's reaction-diffusion code, from: -http://www.cc.gatech.edu/~turk/reaction_diffusion/reaction_diffusion.html - -See README.txt for more details. - -See also: -http://www1.cse.wustl.edu/~faanly/materials/Sketching_RD_Texture.pdf - -*/ - -// stdlib: -#include -#include -#include - -// local: -#include "defs.h" -#include "display.h" - -void init(float a[X][Y],float b[X][Y],float beta[X][Y], - float a_steady,float b_steady,float beta_init,float beta_rand); - -void compute(float a[X][Y],float b[X][Y],float beta[X][Y], - float da[X][Y],float db[X][Y], - float diff1,float diff2,float p1,float speed); - -int main() -{ - // -- parameters -- - float diff1 = 0.125f; - float diff2 = 0.03125f; - float s = 0.0125f; - float speed = 1.0f; - float a_steady = 4.0f; - float b_steady = 4.0f; - float beta_init = 12.0f; - float beta_rand = 0.1f; - // ---------------- - - // these arrays store the chemical concentrations: - float a[X][Y], b[X][Y], beta[X][Y]; - // these arrays store the rate of change of those chemicals: - float da[X][Y], db[X][Y]; - - // put the initial conditions into each cell - init(a,b,beta,a_steady,b_steady,beta_init,beta_rand); - - clock_t start,end; - - const int N_FRAMES_PER_DISPLAY = 100; - int iteration = 0; - while(true) - { - start = clock(); - - // compute: - for(int it=0;it