2014-07-08 03:11:14 -04:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "EmberCLPch.h"
|
|
|
|
|
|
|
|
/// <summary>
/// Various data structures defined for the CPU and OpenCL.
/// These are stripped down versions of the classes in Ember, for use with OpenCL.
/// Their sole purpose is to pass values from the host to the device.
/// They retain most of the member variables, but do not contain the functions.
/// Visual Studio defaults to alignment of 16, but it's made explicit in case another compiler is used.
/// This must match the alignment specified in the kernel.
/// </summary>
|
|
|
|
|
|
|
|
namespace EmberCLns
|
|
|
|
{
|
|
|
|
/// <summary>
|
|
|
|
/// Various constants needed for rendering.
|
|
|
|
/// </summary>
|
|
|
|
static string ConstantDefinesString(bool doublePrecision)
|
|
|
|
{
|
|
|
|
ostringstream os;
|
|
|
|
|
|
|
|
if (doublePrecision)
|
|
|
|
{
|
|
|
|
os << "#if defined(cl_amd_fp64)\n"//AMD extension available?
|
--User changes
-Add new variations: bubbleT3D, crob, hexaplay3D, hexcrop, hexes, hexnix3D, loonie2, loonie3, nBlur, octapol and synth.
-Allow for pre/post versions of dc_bubble, dc_cylinder and dc_linear whereas before they were omitted.
-When saving a file with multiple embers in it, detect if time values are all the same and if so, start them at zero and increment by 1 for each ember.
-Allow for numerous quality increases to be coalesced into one. It will pick up at the end of the current render.
-Show selection highlight on variations tree in response to mouse hover. This makes it easier to see for which variation or param the current mouse wheel action will apply.
-Make default temporal samples be 100, whereas before it was 1000 which was overkill.
-Require the shift key to be held with delete for deleting an ember to prevent it from triggering when the user enters delete in the edit box.
-This wasn't otherwise fixable without writing a lot more code.
--Bug fixes
-EmberGenome was crashing when generating a sequence from a source file with more than 2 embers in it.
-EmberGenome was improperly handling the first frame of a merge after the last frame of the loop.
-These bugs were due to a previous commit. Revert parts of that commit.
-Prevent a zoom value of less than 0 when reading from xml.
-Slight optimization of the crescents, and mask variations, if the compiler wasn't doing it already.
-Unique file naming was broken because it was looking for _# and the default names ended with -#.
-Disallow renaming of an ember in the library tree to an empty string.
-Severe bug that prevented some variations from being read correctly from params generated outside this program.
-Severe OpenCL randomization bug. The first x coordinates of the first points in the first kernel call of the first ember of a render since the OpenCL renderer object was created were not random and were mostly -1.
-Severe bug when populating xform selection distributions that could sometimes cause a crash due to roundoff error. Fix by using double.
-Limit the max number of variations in a random ember to MAX_CL_VARS, which is 8. This ensures they'll look the same on CPU and GPU.
-Prevent user from saving stylesheet to default.qss, it's a special reserved filename.
--Code changes
-Generalize using the running sum output point inside of a variation for all cases: pre, reg and post.
-Allow for array variables in variations where the address of each element is stored in m_Params.
-Qualify all math functions with std::
-No longer use our own Clamp() in OpenCL, instead use the standard clamp().
-Redesign how functions are used in the variations OpenCL code.
-Add tests to EmberTester to verify some of the new functionality.
-Place more const and override qualifiers on functions where appropriate.
-Add a global rand with a lock to be used very sparingly.
-Use a map instead of a vector for bad param names in Xml parsing.
-Prefix affine interpolation mode defines with "AFFINE_" to make their purpose more clear.
-Allow for variations that change state during iteration by sending a separate copy of the ember to each rendering thread.
-Implement this same functionality with a local struct in OpenCL. It's members are the total of all variables that need to change state within an ember.
-Add Contains() function to Utils.h.
-EmberRender: print names of kernels being printed with --dump_kernel option.
-Clean up EmberTester to handle some of the recent changes.
-Fix various casts.
-Replace % 2 with & 1, even though the compiler was likely doing this already.
-Add new file Variations06.h to accommodate new variations.
-General cleanup.
2015-11-22 17:15:07 -05:00
|
|
|
" #pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
|
|
|
|
"#endif\n"
|
|
|
|
"#if defined(cl_khr_fp64)\n"//Khronos extension available?
|
|
|
|
" #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
|
|
|
|
"#endif\n"
|
|
|
|
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"//Only supported on nVidia.
|
|
|
|
"typedef long intPrec;\n"
|
|
|
|
"typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes.
|
|
|
|
"typedef double real_t;\n"
|
|
|
|
"typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double.
|
|
|
|
"typedef double2 real2;\n"
|
|
|
|
"typedef double4 real4;\n"
|
|
|
|
"typedef float4 real4_bucket;\n"//And here too.
|
|
|
|
"#define EPS (DBL_EPSILON)\n"
|
|
|
|
"#define TLOW (DBL_MIN)\n"
|
|
|
|
"#define TMAX (DBL_MAX)\n"
|
|
|
|
;
|
2014-07-08 03:11:14 -04:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
os << "typedef int intPrec;\n"
|
2014-12-06 00:05:09 -05:00
|
|
|
"typedef uint atomi;\n"
|
2014-07-08 03:11:14 -04:00
|
|
|
"typedef float real_t;\n"
|
2015-08-10 23:10:23 -04:00
|
|
|
"typedef float real_bucket_t;\n"
|
--User changes
-Add new variations: bubbleT3D, crob, hexaplay3D, hexcrop, hexes, hexnix3D, loonie2, loonie3, nBlur, octapol and synth.
-Allow for pre/post versions of dc_bubble, dc_cylinder and dc_linear whereas before they were omitted.
-When saving a file with multiple embers in it, detect if time values are all the same and if so, start them at zero and increment by 1 for each ember.
-Allow for numerous quality increases to be coalesced into one. It will pick up at the end of the current render.
-Show selection highlight on variations tree in response to mouse hover. This makes it easier to see for which variation or param the current mouse wheel action will apply.
-Make default temporal samples be 100, whereas before it was 1000 which was overkill.
-Require the shift key to be held with delete for deleting an ember to prevent it from triggering when the user enters delete in the edit box.
-This wasn't otherwise fixable without writing a lot more code.
--Bug fixes
-EmberGenome was crashing when generating a sequence from a source file with more than 2 embers in it.
-EmberGenome was improperly handling the first frame of a merge after the last frame of the loop.
-These bugs were due to a previous commit. Revert parts of that commit.
-Prevent a zoom value of less than 0 when reading from xml.
-Slight optimization of the crescents, and mask variations, if the compiler wasn't doing it already.
-Unique file naming was broken because it was looking for _# and the default names ended with -#.
-Disallow renaming of an ember in the library tree to an empty string.
-Severe bug that prevented some variations from being read correctly from params generated outside this program.
-Severe OpenCL randomization bug. The first x coordinates of the first points in the first kernel call of the first ember of a render since the OpenCL renderer object was created were not random and were mostly -1.
-Severe bug when populating xform selection distributions that could sometimes cause a crash due to roundoff error. Fix by using double.
-Limit the max number of variations in a random ember to MAX_CL_VARS, which is 8. This ensures they'll look the same on CPU and GPU.
-Prevent user from saving stylesheet to default.qss, it's a special reserved filename.
--Code changes
-Generalize using the running sum output point inside of a variation for all cases: pre, reg and post.
-Allow for array variables in variations where the address of each element is stored in m_Params.
-Qualify all math functions with std::
-No longer use our own Clamp() in OpenCL, instead use the standard clamp().
-Redesign how functions are used in the variations OpenCL code.
-Add tests to EmberTester to verify some of the new functionality.
-Place more const and override qualifiers on functions where appropriate.
-Add a global rand with a lock to be used very sparingly.
-Use a map instead of a vector for bad param names in Xml parsing.
-Prefix affine interpolation mode defines with "AFFINE_" to make their purpose more clear.
-Allow for variations that change state during iteration by sending a separate copy of the ember to each rendering thread.
-Implement this same functionality with a local struct in OpenCL. It's members are the total of all variables that need to change state within an ember.
-Add Contains() function to Utils.h.
-EmberRender: print names of kernels being printed with --dump_kernel option.
-Clean up EmberTester to handle some of the recent changes.
-Fix various casts.
-Replace % 2 with & 1, even though the compiler was likely doing this already.
-Add new file Variations06.h to accommodate new variations.
-General cleanup.
2015-11-22 17:15:07 -05:00
|
|
|
"typedef float2 real2;\n"
|
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
|
|
|
"typedef float4 real4;\n"
|
2015-08-10 23:10:23 -04:00
|
|
|
"typedef float4 real4_bucket;\n"
|
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
|
|
|
"#define EPS (FLT_EPSILON)\n"
|
--User changes
-Add new variations: bubbleT3D, crob, hexaplay3D, hexcrop, hexes, hexnix3D, loonie2, loonie3, nBlur, octapol and synth.
-Allow for pre/post versions of dc_bubble, dc_cylinder and dc_linear whereas before they were omitted.
-When saving a file with multiple embers in it, detect if time values are all the same and if so, start them at zero and increment by 1 for each ember.
-Allow for numerous quality increases to be coalesced into one. It will pick up at the end of the current render.
-Show selection highlight on variations tree in response to mouse hover. This makes it easier to see for which variation or param the current mouse wheel action will apply.
-Make default temporal samples be 100, whereas before it was 1000 which was overkill.
-Require the shift key to be held with delete for deleting an ember to prevent it from triggering when the user enters delete in the edit box.
-This wasn't otherwise fixable without writing a lot more code.
--Bug fixes
-EmberGenome was crashing when generating a sequence from a source file with more than 2 embers in it.
-EmberGenome was improperly handling the first frame of a merge after the last frame of the loop.
-These bugs were due to a previous commit. Revert parts of that commit.
-Prevent a zoom value of less than 0 when reading from xml.
-Slight optimization of the crescents, and mask variations, if the compiler wasn't doing it already.
-Unique file naming was broken because it was looking for _# and the default names ended with -#.
-Disallow renaming of an ember in the library tree to an empty string.
-Severe bug that prevented some variations from being read correctly from params generated outside this program.
-Severe OpenCL randomization bug. The first x coordinates of the first points in the first kernel call of the first ember of a render since the OpenCL renderer object was created were not random and were mostly -1.
-Severe bug when populating xform selection distributions that could sometimes cause a crash due to roundoff error. Fix by using double.
-Limit the max number of variations in a random ember to MAX_CL_VARS, which is 8. This ensures they'll look the same on CPU and GPU.
-Prevent user from saving stylesheet to default.qss, it's a special reserved filename.
--Code changes
-Generalize using the running sum output point inside of a variation for all cases: pre, reg and post.
-Allow for array variables in variations where the address of each element is stored in m_Params.
-Qualify all math functions with std::
-No longer use our own Clamp() in OpenCL, instead use the standard clamp().
-Redesign how functions are used in the variations OpenCL code.
-Add tests to EmberTester to verify some of the new functionality.
-Place more const and override qualifiers on functions where appropriate.
-Add a global rand with a lock to be used very sparingly.
-Use a map instead of a vector for bad param names in Xml parsing.
-Prefix affine interpolation mode defines with "AFFINE_" to make their purpose more clear.
-Allow for variations that change state during iteration by sending a separate copy of the ember to each rendering thread.
-Implement this same functionality with a local struct in OpenCL. It's members are the total of all variables that need to change state within an ember.
-Add Contains() function to Utils.h.
-EmberRender: print names of kernels being printed with --dump_kernel option.
-Clean up EmberTester to handle some of the recent changes.
-Fix various casts.
-Replace % 2 with & 1, even though the compiler was likely doing this already.
-Add new file Variations06.h to accommodate new variations.
-General cleanup.
2015-11-22 17:15:07 -05:00
|
|
|
"#define TLOW (FLT_MIN)\n"
|
|
|
|
"#define TMAX (FLT_MAX)\n"
|
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
|
|
|
;
|
2014-07-08 03:11:14 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
os <<
|
|
|
|
"typedef long int int64;\n"
|
|
|
|
"typedef unsigned long int uint64;\n"
|
|
|
|
"\n"
|
|
|
|
"#define EPS6 ((1e-6))\n"
|
|
|
|
"\n"
|
|
|
|
"//The number of threads per block used in the iteration function. Don't change\n"
|
|
|
|
"//it lightly; the block size is hard coded to be exactly 32 x 8.\n"
|
|
|
|
"#define NTHREADS 256u\n"
|
|
|
|
"#define THREADS_PER_WARP 32u\n"
|
|
|
|
"#define NWARPS (NTHREADS / THREADS_PER_WARP)\n"
|
|
|
|
"#define COLORMAP_LENGTH 256u\n"
|
|
|
|
"#define COLORMAP_LENGTH_MINUS_1 255u\n"
|
|
|
|
"#define DE_THRESH 100u\n"
|
|
|
|
"#define BadVal(x) (((x) != (x)) || ((x) > 1e10) || ((x) < -1e10))\n"
|
|
|
|
"#define Rint(A) floor((A) + (((A) < 0) ? -0.5 : 0.5))\n"
|
|
|
|
"#define SQR(x) ((x) * (x))\n"
|
|
|
|
"#define CUBE(x) ((x) * (x) * (x))\n"
|
|
|
|
"#define M_2PI (M_PI * 2)\n"
|
|
|
|
"#define M_3PI (M_PI * 3)\n"
|
|
|
|
"#define SQRT5 2.2360679774997896964091736687313\n"
|
|
|
|
"#define M_PHI 1.61803398874989484820458683436563\n"
|
|
|
|
"#define DEG_2_RAD (M_PI / 180)\n"
|
|
|
|
"\n"
|
|
|
|
"//Index in each dimension of a thread within a block.\n"
|
|
|
|
"#define THREAD_ID_X (get_local_id(0))\n"
|
|
|
|
"#define THREAD_ID_Y (get_local_id(1))\n"
|
|
|
|
"#define THREAD_ID_Z (get_local_id(2))\n"
|
|
|
|
"\n"
|
|
|
|
"//Index in each dimension of a block within a grid.\n"
|
|
|
|
"#define BLOCK_ID_X (get_group_id(0))\n"
|
|
|
|
"#define BLOCK_ID_Y (get_group_id(1))\n"
|
|
|
|
"#define BLOCK_ID_Z (get_group_id(2))\n"
|
|
|
|
"\n"
|
|
|
|
"//Absolute index in each dimension of a thread within a grid.\n"
|
|
|
|
"#define GLOBAL_ID_X (get_global_id(0))\n"
|
|
|
|
"#define GLOBAL_ID_Y (get_global_id(1))\n"
|
|
|
|
"#define GLOBAL_ID_Z (get_global_id(2))\n"
|
|
|
|
"\n"
|
|
|
|
"//Dimensions of a block.\n"
|
|
|
|
"#define BLOCK_SIZE_X (get_local_size(0))\n"
|
|
|
|
"#define BLOCK_SIZE_Y (get_local_size(1))\n"
|
|
|
|
"#define BLOCK_SIZE_Z (get_local_size(2))\n"
|
|
|
|
"\n"
|
|
|
|
"//Dimensions of a grid, in terms of blocks.\n"
|
|
|
|
"#define GRID_SIZE_X (get_num_groups(0))\n"
|
|
|
|
"#define GRID_SIZE_Y (get_num_groups(1))\n"
|
|
|
|
"#define GRID_SIZE_Z (get_num_groups(2))\n"
|
|
|
|
"\n"
|
|
|
|
"//Dimensions of a grid, in terms of threads.\n"
|
|
|
|
"#define GLOBAL_SIZE_X (get_global_size(0))\n"
|
|
|
|
"#define GLOBAL_SIZE_Y (get_global_size(1))\n"
|
|
|
|
"#define GLOBAL_SIZE_Z (get_global_size(2))\n"
|
|
|
|
"\n"
|
|
|
|
"#define INDEX_IN_BLOCK_2D (THREAD_ID_Y * BLOCK_SIZE_X + THREAD_ID_X)\n"
|
|
|
|
"#define INDEX_IN_BLOCK_3D ((BLOCK_SIZE_X * BLOCK_SIZE_Y * THREAD_ID_Z) + INDEX_IN_BLOCK_2D)\n"
|
|
|
|
"\n"
|
|
|
|
"#define INDEX_IN_GRID_2D (GLOBAL_ID_Y * GLOBAL_SIZE_X + GLOBAL_ID_X)\n"
|
|
|
|
"#define INDEX_IN_GRID_3D ((GLOBAL_SIZE_X * GLOBAL_SIZE_Y * GLOBAL_ID_Z) + INDEX_IN_GRID_2D)\n"
|
|
|
|
"\n";
|
|
|
|
|
|
|
|
return os.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// A point structure on the host that maps to the one used on the device to iterate in OpenCL.
|
|
|
|
/// It might seem better to use vec4, however 2D palettes and even 3D coordinates may eventually
|
|
|
|
/// be supported, which will make it more than 4 members.
|
|
|
|
/// </summary>
|
|
|
|
template <typename T>
|
|
|
|
struct ALIGN PointCL
|
|
|
|
{
|
|
|
|
T m_X;
|
|
|
|
T m_Y;
|
|
|
|
T m_Z;
|
|
|
|
T m_ColorX;
|
2014-12-06 00:05:09 -05:00
|
|
|
uint m_LastXfUsed;
|
2014-07-08 03:11:14 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// The point structure used to iterate in OpenCL.
|
|
|
|
/// It might seem better to use float4, however 2D palettes and even 3D coordinates may eventually
|
|
|
|
/// be supported, which will make it more than 4 members.
|
|
|
|
/// </summary>
|
|
|
|
static const char* PointCLStructString =
|
|
|
|
"typedef struct __attribute__ " ALIGN_CL " _Point\n"
|
|
|
|
"{\n"
|
|
|
|
" real_t m_X;\n"
|
|
|
|
" real_t m_Y;\n"
|
|
|
|
" real_t m_Z;\n"
|
|
|
|
" real_t m_ColorX;\n"
|
|
|
|
" uint m_LastXfUsed;\n"
|
|
|
|
"} Point;\n"
|
|
|
|
"\n";
|
|
|
|
|
|
|
|
//Maximum number of variation weights stored per xform in XformCL.
#define MAX_CL_VARS 8

//Two-level expansion so the argument is macro-expanded before being stringized.
#define EMBERCL_STRINGIFY_IMPL(x) #x
#define EMBERCL_STRINGIFY(x) EMBERCL_STRINGIFY_IMPL(x)

//The same value as a string literal, for splicing into OpenCL source text.
//Derived from MAX_CL_VARS so the two can never fall out of sync.
//Keep MAX_CL_VARS a plain integer literal so the generated text is a valid array size.
#define MAX_CL_VARS_STRING EMBERCL_STRINGIFY(MAX_CL_VARS)
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// A structure on the host used to hold all of the needed information for an xform used on the device to iterate in OpenCL.
|
|
|
|
/// Template argument expected to be float or double.
|
|
|
|
/// </summary>
|
|
|
|
template <typename T>
|
|
|
|
struct ALIGN XformCL
|
|
|
|
{
|
|
|
|
T m_A, m_B, m_C, m_D, m_E, m_F;//24 (48)
|
|
|
|
T m_VariationWeights[MAX_CL_VARS];//56 (112)
|
|
|
|
T m_PostA, m_PostB, m_PostC, m_PostD, m_PostE, m_PostF;//80 (160)
|
|
|
|
T m_DirectColor;//84 (168)
|
|
|
|
T m_ColorSpeedCache;//88 (176)
|
|
|
|
T m_OneMinusColorCache;//92 (184)
|
|
|
|
T m_Opacity;//96 (192)
|
|
|
|
T m_VizAdjusted;//100 (200)
|
|
|
|
};
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// The xform structure used to iterate in OpenCL.
|
|
|
|
/// </summary>
|
|
|
|
static const char* XformCLStructString =
|
|
|
|
"typedef struct __attribute__ " ALIGN_CL " _XformCL\n"
|
|
|
|
"{\n"
|
|
|
|
" real_t m_A, m_B, m_C, m_D, m_E, m_F;\n"
|
|
|
|
" real_t m_VariationWeights[" MAX_CL_VARS_STRING "];\n"
|
|
|
|
" real_t m_PostA, m_PostB, m_PostC, m_PostD, m_PostE, m_PostF;\n"
|
|
|
|
" real_t m_DirectColor;\n"
|
|
|
|
" real_t m_ColorSpeedCache;\n"
|
|
|
|
" real_t m_OneMinusColorCache;\n"
|
|
|
|
" real_t m_Opacity;\n"
|
|
|
|
" real_t m_VizAdjusted;\n"
|
|
|
|
"} XformCL;\n"
|
|
|
|
"\n";
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// A structure on the host used to hold all of the needed information for an ember used on the device to iterate in OpenCL.
|
|
|
|
/// Template argument expected to be float or double.
|
|
|
|
/// </summary>
|
|
|
|
template <typename T>
|
|
|
|
struct ALIGN EmberCL
|
|
|
|
{
|
|
|
|
T m_CamZPos;
|
|
|
|
T m_CamPerspective;
|
|
|
|
T m_CamYaw;
|
|
|
|
T m_CamPitch;
|
|
|
|
T m_CamDepthBlur;
|
|
|
|
T m_BlurCoef;
|
|
|
|
m3T m_CamMat;
|
|
|
|
T m_CenterX, m_CenterY;
|
|
|
|
T m_RotA, m_RotB, m_RotD, m_RotE;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// The ember structure used to iterate in OpenCL.
|
|
|
|
/// </summary>
|
|
|
|
static const char* EmberCLStructString =
|
|
|
|
"typedef struct __attribute__ " ALIGN_CL " _EmberCL\n"
|
|
|
|
"{\n"
|
|
|
|
" real_t m_CamZPos;\n"
|
|
|
|
" real_t m_CamPerspective;\n"
|
|
|
|
" real_t m_CamYaw;\n"
|
|
|
|
" real_t m_CamPitch;\n"
|
|
|
|
" real_t m_CamDepthBlur;\n"
|
|
|
|
" real_t m_BlurCoef;\n"
|
|
|
|
" real_t m_C00;\n"
|
|
|
|
" real_t m_C01;\n"
|
|
|
|
" real_t m_C02;\n"
|
|
|
|
" real_t m_C10;\n"
|
|
|
|
" real_t m_C11;\n"
|
|
|
|
" real_t m_C12;\n"
|
|
|
|
" real_t m_C20;\n"
|
|
|
|
" real_t m_C21;\n"
|
|
|
|
" real_t m_C22;\n"
|
|
|
|
" real_t m_CenterX, m_CenterY;\n"
|
|
|
|
" real_t m_RotA, m_RotB, m_RotD, m_RotE;\n"
|
|
|
|
"} EmberCL;\n"
|
|
|
|
"\n";
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// A structure on the host used to hold all of the needed information for cartesian to raster mapping used on the device to iterate in OpenCL.
|
|
|
|
/// Template argument expected to be float or double.
|
|
|
|
/// </summary>
|
|
|
|
template <typename T>
|
|
|
|
struct ALIGN CarToRasCL
|
|
|
|
{
|
|
|
|
T m_PixPerImageUnitW, m_RasLlX;
|
2014-12-06 00:05:09 -05:00
|
|
|
uint m_RasWidth;
|
2014-07-08 03:11:14 -04:00
|
|
|
T m_PixPerImageUnitH, m_RasLlY;
|
|
|
|
T m_CarLlX, m_CarUrX, m_CarUrY, m_CarLlY;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// The cartesian to raster structure used to iterate in OpenCL.
|
|
|
|
/// </summary>
|
|
|
|
static const char* CarToRasCLStructString =
|
|
|
|
"typedef struct __attribute__ " ALIGN_CL " _CarToRasCL\n"
|
|
|
|
"{\n"
|
|
|
|
" real_t m_PixPerImageUnitW, m_RasLlX;\n"
|
|
|
|
" uint m_RasWidth;\n"
|
|
|
|
" real_t m_PixPerImageUnitH, m_RasLlY;\n"
|
|
|
|
" real_t m_CarLlX, m_CarUrX, m_CarUrY, m_CarLlY;\n"
|
|
|
|
"} CarToRasCL;\n"
|
|
|
|
"\n";
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// A structure on the host used to hold all of the needed information for density filtering used on the device to iterate in OpenCL.
|
|
|
|
/// Note that the actual filter buffer is held elsewhere.
|
|
|
|
/// Template argument expected to be float or double.
|
|
|
|
/// </summary>
|
|
|
|
template <typename T>
|
|
|
|
struct ALIGN DensityFilterCL
|
|
|
|
{
|
|
|
|
T m_Curve;
|
|
|
|
T m_K1;
|
|
|
|
T m_K2;
|
2014-12-06 00:05:09 -05:00
|
|
|
uint m_Supersample;
|
|
|
|
uint m_SuperRasW;
|
|
|
|
uint m_SuperRasH;
|
|
|
|
uint m_KernelSize;
|
|
|
|
uint m_MaxFilterIndex;
|
|
|
|
uint m_MaxFilteredCounts;
|
|
|
|
uint m_FilterWidth;
|
2014-07-08 03:11:14 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// The density filtering structure used to iterate in OpenCL.
|
|
|
|
/// Note that the actual filter buffer is held elsewhere.
|
|
|
|
/// </summary>
|
|
|
|
static const char* DensityFilterCLStructString =
|
|
|
|
"typedef struct __attribute__ " ALIGN_CL " _DensityFilterCL\n"
|
|
|
|
"{\n"
|
2015-08-10 23:10:23 -04:00
|
|
|
" real_bucket_t m_Curve;\n"
|
|
|
|
" real_bucket_t m_K1;\n"
|
|
|
|
" real_bucket_t m_K2;\n"
|
2014-07-08 03:11:14 -04:00
|
|
|
" uint m_Supersample;\n"
|
|
|
|
" uint m_SuperRasW;\n"
|
|
|
|
" uint m_SuperRasH;\n"
|
|
|
|
" uint m_KernelSize;\n"
|
|
|
|
" uint m_MaxFilterIndex;\n"
|
|
|
|
" uint m_MaxFilteredCounts;\n"
|
|
|
|
" uint m_FilterWidth;\n"
|
|
|
|
"} DensityFilterCL;\n"
|
|
|
|
"\n";
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// A structure on the host used to hold all of the needed information for spatial filtering used on the device to iterate in OpenCL.
|
|
|
|
/// Note that the actual filter buffer is held elsewhere.
|
|
|
|
/// </summary>
|
|
|
|
template <typename T>
|
|
|
|
struct ALIGN SpatialFilterCL
|
|
|
|
{
|
2014-12-06 00:05:09 -05:00
|
|
|
uint m_SuperRasW;
|
|
|
|
uint m_SuperRasH;
|
|
|
|
uint m_FinalRasW;
|
|
|
|
uint m_FinalRasH;
|
|
|
|
uint m_Supersample;
|
|
|
|
uint m_FilterWidth;
|
|
|
|
uint m_NumChannels;
|
|
|
|
uint m_BytesPerChannel;
|
|
|
|
uint m_DensityFilterOffset;
|
|
|
|
uint m_Transparency;
|
|
|
|
uint m_YAxisUp;
|
2014-07-08 03:11:14 -04:00
|
|
|
T m_Vibrancy;
|
|
|
|
T m_HighlightPower;
|
|
|
|
T m_Gamma;
|
|
|
|
T m_LinRange;
|
|
|
|
Color<T> m_Background;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// <summary>
/// OpenCL source for the spatial filtering structure used to iterate on the device.
/// Note that the actual filter buffer is held elsewhere.
/// Member order mirrors the host side SpatialFilterCL struct.
/// NOTE(review): the alignment is spelled out literally here, whereas the other struct
/// strings in this file splice in ALIGN_CL. Confirm the two agree, and consider using ALIGN_CL here as well.
/// </summary>
static const char* SpatialFilterCLStructString =
	"typedef struct __attribute__ ((aligned (16))) _SpatialFilterCL\n"
	"{\n"
	" uint m_SuperRasW;\n"
	" uint m_SuperRasH;\n"
	" uint m_FinalRasW;\n"
	" uint m_FinalRasH;\n"
	" uint m_Supersample;\n"
	" uint m_FilterWidth;\n"
	" uint m_NumChannels;\n"
	" uint m_BytesPerChannel;\n"
	" uint m_DensityFilterOffset;\n"
	" uint m_Transparency;\n"
	" uint m_YAxisUp;\n"
	" real_bucket_t m_Vibrancy;\n"
	" real_bucket_t m_HighlightPower;\n"
	" real_bucket_t m_Gamma;\n"
	" real_bucket_t m_LinRange;\n"
	" real_bucket_t m_Background[4];\n"//For some reason, using float4/double4 here does not align no matter what. So just use an array of 4.
	"} SpatialFilterCL;\n"
	"\n";
|
|
|
|
|
|
|
|
/// <summary>
/// EmberCL makes extensive use of the built-in vector types, however accessing
/// their members as a buffer is not natively supported.
/// Declaring them in a union with a buffer resolves this problem.
/// Each union below pairs one vector type with an array of its element type.
/// </summary>
static const char* UnionCLStructString =
	"typedef union\n"
	"{\n"
	" uchar3 m_Uchar3;\n"
	" uchar m_Uchars[3];\n"
	"} uchar3uchars;\n"
	"\n"
	"typedef union\n"
	"{\n"
	" uchar4 m_Uchar4;\n"
	" uchar m_Uchars[4];\n"
	"} uchar4uchars;\n"
	"\n"
	"typedef union\n"
	"{\n"
	" uint4 m_Uint4;\n"
	" uint m_Uints[4];\n"
	"} uint4uints;\n"
	"\n"
	"typedef union\n"//Use in places where float is required.
	"{\n"
	" float4 m_Float4;\n"
	" float m_Floats[4];\n"
	"} float4floats;\n"
	"\n"
	"typedef union\n"//Use in places where float or double can be used depending on the template type.
	"{\n"
	" real4 m_Real4;\n"
	" real_t m_Reals[4];\n"
	"} real4reals;\n"
	"\n"
	"typedef union\n"//Used to match the bucket template type.
	"{\n"
	" real4_bucket m_Real4;\n"
	" real_bucket_t m_Reals[4];\n"
	"} real4reals_bucket;\n"
	"\n";
|
2014-12-05 21:30:46 -05:00
|
|
|
}
|