
Better, but there are still some bugs.
1> The root cause of the distortion on the back surface is still unknown.
2> Some refractions have disappeared.
3> Both the bottom-right and bottom-left corners are rendered with incorrect lighting.
//-----------------------------------------------------------------------------
// Device half of the float-arithmetic comparison (4-digit weights).
// Reads the 32-bit word at pIn, weights its four bytes (most significant
// first) by 0.1122, 0.2233, 0.3344 and 0.4455, and overwrites the same
// 4 bytes with the resulting float. No thread index is used, so every
// launched thread performs the same read and write on *pIn.
__global__ void __FloatTest(unsigned int* pIn)
{
    const unsigned int packed = *pIn;

    // Unpack the four bytes, highest byte first.
    const float byte3 = (float)((packed >> 24) & 0xffu);
    const float byte2 = (float)((packed >> 16) & 0xffu);
    const float byte1 = (float)((packed >> 8) & 0xffu);
    const float byte0 = (float)(packed & 0xffu);

    // Kept as one left-to-right sum of products, mirroring the host-side
    // reference expression in FloatTest().
    const float weighted =
        byte3 * 0.1122f + byte2 * 0.2233f + byte1 * 0.3344f + byte0 * 0.4455f;

    // Store the float's bit pattern over the input word.
    *reinterpret_cast<float*>(pIn) = weighted;
}
//-----------------------------------------------------------------------------
// Host driver for the float-arithmetic comparison (4-digit weights).
// Uploads the packed word 0x22446688 to a 4-byte device buffer, runs
// __FloatTest on it with a single thread, reads the float result back and
// prints it as "gpu", then evaluates the same expression on the host and
// prints it as "cpu".
//
// Every CUDA call is checked. If any step of the device path fails, the
// error string is printed instead of a stale "gpu" value; the "cpu" line is
// always printed.
void FloatTest()
{
    const unsigned int fMask = 0x22446688;
    float iMask = 0.0f;
    float* pMask = 0;

    // The buffer first carries the packed input word, then the float result;
    // both are 4 bytes wide.
    cudaError_t status = ::cudaMalloc(&pMask, sizeof(float));
    if (status == cudaSuccess)
    {
        status = ::cudaMemcpy(pMask, &fMask, sizeof(unsigned int), cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess)
    {
        __FloatTest<<<1, 1>>>((unsigned int*)pMask);
        // A kernel launch returns nothing; launch failures surface here.
        status = ::cudaGetLastError();
    }
    if (status == cudaSuccess)
    {
        // Blocking copy: also reports errors raised while the kernel ran.
        status = ::cudaMemcpy(&iMask, pMask, sizeof(float), cudaMemcpyDeviceToHost);
    }
    // pMask is still null if the allocation failed; freeing null is a no-op.
    ::cudaFree(pMask);

    if (status == cudaSuccess)
    {
        ::printf("gpu : %f\n", iMask);
    }
    else
    {
        ::printf("gpu : CUDA error: %s\n", ::cudaGetErrorString(status));
    }

    // Host reference: same expression, same evaluation order as the kernel.
    iMask =
        (float)((fMask & 0xff000000) >> 24) * 0.1122f +
        (float)((fMask & 0x00ff0000) >> 16) * 0.2233f +
        (float)((fMask & 0x0000ff00) >> 8) * 0.3344f +
        (float)((fMask & 0x000000ff) >> 0) * 0.4455f;
    ::printf("cpu : %f\n", iMask);
}
//-----------------------------------------------------------------------------
// Device half of the float-arithmetic comparison (6-digit weights).
// Reads the 32-bit word at pIn, weights its four bytes (most significant
// first) by 0.112233, 0.223344, 0.334455 and 0.445566, and overwrites the
// same 4 bytes with the resulting float. No thread index is used, so every
// launched thread performs the same read and write on *pIn.
__global__ void __FloatTest(unsigned int* pIn)
{
    const unsigned int packed = *pIn;

    // Unpack the four bytes, highest byte first.
    const float byte3 = (float)((packed >> 24) & 0xffu);
    const float byte2 = (float)((packed >> 16) & 0xffu);
    const float byte1 = (float)((packed >> 8) & 0xffu);
    const float byte0 = (float)(packed & 0xffu);

    // Kept as one left-to-right sum of products, mirroring the host-side
    // reference expression in FloatTest().
    const float weighted =
        byte3 * 0.112233f + byte2 * 0.223344f + byte1 * 0.334455f + byte0 * 0.445566f;

    // Store the float's bit pattern over the input word.
    *reinterpret_cast<float*>(pIn) = weighted;
}
//-----------------------------------------------------------------------------
// Host driver for the float-arithmetic comparison (6-digit weights).
// Uploads the packed word 0x22446688 to a 4-byte device buffer, runs
// __FloatTest on it with a single thread, reads the float result back and
// prints it as "gpu", then evaluates the same expression on the host and
// prints it as "cpu".
//
// Every CUDA call is checked. If any step of the device path fails, the
// error string is printed instead of a stale "gpu" value; the "cpu" line is
// always printed.
void FloatTest()
{
    const unsigned int fMask = 0x22446688;
    float iMask = 0.0f;
    float* pMask = 0;

    // The buffer first carries the packed input word, then the float result;
    // both are 4 bytes wide.
    cudaError_t status = ::cudaMalloc(&pMask, sizeof(float));
    if (status == cudaSuccess)
    {
        status = ::cudaMemcpy(pMask, &fMask, sizeof(unsigned int), cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess)
    {
        __FloatTest<<<1, 1>>>((unsigned int*)pMask);
        // A kernel launch returns nothing; launch failures surface here.
        status = ::cudaGetLastError();
    }
    if (status == cudaSuccess)
    {
        // Blocking copy: also reports errors raised while the kernel ran.
        status = ::cudaMemcpy(&iMask, pMask, sizeof(float), cudaMemcpyDeviceToHost);
    }
    // pMask is still null if the allocation failed; freeing null is a no-op.
    ::cudaFree(pMask);

    if (status == cudaSuccess)
    {
        ::printf("gpu : %f\n", iMask);
    }
    else
    {
        ::printf("gpu : CUDA error: %s\n", ::cudaGetErrorString(status));
    }

    // Host reference: same expression, same evaluation order as the kernel.
    iMask =
        (float)((fMask & 0xff000000) >> 24) * 0.112233f +
        (float)((fMask & 0x00ff0000) >> 16) * 0.223344f +
        (float)((fMask & 0x0000ff00) >> 8) * 0.334455f +
        (float)((fMask & 0x000000ff) >> 0) * 0.445566f;
    ::printf("cpu : %f\n", iMask);
}
// Empty placeholder type; Caster uses it as the default for its Z and W
// template parameters.
struct NullType {};
// Plain aggregate holding one member per template parameter, declared in the
// order x, y, z, w. Z and W default to NullType, but the z and w members are
// declared for every instantiation regardless.
template <class X, class Y, class Z = NullType, class W = NullType>
struct Caster
{
    X x;
    Y y;
    Z z;
    W w;
};
int main()
{
::printf("%d\n", sizeof(NullType));
::printf("%d\n", sizeof(Caster));
::printf("%d\n", sizeof(Caster));
::printf("%d\n", sizeof(Caster));
::printf("%d\n", sizeof(Caster));
}
//-----------------------------------------------------------------------------
// Device half of the unaligned-cast experiment.
//   rgTar[0] <- rgSrc[0]
//   rgTar[1] <- the 4 bytes that start 2 bytes past rgSrc, read through an
//               unsigned int pointer
// Both arrays must hold at least two unsigned ints. No thread index is used,
// so every launched thread performs the same two stores.
__global__ void RCastTest0(unsigned int* rgTar, unsigned int* rgSrc)
{
    // Step two bytes into the source, then view that address as a word again.
    unsigned char* const pSrcBytes = reinterpret_cast<unsigned char*>(rgSrc);
    unsigned int* const pShiftedWord = reinterpret_cast<unsigned int*>(pSrcBytes + 2);

    rgTar[0] = *rgSrc;
    // If rgSrc is 4-byte aligned, this address is not; how the device handles
    // that load is what the experiment observes.
    rgTar[1] = *pShiftedWord;
}
//-----------------------------------------------------------------------------
// Host driver for the unaligned-cast experiment.
// Uploads {0x11223344, 0xaabbccdd} to the device, runs RCastTest0 with a
// single thread, and prints the word it read at offset 0 and the word it read
// through a pointer offset by 2 bytes. The same 2-byte-offset read is then
// done on the host and printed for comparison.
//
// Every CUDA call is checked. If the device path fails (the kernel's
// misaligned load may itself fault), the error string is printed instead of
// result values. The result buffer is zero-initialised so indeterminate data
// is never printed.
void RCastTest()
{
    unsigned int rgTestSrcHost[2] = {0x11223344, 0xaabbccdd};
    unsigned int rgTestTarHost[2] = {0, 0};
    unsigned int* rgTestDevice = 0;

    // One allocation of four words: [0..1] is the kernel's target,
    // [2..3] is its source.
    cudaError_t status = ::cudaMalloc(&rgTestDevice, 4 * sizeof(unsigned int));
    if (status == cudaSuccess)
    {
        unsigned int* rgTestTarDevice = rgTestDevice;
        unsigned int* rgTestSrcDevice = rgTestDevice + 2;

        status = ::cudaMemcpy(
            rgTestSrcDevice,
            rgTestSrcHost,
            2 * sizeof(unsigned int),
            cudaMemcpyHostToDevice);

        //--cast in cuda
        if (status == cudaSuccess)
        {
            RCastTest0<<<1, 1>>>(rgTestTarDevice, rgTestSrcDevice);
            // A kernel launch returns nothing; launch failures surface here.
            status = ::cudaGetLastError();
        }

        if (status == cudaSuccess)
        {
            // Blocking copy: also reports errors raised while the kernel ran.
            status = ::cudaMemcpy(
                rgTestTarHost,
                rgTestTarDevice,
                2 * sizeof(unsigned int),
                cudaMemcpyDeviceToHost);
        }
    }

    //--cast in cpu
    // Deliberately reads a word from a 2-byte offset, mirroring the kernel;
    // this relies on the host tolerating unaligned loads.
    unsigned int dCasted = *(unsigned int*)((unsigned char*)(rgTestSrcHost) + 2);

    if (status == cudaSuccess)
    {
        ::printf("CUDA (align) : 0x%08X\n", rgTestTarHost[0]);
        ::printf("CUDA (un-align) : 0x%08X\n", rgTestTarHost[1]);
    }
    else
    {
        ::printf("CUDA : error: %s\n", ::cudaGetErrorString(status));
    }
    ::printf("CPU (un-align) : 0x%08X\n", dCasted);

    // rgTestDevice is still null if the allocation failed; freeing null is a
    // no-op. The result is ignored: any earlier failure was already reported.
    ::cudaFree(rgTestDevice);
}