Better, but there are some bugs.
1> Don't know the root cause of the distortion (back surface).
2> Some refractions have disappeared.
3> Both the bottom-right and bottom-left corners are rendered with incorrect lighting.
//-----------------------------------------------------------------------------
// Device-side half of the float test.
//
// Reads one 32-bit word from pIn, treats it as four 8-bit fields, converts
// each field to float, and forms a weighted sum of the four fields. The float
// result is then stored over the same 32-bit word it was read from.
//
// pIn : device pointer to one unsigned int on input / one float on output.
//
// The kernel does no thread indexing, so every launched thread reads and
// writes the same word; the visible caller launches it as <<<1, 1>>>.
__global__ void __FloatTest(unsigned int* pIn)
{
    const unsigned int packed = *pIn;

    // Peel off the four bytes, most significant first. Each value is in
    // 0..255, so the conversion to float is exact.
    const float byte3 = (float)((packed >> 24) & 0xffu);
    const float byte2 = (float)((packed >> 16) & 0xffu);
    const float byte1 = (float)((packed >> 8) & 0xffu);
    const float byte0 = (float)(packed & 0xffu);

    // The products are summed left to right in one expression; the order of
    // the float operations is part of what this test measures.
    const float weighted =
        byte3 * 0.1122f + byte2 * 0.2233f + byte1 * 0.3344f + byte0 * 0.4455f;

    // Reuse the input word as storage for the float result.
    float* pOut = (float*)pIn;
    *pOut = weighted;
}
//-----------------------------------------------------------------------------
// Host-side driver for the float test.
//
// Uploads the word 0x22446688 to the device, runs __FloatTest on it with a
// single thread, reads the resulting float back and prints it as "gpu : ...".
// The same weighted byte sum is then evaluated on the host and printed as
// "cpu : ...", so the two lines can be compared by eye.
//
// Every CUDA runtime call is checked. If any GPU step fails, a diagnostic
// with the CUDA error string is printed in place of the "gpu :" line (so a
// stale 0.0 is never reported as a GPU result); the CPU reference value is
// printed regardless.
void FloatTest()
{
    const unsigned int fMask = 0x22446688;
    float iMask = 0.0f;
    float* pMask = 0;

    cudaError_t status = ::cudaMalloc(&pMask, sizeof(float));
    if (status != cudaSuccess)
    {
        ::printf("FloatTest: cudaMalloc failed: %s\n", ::cudaGetErrorString(status));
    }
    else
    {
        const char* failedStep = "cudaMemcpy (host to device)";
        status = ::cudaMemcpy(pMask, &fMask, sizeof(unsigned int), cudaMemcpyHostToDevice);

        if (status == cudaSuccess)
        {
            failedStep = "kernel launch";
            __FloatTest<<<1, 1>>>((unsigned int*)pMask);
            // A launch does not return an error code; query it explicitly.
            status = ::cudaGetLastError();
        }

        if (status == cudaSuccess)
        {
            // Blocking copy: also surfaces any error raised while the kernel ran.
            failedStep = "cudaMemcpy (device to host)";
            status = ::cudaMemcpy(&iMask, pMask, sizeof(float), cudaMemcpyDeviceToHost);
        }

        // Release the device buffer on every path once it has been allocated.
        const cudaError_t freeStatus = ::cudaFree(pMask);

        if (status != cudaSuccess)
        {
            ::printf("FloatTest: %s failed: %s\n", failedStep, ::cudaGetErrorString(status));
        }
        else
        {
            ::printf("gpu : %f\n", iMask);
        }

        if (freeStatus != cudaSuccess)
        {
            ::printf("FloatTest: cudaFree failed: %s\n", ::cudaGetErrorString(freeStatus));
        }
    }

    // CPU reference: same bytes, same weights, same left-to-right sum.
    iMask =
        (float)((fMask & 0xff000000) >> 24) * 0.1122f +
        (float)((fMask & 0x00ff0000) >> 16) * 0.2233f +
        (float)((fMask & 0x0000ff00) >> 8) * 0.3344f +
        (float)((fMask & 0x000000ff) >> 0) * 0.4455f;
    ::printf("cpu : %f\n", iMask);
}
//-----------------------------------------------------------------------------
// Device-side half of the float test (six-digit weight constants).
//
// Reads one 32-bit word from pIn, treats it as four 8-bit fields, converts
// each field to float, and forms a weighted sum of the four fields. The float
// result is then stored over the same 32-bit word it was read from.
//
// pIn : device pointer to one unsigned int on input / one float on output.
//
// The kernel does no thread indexing, so every launched thread reads and
// writes the same word; the visible caller launches it as <<<1, 1>>>.
__global__ void __FloatTest(unsigned int* pIn)
{
    const unsigned int packed = *pIn;

    // Peel off the four bytes, most significant first. Each value is in
    // 0..255, so the conversion to float is exact.
    const float byte3 = (float)((packed >> 24) & 0xffu);
    const float byte2 = (float)((packed >> 16) & 0xffu);
    const float byte1 = (float)((packed >> 8) & 0xffu);
    const float byte0 = (float)(packed & 0xffu);

    // The products are summed left to right in one expression; the order of
    // the float operations is part of what this test measures.
    const float weighted =
        byte3 * 0.112233f + byte2 * 0.223344f + byte1 * 0.334455f + byte0 * 0.445566f;

    // Reuse the input word as storage for the float result.
    float* pOut = (float*)pIn;
    *pOut = weighted;
}
//-----------------------------------------------------------------------------
// Host-side driver for the float test (six-digit weight constants).
//
// Uploads the word 0x22446688 to the device, runs __FloatTest on it with a
// single thread, reads the resulting float back and prints it as "gpu : ...".
// The same weighted byte sum is then evaluated on the host and printed as
// "cpu : ...", so the two lines can be compared by eye.
//
// Every CUDA runtime call is checked. If any GPU step fails, a diagnostic
// with the CUDA error string is printed in place of the "gpu :" line (so a
// stale 0.0 is never reported as a GPU result); the CPU reference value is
// printed regardless.
void FloatTest()
{
    const unsigned int fMask = 0x22446688;
    float iMask = 0.0f;
    float* pMask = 0;

    cudaError_t status = ::cudaMalloc(&pMask, sizeof(float));
    if (status != cudaSuccess)
    {
        ::printf("FloatTest: cudaMalloc failed: %s\n", ::cudaGetErrorString(status));
    }
    else
    {
        const char* failedStep = "cudaMemcpy (host to device)";
        status = ::cudaMemcpy(pMask, &fMask, sizeof(unsigned int), cudaMemcpyHostToDevice);

        if (status == cudaSuccess)
        {
            failedStep = "kernel launch";
            __FloatTest<<<1, 1>>>((unsigned int*)pMask);
            // A launch does not return an error code; query it explicitly.
            status = ::cudaGetLastError();
        }

        if (status == cudaSuccess)
        {
            // Blocking copy: also surfaces any error raised while the kernel ran.
            failedStep = "cudaMemcpy (device to host)";
            status = ::cudaMemcpy(&iMask, pMask, sizeof(float), cudaMemcpyDeviceToHost);
        }

        // Release the device buffer on every path once it has been allocated.
        const cudaError_t freeStatus = ::cudaFree(pMask);

        if (status != cudaSuccess)
        {
            ::printf("FloatTest: %s failed: %s\n", failedStep, ::cudaGetErrorString(status));
        }
        else
        {
            ::printf("gpu : %f\n", iMask);
        }

        if (freeStatus != cudaSuccess)
        {
            ::printf("FloatTest: cudaFree failed: %s\n", ::cudaGetErrorString(freeStatus));
        }
    }

    // CPU reference: same bytes, same weights, same left-to-right sum.
    iMask =
        (float)((fMask & 0xff000000) >> 24) * 0.112233f +
        (float)((fMask & 0x00ff0000) >> 16) * 0.223344f +
        (float)((fMask & 0x0000ff00) >> 8) * 0.334455f +
        (float)((fMask & 0x000000ff) >> 0) * 0.445566f;
    ::printf("cpu : %f\n", iMask);
}