Nov 6, 2009

CUDA Note[5]="float";


//-----------------------------------------------------------------------------
// Splits the 32-bit word stored at pIn into its four bytes, forms a weighted
// sum of those bytes in single precision, and writes the float result back
// over the same 4-byte location.
//
// pIn : device pointer to one unsigned int on entry; holds a float on return.
//
// The kernel does not index by thread, so every launched thread reads and
// writes the same word.
__global__ void __FloatTest(unsigned int* pIn)
{
    const unsigned int packed = pIn[0];

    // Bytes of the word, most significant first.
    const unsigned int byte3 = (packed >> 24) & 0xffu;
    const unsigned int byte2 = (packed >> 16) & 0xffu;
    const unsigned int byte1 = (packed >> 8) & 0xffu;
    const unsigned int byte0 = packed & 0xffu;

    // Kept as one expression with left-to-right association so the
    // floating-point evaluation is not restructured.
    const float weighted =
        (float)byte3 * 0.1122f +
        (float)byte2 * 0.2233f +
        (float)byte1 * 0.3344f +
        (float)byte0 * 0.4455f;

    // Reuse the input storage for the float output.
    float* pOut = (float*)pIn;
    pOut[0] = weighted;
}

//-----------------------------------------------------------------------------
// Runs __FloatTest on the device for the packed word 0x22446688 and prints the
// result as "gpu : ...", then evaluates the same weighted byte sum on the host
// and prints it as "cpu : ...".
//
// Every CUDA runtime call and the kernel launch are checked. If any of them
// fails, a "cuda error : ..." line is printed in place of the "gpu : ..." line
// (instead of printing a meaningless value); the host result is still printed.
void FloatTest()
{
    const unsigned int fMask = 0x22446688;

    float iMask = 0.0f;
    float* pMask = 0;

    // A single device word serves as unsigned input and float output; this
    // relies on sizeof(float) == sizeof(unsigned int).
    cudaError_t status = ::cudaMalloc(&pMask, sizeof(float));

    if (status == cudaSuccess)
    {
        status = ::cudaMemcpy(pMask, &fMask, sizeof(unsigned int), cudaMemcpyHostToDevice);
    }

    if (status == cudaSuccess)
    {
        __FloatTest<<<1, 1>>>((unsigned int*)pMask);

        // A kernel launch returns nothing; launch failures must be queried.
        status = ::cudaGetLastError();
    }

    if (status == cudaSuccess)
    {
        // Issued after the kernel, so an execution failure is reported here.
        status = ::cudaMemcpy(&iMask, pMask, sizeof(float), cudaMemcpyDeviceToHost);
    }

    // Always release the allocation; freeing a null pointer is a no-op.
    const cudaError_t freeStatus = ::cudaFree(pMask);
    if (status == cudaSuccess)
    {
        status = freeStatus;
    }

    if (status == cudaSuccess)
    {
        ::printf("gpu : %f\n", iMask);
    }
    else
    {
        ::printf("cuda error : %s\n", ::cudaGetErrorString(status));
    }

    // Host reference: same expression as the kernel.
    iMask =
        (float)((fMask & 0xff000000) >> 24) * 0.1122f +
        (float)((fMask & 0x00ff0000) >> 16) * 0.2233f +
        (float)((fMask & 0x0000ff00) >> 8) * 0.3344f +
        (float)((fMask & 0x000000ff) >> 0) * 0.4455f;

    ::printf("cpu : %f\n", iMask);
}


output :
gpu : 113.695999
cpu : 113.695999


//-----------------------------------------------------------------------------
// Splits the 32-bit word stored at pIn into its four bytes, forms a weighted
// sum of those bytes in single precision (six-decimal coefficients), and
// writes the float result back over the same 4-byte location.
//
// pIn : device pointer to one unsigned int on entry; holds a float on return.
//
// The kernel does not index by thread, so every launched thread reads and
// writes the same word.
__global__ void __FloatTest(unsigned int* pIn)
{
    const unsigned int packed = pIn[0];

    // Bytes of the word, most significant first.
    const unsigned int byte3 = (packed >> 24) & 0xffu;
    const unsigned int byte2 = (packed >> 16) & 0xffu;
    const unsigned int byte1 = (packed >> 8) & 0xffu;
    const unsigned int byte0 = packed & 0xffu;

    // Kept as one expression with left-to-right association so the
    // floating-point evaluation is not restructured.
    const float weighted =
        (float)byte3 * 0.112233f +
        (float)byte2 * 0.223344f +
        (float)byte1 * 0.334455f +
        (float)byte0 * 0.445566f;

    // Reuse the input storage for the float output.
    float* pOut = (float*)pIn;
    pOut[0] = weighted;
}

//-----------------------------------------------------------------------------
// Runs __FloatTest on the device for the packed word 0x22446688 and prints the
// result as "gpu : ...", then evaluates the same weighted byte sum (six-decimal
// coefficients) on the host and prints it as "cpu : ...".
//
// Every CUDA runtime call and the kernel launch are checked. If any of them
// fails, a "cuda error : ..." line is printed in place of the "gpu : ..." line
// (instead of printing a meaningless value); the host result is still printed.
void FloatTest()
{
    const unsigned int fMask = 0x22446688;

    float iMask = 0.0f;
    float* pMask = 0;

    // A single device word serves as unsigned input and float output; this
    // relies on sizeof(float) == sizeof(unsigned int).
    cudaError_t status = ::cudaMalloc(&pMask, sizeof(float));

    if (status == cudaSuccess)
    {
        status = ::cudaMemcpy(pMask, &fMask, sizeof(unsigned int), cudaMemcpyHostToDevice);
    }

    if (status == cudaSuccess)
    {
        __FloatTest<<<1, 1>>>((unsigned int*)pMask);

        // A kernel launch returns nothing; launch failures must be queried.
        status = ::cudaGetLastError();
    }

    if (status == cudaSuccess)
    {
        // Issued after the kernel, so an execution failure is reported here.
        status = ::cudaMemcpy(&iMask, pMask, sizeof(float), cudaMemcpyDeviceToHost);
    }

    // Always release the allocation; freeing a null pointer is a no-op.
    const cudaError_t freeStatus = ::cudaFree(pMask);
    if (status == cudaSuccess)
    {
        status = freeStatus;
    }

    if (status == cudaSuccess)
    {
        ::printf("gpu : %f\n", iMask);
    }
    else
    {
        ::printf("cuda error : %s\n", ::cudaGetErrorString(status));
    }

    // Host reference: same expression as the kernel.
    iMask =
        (float)((fMask & 0xff000000) >> 24) * 0.112233f +
        (float)((fMask & 0x00ff0000) >> 16) * 0.223344f +
        (float)((fMask & 0x0000ff00) >> 8) * 0.334455f +
        (float)((fMask & 0x000000ff) >> 0) * 0.445566f;

    ::printf("cpu : %f\n", iMask);
}


output :
gpu : 113.714699
cpu : 113.714706

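So keep in mind that floating-point results may differ between the CPU and the GPU. Both listings evaluate the same source expression, but the two compilers and processors may round intermediate results differently (for example, the GPU may combine a multiply and an add into a single multiply-add instruction), so compare such results with a tolerance rather than expecting bit-exact equality.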

No comments: