- get the next m*SIZE (round nelems up to a multiple of WARP_SZ)
0-WARP_SZ = 0xffffffffffffffe0 (for WARP_SZ = 32 and a 64-bit size_t)
size_t n = (nelems+WARP_SZ-1) & ((size_t)0-WARP_SZ)
0xe0 = 0b11100000 (low byte of the mask: the five lowest bits are cleared)
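ANDing with this mask keeps only multiples of WARP_SZ, i.e. it rounds down.
Adding WARP_SZ-1 to nelems first compensates for the low bits that the mask clears (the 00000), so the result is rounded up instead of down.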
n will be one of [32, 64, 96, …] for nelems >= 1 (and 0 when nelems is 0)
- reduce a value to a power of two (1<<N): the largest one that does not exceed it
uint32_t a;
while(a & (a-1)) // a & (a-1) is 0 only when a has at most one bit set (a is 1<<N, or a is 0)
a -= (a&(0-a)); // a & (0-a) isolates the lowest set bit; subtracting it clears that bit