// Element-wise multiply of one slice of `lhs` by one slice of `rhs`, written
// into a slice of `result`.
//
// Each calling thread handles at most one element, chosen by its
// `threadIdx.x`:
//   result[result_offset + t] = lhs[lhs_offset + t] * rhs[rhs_offset + t]
// Threads whose `threadIdx.x` is not below `size` do nothing.
//
// Only the x component of the thread index is consulted; the block index is
// not used here.
// NOTE(review): presumably the caller encodes the block/workgroup position in
// the three offsets - confirm against the call site.
//
// Parameters:
//   lhs, lhs_offset       - first operand buffer and the element offset into it
//   rhs, rhs_offset       - second operand buffer and the element offset into it
//   result, result_offset - output buffer and the element offset into it
//   size                  - number of elements to process in this call
extern "C" __device__ void simple_mul_workgroup(float *lhs, size_t lhs_offset,
                                                float *rhs, size_t rhs_offset,
                                                float *result,
                                                size_t result_offset,
                                                size_t size) {
  const size_t lane = threadIdx.x;

  // Threads past the end of the requested range have no element to produce.
  if (lane >= size) {
    return;
  }

  const size_t lhs_index = lhs_offset + lane;
  const size_t rhs_index = rhs_offset + lane;
  const size_t out_index = result_offset + lane;

  result[out_index] = lhs[lhs_index] * rhs[rhs_index];
}