Commit 35e90a44 authored by Michiel Cottaar's avatar Michiel Cottaar
Browse files

BUG: several bug fixes

parent 7661c6b7
Pipeline #5287 failed with stage
in 9 minutes and 33 seconds
......@@ -362,7 +362,10 @@ class BasisFunc(object):
for idx in range(0, flat_all_pos.shape[0], nsim):
set_pos = flat_all_pos[idx:idx+nsim, :]
sim_evaluator.update_pos(set_pos)
flat_res[idx:idx+nsim] = sim_evaluator(parameters, inverse=False)[:set_pos.shape[0], :]
part_res = sim_evaluator(parameters, inverse=False)[:set_pos.shape[0], :]
if sim_evaluator.use_cuda and hasattr(part_res, 'get'):
part_res = part_res.get()
flat_res[idx:idx+nsim] = part_res
res = flat_res.reshape(all_pos.shape)
if not isinstance(req, request.FieldRequest):
return res
......
......@@ -411,7 +411,8 @@ class RadialBasisCudaEvaluator(RequestEvaluator):
Specialised evaluator to compute the radial basis functions on the GPU
"""
global_cuda_params = {}
use_cuda = False
use_cuda = True
use_mat = False
_main_comp = lambda self, dim, dim2: """
tmp = offset[%(dim2)i] * offset[%(dim2)i] * rsq;
tomult = offset[%(dim)i] * offset[%(dim2)i] * rsq * (d2f_dr2 - df_dr * r);
......@@ -655,7 +656,7 @@ __global__ void matrix_mult_nd_invert({dtype} *derparam, {dtype} *derfield, int
def update_pos(self, new_positions):
idx = self.req_params_cuda_names.index('all_pos')
self.global_cuda_params[self.request][idx][:new_positions.size] = new_positions.flatten()
self.global_cuda_params[self.request][idx][:new_positions.size] = new_positions.astype(cuda.dtype).flatten()
self.request.positions[:new_positions.shape[0]] = new_positions
self.update_indices()
......@@ -92,7 +92,7 @@ class Fourier(BasisFunc):
}}
""")
def update_pos(params, new_positions):
params['pos'][:new_positions.shape[0] * self.ndim] = new_positions.flatten()
params['pos'][:new_positions.shape[0] * self.ndim] = new_positions.astype(cuda.dtype).flatten()
return cuda.CudaMatrixMult(code, positions.shape[0], positions.shape[1], self.scalar_nparams,
params={'freq': (cuda.dtype + ' *freq', self.frequencies.flatten()),
'pos': (cuda.dtype + ' *pos', positions.flatten())}, scalar=True,
......@@ -261,7 +261,7 @@ class ChargeDistribution(BasisFunc):
'pos': (cuda.dtype + ' *pos', positions.flatten())}
values = {'size_squared': size_squared, 'norm_internal': norm_internal, 'inv_4pi': 1 / (4 * sp.pi)}
def update_pos(params, new_positions):
params['pos'][:new_positions.shape[0] * self.ndim] = new_positions.flatten()
params['pos'][:new_positions.shape[0] * self.ndim] = new_positions.astype(cuda.dtype).flatten()
return cuda.CudaMatrixMult(code, positions.shape[0], self.ndim, self.nparams, params=params,
values=values, scalar=False, update_pos=update_pos)
......
......@@ -368,7 +368,7 @@ class CudaMatrixMult(object):
else:
result = gpuarray.zeros(npos if self.scalar else (npos, ndim), dtype=dtype)
f(result, parameters, *self.param_arrs,
f(result, to_gpu_correct(parameters), *self.param_arrs,
block=(nthreads, 1, 1), grid=(int(sp.ceil(result.shape[0] / nthreads)), 1, 1))
return result
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment