Commit 370415b6 authored by Pierre NARVOR's avatar Pierre NARVOR
Browse files

[cuda][WIP] Added a CompoundFunctor type (have to enable experimental CUDA...

[cuda][WIP] Added a CompoundFunctor type (have to enable experimental CUDA feature --expt-relaxed-constexpr)
parent a3766abd
......@@ -31,7 +31,8 @@ target_include_directories(rtac_cuda PUBLIC
${CUDA_TOOLKIT_INCLUDE}
)
target_compile_options(rtac_cuda PUBLIC
$<$<COMPILE_LANGUAGE:CUDA>:-gencode arch=compute_61,code=sm_61>
# $<$<COMPILE_LANGUAGE:CUDA>:-gencode arch=compute_61,code=sm_61>
$<$<COMPILE_LANGUAGE:CUDA>:-gencode arch=compute_61,code=sm_61 --expt-relaxed-constexpr>
)
target_link_libraries(rtac_cuda PUBLIC
${CUDA_LIBRARIES}
......
#ifndef _DEF_RTAC_BASE_CUDA_FUNCTOR_COMPOUND_H_
#define _DEF_RTAC_BASE_CUDA_FUNCTOR_COMPOUND_H_
#include <tuple>
#include <rtac_base/cuda/utils.h>
namespace rtac { namespace cuda {
/**
 * Chains several unary functors into a single unary functor, allowing custom
 * functor types to be created on the fly.
 *
 * A functor is a callable struct (defines an operator()). In the RTAC
 * framework, a valid functor must define an InputT and OutputT types, as well
 * as the operator(). As such, a minimal functor code has the following form :
 *
 * \code
 * struct MultiplyBy2 {
 *     using InputT  = float;
 *     using OutputT = float;
 *
 *     float operator()(float input) const { return 2.0f * input; }
 * };
 * \endcode
 *
 * Functors can be templates :
 *
 * \code
 * template <typename T>
 * struct MultiplyBy2 {
 *     using InputT  = T;
 *     using OutputT = T;
 *
 *     T operator()(T input) const { return 2.0f * input; }
 * };
 * \endcode
 *
 * Composition order : the LAST functor of the pack is applied to the input
 * first and the FIRST functor is applied last, i.e.
 * FunctorCompound<F0, F1, F2> evaluates F0(F1(F2(input))). Consequently,
 * InputT is the InputT of the last functor and OutputT is the OutputT of the
 * first functor.
 *
 * The functors are stored in a std::tuple and read back with std::get from
 * RTAC_HOSTDEVICE functions.
 * NOTE(review): using std::get in device code presumably requires nvcc's
 * --expt-relaxed-constexpr option -- confirm against the build flags.
 *
 * \tparam FunctorsT functor types to chain (at least one is required).
 */
template <class... FunctorsT>
struct FunctorCompound
{
    // Without this guard an empty pack makes LastIndex wrap around (unsigned
    // 0 - 1) and fails later with an unrelated std::tuple_element error.
    static_assert(sizeof...(FunctorsT) > 0,
                  "FunctorCompound requires at least one functor type");

    using TupleT = std::tuple<FunctorsT...>;
    static constexpr unsigned int FunctorCount = std::tuple_size<TupleT>::value;
    static constexpr unsigned int LastIndex    = FunctorCount - 1;

    /**
     * Gives access to the type of the functor stored at position Level, and
     * to its InputT / OutputT types.
     */
    template <unsigned int Level>
    struct functor_get {
        using type    = typename std::tuple_element<Level,TupleT>::type;
        using InputT  = typename type::InputT;
        using OutputT = typename type::OutputT;
    };

    // The last functor consumes the input, the first one produces the output.
    using InputT  = typename functor_get<LastIndex>::InputT;
    using OutputT = typename functor_get<0>::OutputT;

    TupleT functors_;

    /**
     * Evaluates the chain from position Level down to the last functor.
     *
     * The recursion first descends to the last functor, which receives the
     * original input; each functor above it receives the result of the
     * functor stored right after it in the pack.
     *
     * \param input value handed to the last functor of the chain.
     * \return the output of the functor stored at position Level.
     */
    template <unsigned int Level> RTAC_HOSTDEVICE
    typename functor_get<Level>::OutputT call_functor(const InputT& input) const {
        if constexpr(Level == LastIndex) {
            return std::get<Level>(functors_)(input);
        }
        else {
            return std::get<Level>(functors_)(call_functor<Level+1>(input));
        }
    }

    public:

    /** Builds the compound from an already assembled tuple of functors. */
    constexpr FunctorCompound(const TupleT& functors) : functors_(functors) {}

    /** Builds the compound from the individual functors, in pack order. */
    constexpr FunctorCompound(FunctorsT... functors) : functors_(std::make_tuple(functors...)) {}

    /**
     * Applies the whole chain to input.
     *
     * \param input value handed to the last functor of the pack.
     * \return the output of the first functor of the pack.
     */
    RTAC_HOSTDEVICE OutputT operator()(const InputT& input) const {
        return call_functor<0>(input);
    }
};
}; //namespace cuda
}; //namespace rtac
#endif //_DEF_RTAC_BASE_CUDA_FUNCTOR_COMPOUND_H_
#ifndef _DEF_RTAC_BASE_CUDA_FUNCTORS_H_
#define _DEF_RTAC_BASE_CUDA_FUNCTORS_H_
/**
* This file implements various functors. The aim is to replace the operator
* types which are less versatile.
*/
#include <tuple>
#include <rtac_base/cuda/utils.h>
namespace rtac { namespace cuda { namespace functor {
/**
 * Functor multiplying its input by a stored factor.
 *
 * \tparam Tout   type returned by the call operator.
 * \tparam Tin    type of the value being scaled (defaults to Tout).
 * \tparam Tscale type of the stored factor (defaults to Tin).
 */
template <typename Tout, typename Tin = Tout, typename Tscale = Tin>
struct Scaling {
    typedef Tin    InputT;
    typedef Tout   OutputT;
    typedef Tscale ScaleT;

    // Factor placed on the left-hand side of the product.
    Tscale scaling;

    /** Returns scaling * input, converted to Tout. */
    RTAC_HOSTDEVICE Tout operator()(const Tin& input) const {
        return this->scaling * input;
    }
};
/**
 * Functor adding a stored offset to its input.
 *
 * \tparam Tout type returned by the call operator.
 * \tparam Tin  type of the value being shifted (defaults to Tout).
 * \tparam Toff type of the stored offset (defaults to Tout).
 */
template <typename Tout, typename Tin = Tout, typename Toff = Tout>
struct Offset {
    using InputT  = Tin;
    using OutputT = Tout;
    // Exposes the parameter type, mirroring Scaling::ScaleT.
    using OffsetT = Toff;

    // Value placed on the right-hand side of the sum.
    Toff offset;

    /** Returns input + offset, converted to Tout. */
    RTAC_HOSTDEVICE Tout operator()(const Tin& input) const {
        return input + offset;
    }
};
}; //namespace functor
}; //namespace cuda
}; //namespace rtac
#endif //_DEF_RTAC_BASE_CUDA_FUNCTORS_H_
......@@ -33,6 +33,7 @@ endforeach(name)
add_subdirectory(texture)
add_subdirectory(mapping)
add_subdirectory(functors)
add_executable(reductions_test
......
# Test executable for the functor types: one C++ source and one CUDA source,
# linked against the rtac_cuda target.
set(target_name ${PROJECT_NAME}_functors_test)

add_executable(${target_name} src/functors_test.cpp src/functors_test.cu)
target_link_libraries(${target_name} PRIVATE rtac_cuda)
#include <iostream>
using namespace std;
#include <rtac_base/cuda/DeviceVector.h>
#include <rtac_base/cuda/HostVector.h>
using namespace rtac::cuda;
#include "functors_test.h"
int main()
{
int N = 10;
HostVector<float> input(N);
for(int n = 0; n < N; n++) {
input[n] = n;
}
//auto output = scaling(input, functor::Scaling<float>({2.0f}));
auto f = Saxpy(functor::Offset<float>({3.0f}), functor::Scaling<float>({2.0f}));
cout << f(1.0f) << endl;
auto output = saxpy(input, Saxpy(functor::Offset<float>({3.0f}),
functor::Scaling<float>({2.0f})));
cout << input << endl;
cout << output << endl;
return 0;
}
#include <stdexcept>
#include <string>

#include "functors_test.h"
#include "functors_test.hcu"
namespace rtac { namespace cuda {
/**
 * Applies a Scaling functor to every element of input on the device.
 *
 * The kernel is launched with a single block of a single thread, so that
 * thread processes all the elements.
 *
 * \param input values to transform.
 * \param func  functor applied to each element.
 * \return a new vector with the same size as input, holding the results.
 * \throws std::runtime_error if the kernel launch or its execution fails.
 */
DeviceVector<float> scaling(const DeviceVector<float>& input,
                            const functor::Scaling<float>& func)
{
    DeviceVector<float> output(input.size());
    apply_functor<<<1,1>>>(output.data(), input.data(), func, input.size());

    // A kernel launch returns no status: configuration errors are reported by
    // cudaGetLastError(), execution errors by the next synchronizing call.
    cudaError_t status = cudaGetLastError();
    if(status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if(status != cudaSuccess) {
        throw std::runtime_error(std::string("scaling : apply_functor failed : ")
                                 + cudaGetErrorString(status));
    }
    return output;
}
/**
 * Applies a Saxpy compound functor to every element of input on the device.
 *
 * The kernel is launched with a single block of a single thread, so that
 * thread processes all the elements.
 *
 * \param input values to transform.
 * \param func  compound functor applied to each element.
 * \return a new vector with the same size as input, holding the results.
 * \throws std::runtime_error if the kernel launch or its execution fails.
 */
DeviceVector<float> saxpy(const DeviceVector<float>& input, const Saxpy& func)
{
    DeviceVector<float> output(input.size());
    apply_functor<<<1,1>>>(output.data(), input.data(), func, input.size());

    // A kernel launch returns no status: configuration errors are reported by
    // cudaGetLastError(), execution errors by the next synchronizing call.
    cudaError_t status = cudaGetLastError();
    if(status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if(status != cudaSuccess) {
        throw std::runtime_error(std::string("saxpy : apply_functor failed : ")
                                 + cudaGetErrorString(status));
    }
    return output;
}
}; //namespace cuda
}; //namespace rtac
#ifndef _DEF_RTAC_CUDA_TESTS_FUNCTORS_TEST_H_
#define _DEF_RTAC_CUDA_TESTS_FUNCTORS_TEST_H_
#include <rtac_base/cuda/DeviceVector.h>
#include <rtac_base/cuda/Functors.h>
#include <rtac_base/cuda/functors.h>
namespace rtac { namespace cuda {
// Compound functor type made of an Offset<float> followed by a Scaling<float>
// in the FunctorCompound parameter list.
using Saxpy = FunctorCompound<functor::Offset<float>, functor::Scaling<float>>;
// Applies a Scaling<float> functor to the elements of input and returns the
// results in a new DeviceVector<float>.
DeviceVector<float> scaling(const DeviceVector<float>& input,
const functor::Scaling<float>& func);
// Applies a Saxpy compound functor to the elements of input and returns the
// results in a new DeviceVector<float>.
DeviceVector<float> saxpy(const DeviceVector<float>& input, const Saxpy& func);
}; //namespace cuda
}; //namespace rtac
#endif //_DEF_RTAC_CUDA_TESTS_FUNCTORS_TEST_H_
#ifndef _DEF_RTAC_CUDA_TESTS_FUNCTORS_TEST_HCU_
#define _DEF_RTAC_CUDA_TESTS_FUNCTORS_TEST_HCU_
/**
 * Device-side helper evaluating a functor on a single element.
 *
 * \param output destination, overwritten with func(input).
 * \param input  value handed to the functor.
 * \param func   callable invoked with input.
 */
template <class Tout, class Tin, class FunctorT>
__device__ void do_apply_functor(Tout&           output,
                                 const Tin&      input,
                                 const FunctorT& func)
{
    output = func(input);
}
/**
 * Kernel applying func to count elements : output[i] = func(input[i]).
 *
 * Written as a grid-stride loop over a 1D launch : each thread starts at its
 * global index and advances by the total number of launched threads. Any 1D
 * grid / block configuration is therefore valid, including <<<1,1>>>, in
 * which case the single thread processes every element in order.
 *
 * \param output device pointer to at least count writable elements.
 * \param input  device pointer to at least count readable elements.
 * \param func   functor, passed by value to the kernel.
 * \param count  number of elements to process (0 is allowed).
 */
template <typename Tout, typename Tin, class FunctorT>
__global__ void apply_functor(Tout* output, const Tin* input, FunctorT func, unsigned int count)
{
    // size_t index : avoids the signed/unsigned comparison against count and
    // cannot wrap around when i += stride goes past count.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for(size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
        i < count; i += stride)
    {
        do_apply_functor(output[i], input[i], func);
    }
}
#endif //_DEF_RTAC_CUDA_TESTS_FUNCTORS_TEST_HCU_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment