dev-0.6.0 (#78)
* INSTALL

* make ttl::range less generic

* ttl::copy

* --with-cuda

* add .size() and .dims() to tensor types (#79)

* add size method to tensor

* dims()

* deprecate from_host, to_host (#80)

* support customized install prefix
lgarithm authored Feb 1, 2020
1 parent c0c6ffb commit f3d26d6
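The headline change is the deprecation of from_host/to_host in favor of an explicit ttl::copy, added under include/ttl/experimental/copy. A minimal before/after sketch (shapes and variable names illustrative; the exact calls appear in the updated tests below):

    ttl::cuda_tensor<float, 1> dev(n);
    ttl::tensor<float, 1> host(n);

    // before (removed in this commit):
    dev.from_host(host.data());
    dev.to_host(host.data());

    // after: the transfer direction is encoded in the ref/view access types
    ttl::copy(ttl::ref(dev), ttl::view(host));  // host -> device
    ttl::copy(ttl::ref(host), ttl::view(dev));  // device -> host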
Showing 13 changed files with 150 additions and 52 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -27,7 +27,8 @@ OPTION(BUILD_EXAMPLES "Build examples." OFF)
OPTION(HAVE_CUDA "Have cuda_runtime.h." OFF)

IF(HAVE_CUDA)
-    # noop
+    INCLUDE_DIRECTORIES(${CUDA_HOME}/include)
+    LINK_DIRECTORIES(${CUDA_HOME}/lib64)
ELSE()
    ADD_DEFINITIONS(-DUSE_FAKE_CUDA_RUNTIME)
ENDIF()
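Assuming add_cmake_flag in the configure script below forwards plain -D definitions to CMake (an inference from its usage; the helper itself is not shown in this diff), the equivalent standalone invocation would be (path illustrative):

    cmake -DHAVE_CUDA=1 -DCUDA_HOME=/usr/local/cuda .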
10 changes: 10 additions & 0 deletions INSTALL
@@ -0,0 +1,10 @@
#!/bin/sh
set -e

if [ -z $PREFIX ]; then
PREFIX=$HOME/local
fi

./configure --prefix=$PREFIX

make install
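The script honors an externally set PREFIX, so installing under a custom prefix is a one-liner (path illustrative):

    PREFIX=/opt/ttl ./INSTALL    # defaults to $HOME/local when PREFIX is unset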
19 changes: 14 additions & 5 deletions configure
@@ -2,6 +2,7 @@
set -e

PREFIX=$(pwd)/local
+CUDA_HOME=/usr/local/cuda
USE_OPENCV=0
BUILD_TESTS=0
BUILD_BENCHMARKS=0
@@ -11,10 +12,6 @@ BUILD_GBENCH=0
HAVE_CUDA=0
VERBOSE=0

-if [ $(find /usr/include/cuda_runtime.h | wc -l) -gt 0 ]; then
-    HAVE_CUDA=1
-fi
-
parse_args() {
    for i in "$@"; do
        case $i in
@@ -48,6 +45,10 @@ parse_args() {
        --build-gbench)
            BUILD_GBENCH=1
            ;;
+        --with-cuda=*)
+            CUDA_HOME="${i#*=}"
+            echo "configure --with-cuda=$CUDA_HOME"
+            ;;
        --verbose)
            VERBOSE=1
            ;;
@@ -57,6 +58,10 @@
            ;;
        esac
    done
+
+    if [ -f $CUDA_HOME/include/cuda_runtime.h ]; then
+        HAVE_CUDA=1
+    fi
}

CMAKE_FLAGS=
@@ -96,7 +101,11 @@ add_cmake_flags() {
    add_cmake_flag BUILD_TESTS ${BUILD_TESTS}
    add_cmake_flag BUILD_BENCHMARKS ${BUILD_BENCHMARKS}
    add_cmake_flag BUILD_EXAMPLES ${BUILD_EXAMPLES}
-    add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
+
+    if [ ${HAVE_CUDA} -eq 1 ]; then
+        add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
+        add_cmake_flag CUDA_HOME $CUDA_HOME
+    fi

    if [ ${BUILD_EXAMPLES} -eq 1 ]; then
        add_cmake_flag USE_OPENCV ${USE_OPENCV}
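CUDA detection is now keyed off the configured toolkit location rather than a hard-coded /usr/include probe: HAVE_CUDA is set only when cuda_runtime.h exists under CUDA_HOME, and CUDA_HOME is forwarded to CMake only in that case. A typical invocation (toolkit path illustrative):

    ./configure --prefix=$HOME/local --with-cuda=/usr/local/cuda-10.2
    make install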
28 changes: 28 additions & 0 deletions include/ttl/bits/std_copy.hpp
@@ -0,0 +1,28 @@
#pragma once
#include <ttl/bits/std_cuda_allocator.hpp>
#include <ttl/bits/std_tensor.hpp>

namespace ttl
{
namespace internal
{
namespace experimental
{
template <typename R, typename S>
void copy(const basic_tensor<R, S, host_memory, readwrite> &dst,
          const basic_tensor<R, S, cuda_memory, readonly> &src)
{
    using copier = internal::cuda_copier;
    copier::copy<copier::d2h>(dst.data(), src.data(), src.data_size());
}

template <typename R, typename S>
void copy(const basic_tensor<R, S, cuda_memory, readwrite> &dst,
          const basic_tensor<R, S, host_memory, readonly> &src)
{
    using copier = internal::cuda_copier;
    copier::copy<copier::h2d>(dst.data(), src.data(), src.data_size());
}
} // namespace experimental
} // namespace internal
} // namespace ttl
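The transfer direction is selected by overload resolution on the memory and access tags: the destination must be a readwrite tensor and the source a readonly view in the opposite memory, so a mismatched pair fails to compile rather than silently copying the wrong way. A minimal usage sketch through the public header (aliases as used in the tests below):

    #include <ttl/cuda_tensor>
    #include <ttl/experimental/copy>
    #include <ttl/tensor>

    ttl::tensor<float, 1> host(1024);
    ttl::cuda_tensor<float, 1> dev(1024);

    ttl::copy(ttl::ref(dev), ttl::view(host));  // h2d
    ttl::copy(ttl::ref(host), ttl::view(dev));  // d2h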
1 change: 0 additions & 1 deletion include/ttl/bits/std_range.hpp
@@ -45,6 +45,5 @@ basic_integer_range<N> range(N m, N n)
{
    return basic_integer_range<N>(m, n);
}
-
} // namespace internal
} // namespace ttl
30 changes: 10 additions & 20 deletions include/ttl/bits/std_tensor_mixin.hpp
@@ -15,6 +15,8 @@ class basic_scalar_mixin
    using data_ref = typename trait::ref_type;
    using data_t = typename trait::Data;

+    using Dim = typename S::dimension_type;
+
    data_t data_;

  protected:
@@ -33,23 +35,17 @@

    basic_scalar_mixin(data_ptr data, const S &) : data_(data) {}

+    constexpr Dim size() const { return 1; }
+
+    constexpr auto dims() const { return S().dims(); }
+
    constexpr size_t data_size() const { return sizeof(R); }

    data_ptr data() const { return data_.get(); }

    data_ptr data_end() const { return data_.get() + 1; }

    S shape() const { return S(); }
-
-    void from_host(const void *data) const
-    {
-        basic_copier<D, host_memory>()(data_.get(), data, data_size());
-    }
-
-    void to_host(void *data) const
-    {
-        basic_copier<host_memory, D>()(data, data_.get(), data_size());
-    }
};

template <typename R, typename S, typename D, typename A>
@@ -121,6 +117,10 @@ class basic_tensor_mixin
    static constexpr auto rank = S::rank;

+    Dim size() const { return shape_.size(); }
+
+    const auto &dims() const { return shape_.dims(); }
+
    size_t data_size() const { return shape_.size() * sizeof(R); }

    const S &shape() const { return shape_; }
@@ -158,16 +158,6 @@
        return slice_type(data_.get() + i * sub_shape.size(),
                          batch(j - i, sub_shape));
    }
-
-    void from_host(const void *data) const
-    {
-        basic_copier<D, host_memory>()(data_.get(), data, data_size());
-    }
-
-    void to_host(void *data) const
-    {
-        basic_copier<host_memory, D>()(data, data_.get(), data_size());
-    }
};
} // namespace internal
} // namespace ttl
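Both mixins now expose the same inspection API: size() returns the total element count (constantly 1 for the scalar case) and dims() returns the per-axis extent array of the shape. A quick sketch (extents illustrative):

    ttl::tensor<float, 2> t(4, 5);
    t.size();  // 20, the product of all extents
    t.dims();  // {4, 5}, with t.dims().size() == 2, the rank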
8 changes: 8 additions & 0 deletions include/ttl/experimental/copy
@@ -0,0 +1,8 @@
// # -*- mode: c++ -*-
#pragma once
#include <ttl/bits/std_copy.hpp>

namespace ttl
{
using internal::experimental::copy;
} // namespace ttl
6 changes: 3 additions & 3 deletions include/ttl/range
@@ -4,17 +4,17 @@
#include <cstdint>

#include <ttl/bits/std_range.hpp>
+#include <ttl/bits/std_tensor_fwd.hpp>

namespace ttl
{
using internal::range;

using rank_t = uint8_t;

-// FIXME: make T less generic
-template <rank_t r, typename T> auto range(const T &t)
+template <rank_t r, typename R, typename S, typename D, typename A>
+auto range(const internal::basic_tensor<R, S, D, A> &t)
{
    return range(std::get<r>(t.shape().dims()));
}
-
} // namespace ttl
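Behavior is unchanged: range<r>(t) still iterates over the r-th extent of t's shape, but the overload now accepts only basic_tensor arguments instead of any T with a shape() member, which keeps unrelated types from matching. Sketch (extents illustrative):

    ttl::tensor<int, 2> t(3, 4);
    for (auto i : ttl::range<0>(t)) { /* i = 0, 1, 2 */ }
    for (auto j : ttl::range<1>(t)) { /* j = 0, ..., 3 */ }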
8 changes: 5 additions & 3 deletions tests/bench_cuda_tensor.cpp
@@ -1,16 +1,18 @@
#include "benchmark.hpp"

#include <ttl/cuda_tensor>
+#include <ttl/experimental/copy>

-template <typename R, int n> struct bench_cuda_tensor {
+template <typename R, int n>
+struct bench_cuda_tensor {
    static void run(benchmark::State &state)
    {
        ttl::cuda_tensor<R, 1> m1(n);
        ttl::tensor<R, 1> m2(n);

        for (auto _ : state) {
-            m1.from_host(m2.data());
-            m1.to_host(m2.data());
+            ttl::copy(ttl::ref(m1), ttl::view(m2));
+            ttl::copy(ttl::ref(m2), ttl::view(m1));
        }
    }
};
35 changes: 35 additions & 0 deletions tests/test_copy.cpp
@@ -0,0 +1,35 @@
#include "testing.hpp"

#include <ttl/algorithm>
#include <ttl/cuda_tensor>
#include <ttl/device>
#include <ttl/experimental/copy>
#include <ttl/range>
#include <ttl/tensor>

void test_copy(int n)
{
    ttl::tensor<int, 1> x_host(n);
    ttl::cuda_tensor<int, 1> x_cuda(n);

    ttl::fill(ttl::ref(x_host), 1);
    ttl::copy(ttl::ref(x_cuda), ttl::view(x_host));

    ttl::fill(ttl::ref(x_host), 2);
    for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 2); }

    ttl::copy(ttl::ref(x_host), ttl::view(x_cuda));
    for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 1); }
}

TEST(copy_test, test_copy)
{
    test_copy(1);
    test_copy(2);
    test_copy(10);
    test_copy(100);
    test_copy(1000);
    test_copy(1 << 20);
    test_copy(1 << 20);
    test_copy(1 << 20);
}
30 changes: 17 additions & 13 deletions tests/test_cuda_tensor.cpp
@@ -1,6 +1,7 @@
#include "testing.hpp"

#include <ttl/cuda_tensor>
+#include <ttl/experimental/copy>
#include <ttl/range>
#include <ttl/tensor>
@@ -23,11 +24,10 @@ TEST(cuda_tensor_test, test0)
{
    using R = float;
    cuda_tensor<R, 0> m0;
-
    tensor<R, 0> x;

-    m0.from_host(x.data());
-    m0.to_host(x.data());
+    ttl::copy(ttl::ref(m0), ttl::view(x));
+    ttl::copy(ttl::ref(x), ttl::view(m0));
}

TEST(cuda_tensor_test, test1)
@@ -42,8 +42,8 @@ TEST(cuda_tensor_test, test2)
    cuda_tensor<R, 2> m1(10, 100);
    tensor<R, 2> m2(10, 100);

-    m1.from_host(m2.data());
-    m1.to_host(m2.data());
+    ttl::copy(ttl::ref(m1), ttl::view(m2));
+    ttl::copy(ttl::ref(m2), ttl::view(m1));

    m1.slice(1, 2);
    auto r = ref(m1);
@@ -58,14 +58,16 @@ TEST(cuda_tensor_test, test_3)
    cuda_tensor<R, 2> m1(ttl::make_shape(10, 100));
}

-template <typename R, uint8_t r> void test_auto_ref()
+template <typename R, uint8_t r>
+void test_auto_ref()
{
    static_assert(
        std::is_convertible<cuda_tensor<R, r>, cuda_tensor_ref<R, r>>::value,
        "can't convert to ref");
}

-template <typename R, uint8_t r> void test_auto_view()
+template <typename R, uint8_t r>
+void test_auto_view()
{
    static_assert(
        std::is_convertible<cuda_tensor<R, r>, cuda_tensor_view<R, r>>::value,
@@ -87,28 +89,30 @@ TEST(cuda_tensor_test, test_convert)
    test_auto_view<int, 2>();
}

-template <typename R, uint8_t r> void test_copy(const ttl::shape<r> &shape)
+template <typename R, uint8_t r>
+void test_copy(const ttl::shape<r> &shape)
{
    tensor<R, r> x(shape);
    cuda_tensor<R, r> y(shape);
    tensor<R, r> z(shape);

    std::iota(x.data(), x.data_end(), 1);
-    y.from_host(x.data());
-    y.to_host(z.data());
+
+    ttl::copy(ttl::ref(y), ttl::view(x));
+    ttl::copy(ttl::ref(z), ttl::view(y));

    for (auto i : ttl::range(shape.size())) {
        ASSERT_EQ(x.data()[i], z.data()[i]);
    }

    {
        cuda_tensor_ref<R, r> ry = ref(y);
-        ry.from_host(x.data());
-        ry.to_host(x.data());
+        ttl::copy(ry, ttl::view(x));
+        ttl::copy(ttl::ref(z), ttl::view(ry));
    }
    {
        cuda_tensor_view<R, r> vy = view(y);
-        vy.to_host(x.data());
+        ttl::copy(ttl::ref(x), vy);
    }
}

14 changes: 14 additions & 0 deletions tests/test_public_types.cpp
@@ -45,6 +45,16 @@ ttl::shape<r> unit_shape()
    return ttl::shape<r>(dims);
}

+template <typename T>
+void test_public_apis(const T &t)
+{
+    const auto size = t.size();
+    ASSERT_EQ(size, static_cast<decltype(size)>(1));
+
+    const auto dims = t.dims();
+    static_assert(dims.size() == T::rank, "");
+}

template <ttl::rank_t r>
struct test_ranked_type {
template <typename R>
@@ -65,6 +75,10 @@ struct test_ranked_type {
        Tensor t(unit_shape<r>());
        TensorRef tr(t);
        TensorView tv(t);
+
+        test_public_apis(t);
+        test_public_apis(tr);
+        test_public_apis(tv);
    }
};
