cxtream  0.5.1
C++17 data pipeline with Python bindings.
transform.hpp
1 /****************************************************************************
2  * cxtream library
3  * Copyright (c) 2017, Cognexa Solutions s.r.o.
4  * Author(s) Filip Matzner
5  *
6  * This file is distributed under the MIT License.
7  * See the accompanying file LICENSE.txt for the complete license agreement.
8  ****************************************************************************/
9 
10 #ifndef CXTREAM_CORE_STREAM_TRANSFORM_HPP
11 #define CXTREAM_CORE_STREAM_TRANSFORM_HPP
12 
13 #include <cxtream/build_config.hpp>
14 #include <cxtream/core/stream/template_arguments.hpp>
15 #include <cxtream/core/utility/random.hpp>
16 #include <cxtream/core/utility/tuple.hpp>
17 #include <cxtream/core/utility/vector.hpp>
18 
19 #include <range/v3/view/any_view.hpp>
20 #include <range/v3/view/transform.hpp>
21 #include <range/v3/view/zip.hpp>
22 
23 #include <functional>
24 #include <utility>
25 
26 namespace cxtream::stream {
27 
28 // partial transform //
29 
30 namespace detail {
31 
32  // Implementation of partial_transform.
33  template<typename Fun, typename Projection, typename Source, typename From, typename To>
34  struct partial_transformer;
35 
36  template<typename Fun, typename Projection, typename... SourceTypes,
37  typename... FromTypes, typename... ToTypes>
38  struct partial_transformer<Fun, Projection, std::tuple<SourceTypes...>,
39  from_t<FromTypes...>, to_t<ToTypes...>> {
40  Fun fun;
41  Projection proj;
42 
43  constexpr auto operator()(std::tuple<SourceTypes...> source)
44  {
45  // build the view for the transformer, i.e., slice and project
46  auto slice_view =
47  utility::tuple_transform(utility::tuple_type_view<FromTypes...>(source), proj);
48  // process the transformer's result and convert it to the requested types
49  std::tuple<ToTypes...> result{std::invoke(fun, std::move(slice_view))};
50  // replace the corresponding fields
51  return utility::tuple_cat_unique(std::move(result), std::move(source));
52  }
53  };
54 
55  class partial_transform_fn {
56  private:
57  friend ranges::view::view_access;
58 
59  template <typename From, typename To, typename Fun, typename Projection = ref_wrap_t>
60  static auto bind(partial_transform_fn transformer, From f, To t, Fun fun,
61  Projection proj = Projection{})
62  {
63  return ranges::make_pipeable(
64  std::bind(transformer, std::placeholders::_1, f, t, std::move(fun), std::move(proj)));
65  }
66 
67  public:
68  template <typename Rng, typename... FromTypes, typename... ToTypes,
69  typename Fun, typename Projection = ref_wrap_t,
70  CONCEPT_REQUIRES_(ranges::ForwardRange<Rng>())>
71  constexpr auto operator()(Rng&& rng, from_t<FromTypes...>, to_t<ToTypes...>, Fun fun,
72  Projection proj = Projection{}) const
73  {
74  static_assert(sizeof...(ToTypes) > 0, "For non-transforming operations, please"
75  " use stream::for_each.");
76 
77  using StreamType = ranges::range_value_type_t<Rng>;
78  detail::partial_transformer<Fun, Projection,
79  StreamType, from_t<FromTypes...>, to_t<ToTypes...>>
80  trans_fun{std::move(fun), std::move(proj)};
81 
82  // any_view is used to erase types and speed up compilation time
83  using RefType = ranges::range_reference_t<Rng>;
85  ranges::any_view<RefType, ranges::category::forward>{std::forward<Rng>(rng)},
86  std::move(trans_fun));
87  }
88 
90  template <typename Rng, typename From, typename To,
91  typename Fun, typename Proj = ref_wrap_t,
92  CONCEPT_REQUIRES_(!ranges::ForwardRange<Rng>())>
93  constexpr auto operator()(Rng&& rng, From, To, Fun, Proj, Proj proj = Proj{}) const
94  {
95  CONCEPT_ASSERT_MSG(ranges::ForwardRange<Rng>(),
96  "Stream transformations only work on ranges satisfying the ForwardRange concept.");
97  }
99  };
100 
101 } // namespace detail
102 
103 // Transform a subset of tuple elements for each tuple in a range and concatenate the result
104 // with the original tuple.
105 //
106 // The result tuple overrides the corresponding types from the original tuple.
107 constexpr ranges::view::view<detail::partial_transform_fn> partial_transform{};
108 
109 // transform //
110 
111 namespace detail {
112 
113  // Apply fun to each element in tuple of ranges in the given dimension.
114  template<typename Fun, std::size_t Dim, std::size_t NOuts, typename From, typename To>
115  struct wrap_fun_for_dim;
116 
117  template<typename Fun, std::size_t Dim, std::size_t NOuts,
118  typename... FromTypes, typename... ToTypes>
119  struct wrap_fun_for_dim<Fun, Dim, NOuts, from_t<FromTypes...>, to_t<ToTypes...>> {
120  Fun fun;
121  using FunRef = decltype(std::ref(fun));
122 
123  constexpr utility::maybe_tuple<ToTypes...>
124  operator()(std::tuple<FromTypes&...> tuple_of_ranges)
125  {
126  assert(utility::same_size(tuple_of_ranges));
127  // build the function to be applied
128  wrap_fun_for_dim<FunRef, Dim-1, NOuts,
129  from_t<ranges::range_value_type_t<FromTypes>...>,
130  to_t<ranges::range_value_type_t<ToTypes>...>>
131  fun_wrapper{std::ref(fun)};
132  // transform
133  auto range_of_tuples =
135  boost::hana::unpack(std::move(tuple_of_ranges), ranges::view::zip),
136  std::move(fun_wrapper));
137  return utility::unzip_if<(NOuts > 1)>(std::move(range_of_tuples));
138  }
139  };
140 
141  template<typename Fun, std::size_t NOuts, typename... FromTypes, typename... ToTypes>
142  struct wrap_fun_for_dim<Fun, 0, NOuts, from_t<FromTypes...>, to_t<ToTypes...>> {
143  Fun fun;
144 
145  constexpr utility::maybe_tuple<ToTypes...>
146  operator()(std::tuple<FromTypes&...> tuple)
147  {
148  return boost::hana::unpack(std::move(tuple), fun);
149  }
150  };
151 
152 } // namespace detail
153 
176 template<typename... FromColumns, typename... ToColumns, typename Fun, int Dim = 1>
177 constexpr auto transform(from_t<FromColumns...> f,
178  to_t<ToColumns...> t,
179  Fun fun,
180  dim_t<Dim> d = dim_t<1>{})
181 {
182  // wrap the function to be applied in the appropriate dimension
183  detail::wrap_fun_for_dim<
184  Fun, Dim, sizeof...(ToColumns),
185  from_t<typename FromColumns::batch_type...>,
186  to_t<typename ToColumns::batch_type...>>
187  fun_wrapper{std::move(fun)};
188 
189  auto proj = [](auto& column) { return std::ref(column.value()); };
190  return stream::partial_transform(f, t, std::move(fun_wrapper), std::move(proj));
191 }
192 
193 // conditional transform //
194 
195 namespace detail {
196 
197  // wrap the function to be applied only on if the first argument evaluates to true
198  template<typename Fun, typename FromIdxs, typename ToIdxs, typename From, typename To>
199  struct wrap_fun_with_cond;
200 
201  template<typename Fun, typename FromIdxs, std::size_t... ToIdxs,
202  typename CondCol, typename... Cols, typename... ToTypes>
203  struct wrap_fun_with_cond<Fun, FromIdxs, std::index_sequence<ToIdxs...>,
204  from_t<CondCol, Cols...>, to_t<ToTypes...>> {
205  Fun fun;
206 
207  constexpr utility::maybe_tuple<ToTypes...> operator()(CondCol& cond, Cols&... cols)
208  {
209  // make a tuple of all arguments, except for the condition
210  std::tuple<Cols&...> args_view{cols...};
211  // apply the function if the condition is true
212  if (cond) {
213  // the function is applied only on a subset of the arguments
214  // representing FromColumns
215  return boost::hana::unpack(
216  utility::tuple_index_view(args_view, FromIdxs{}), fun);
217  }
218  // return the original arguments if the condition is false
219  // only a subset of the arguments representing ToColumns is returned
220  // note: We can force std::move in here, because
221  // we are only copying data to themselves.
222  return {std::move(std::get<ToIdxs>(args_view))...};
223  }
224  };
225 
226 } // namespace detail
227 
278 template<
279  typename... FromColumns,
280  typename... ToColumns,
281  typename CondColumn,
282  typename Fun,
283  int Dim = 1>
284 constexpr auto transform(
285  from_t<FromColumns...> f,
286  to_t<ToColumns...> t,
287  cond_t<CondColumn> c,
288  Fun fun,
289  dim_t<Dim> d = dim_t<1>{})
290 {
291  // make index sequences for source and target columns when they
292  // are concatenated in a single tuple
293  constexpr std::size_t n_from = sizeof...(FromColumns);
294  constexpr std::size_t n_to = sizeof...(ToColumns);
295  using FromIdxs = std::make_index_sequence<n_from>;
297 
298  // wrap the function to be applied in the appropriate dimension using the condition column
299  detail::wrap_fun_with_cond<
300  Fun, FromIdxs, ToIdxs,
301  from_t<utility::ndim_type_t<typename CondColumn::batch_type, Dim>,
302  utility::ndim_type_t<typename FromColumns::batch_type, Dim>...,
303  utility::ndim_type_t<typename ToColumns::batch_type, Dim>...>,
304  to_t<utility::ndim_type_t<typename ToColumns::batch_type, Dim>...>>
305  cond_fun{std::move(fun)};
306 
307  // transform from both, FromColumns and ToColumns into ToColumns
308  // the wrapper function takes care of extracting the parameters for the original function
309  return stream::transform(from_t<CondColumn, FromColumns..., ToColumns...>{},
310  t, std::move(cond_fun), d);
311 }
312 
313 // probabilistic transform //
314 
315 namespace detail {
316 
317  // wrap the function to be an identity if the dice roll fails
318  template<typename Fun, typename Prng,
319  typename FromIdxs, typename ToIdxs,
320  typename From, typename To>
321  struct wrap_fun_with_prob;
322 
323  template<typename Fun, typename Prng,
324  typename FromIdxs, std::size_t... ToIdxs,
325  typename... FromTypes, typename... ToTypes>
326  struct wrap_fun_with_prob<Fun, Prng,
327  FromIdxs, std::index_sequence<ToIdxs...>,
328  from_t<FromTypes...>, to_t<ToTypes...>> {
329  Fun fun;
330  std::reference_wrapper<Prng> prng;
331  const double prob;
332 
333  utility::maybe_tuple<ToTypes...> operator()(FromTypes&... cols)
334  {
335  assert(prob >= 0. && prob <= 1.);
336  std::uniform_real_distribution<> dis{0, 1};
337  // make a tuple of all arguments
338  std::tuple<FromTypes&...> args_view{cols...};
339  // apply the function if the dice roll succeeds
340  if (prob == 1. || (prob > 0. && dis(prng.get()) < prob)) {
341  // the function is applied only on a subset of the arguments
342  // representing FromColumns
343  return boost::hana::unpack(
344  utility::tuple_index_view(args_view, FromIdxs{}), fun);
345  }
346  // return the original arguments if the dice roll fails
347  // only a subset of the arguments representing ToColumns is returned
348  // note: We can force std::move in here, because
349  // we are only copying data to themselves.
350  return {std::move(std::get<ToIdxs>(args_view))...};
351  }
352  };
353 
354 } // namespace detail
355 
387 template<
388  typename... FromColumns,
389  typename... ToColumns,
390  typename Fun,
391  typename Prng = std::mt19937,
392  int Dim = 1>
393 constexpr auto transform(
394  from_t<FromColumns...> f,
395  to_t<ToColumns...> t,
396  double prob,
397  Fun fun,
398  Prng& prng = utility::random_generator,
399  dim_t<Dim> d = dim_t<1>{})
400 {
401  // make index sequences for source and target columns when they
402  // are concatenated in a single tuple
403  constexpr std::size_t n_from = sizeof...(FromColumns);
404  constexpr std::size_t n_to = sizeof...(ToColumns);
405  using FromIdxs = std::make_index_sequence<n_from>;
407 
408  // wrap the function to be applied in the appropriate dimension with the given probabiliy
409  detail::wrap_fun_with_prob<
410  Fun, Prng, FromIdxs, ToIdxs,
411  from_t<utility::ndim_type_t<typename FromColumns::batch_type, Dim>...,
412  utility::ndim_type_t<typename ToColumns::batch_type, Dim>...>,
413  to_t<utility::ndim_type_t<typename ToColumns::batch_type, Dim>...>>
414  prob_fun{std::move(fun), prng, prob};
415 
416  // transform from both, FromColumns and ToColumns into ToColumns
417  // the wrapper function takes care of extracting the parameters for the original function
418  return stream::transform(from_t<FromColumns..., ToColumns...>{}, t, std::move(prob_fun), d);
419 }
420 
421 } // namespace cxtream::stream
422 #endif
constexpr auto tuple_cat_unique(Tuples &&... tuples)
Concatenate two tuples and keep only the first element of each type.
Definition: tuple.hpp:291
decltype(plus< Offset >(std::make_index_sequence< N >{})) make_offset_index_sequence
Make std::index_sequence with the given offset.
Definition: tuple.hpp:92
static thread_local std::mt19937 random_generator
Thread local pseudo-random number generator seeded by std::random_device.
Definition: random.hpp:20
STL namespace.
constexpr bool same_size(Tuple &&rngs)
Utility function which checks that all the ranges in a tuple have the same size.
Definition: vector.hpp:712
constexpr auto unpack(Rng &&rng, from_t< FromColumns... > f, dim_t< Dim > d=dim_t< 1 >{})
Unpack a stream into a tuple of ranges.
Definition: unpack.hpp:100
constexpr auto tuple_index_view(Tuple &tuple, std::index_sequence< Idxs... >={})
Makes a sub-tuple made of references to the original tuple (selected by index).
Definition: tuple.hpp:205
constexpr auto transform(from_t< FromColumns... > f, to_t< ToColumns... > t, double prob, Fun fun, Prng &prng=utility::random_generator, dim_t< Dim > d=dim_t< 1 >{})
Probabilistic transform of a subset of cxtream columns.
Definition: transform.hpp:393
constexpr auto tuple_transform(Tuple &&tuple, Fun &&fun)
Transform each element of a tuple.
Definition: tuple.hpp:155