cxtream  0.5.1
C++17 data pipeline with Python bindings.
Public Member Functions | List of all members
cxtream::dataframe< DataTable > Class Template Reference

Tabular object with convenient data access methods. More...

#include <cxtream/core/dataframe.hpp>

Public Member Functions

template<typename T >
 dataframe (std::vector< std::vector< T >> columns, std::vector< std::string > header={})
 
template<typename... Ts>
 dataframe (std::tuple< std::vector< Ts >... > columns, std::vector< std::string > header={})
 
template<typename Rng , typename ValueT = ranges::range_value_type_t<Rng>>
std::size_t insert_col (Rng &&rng, std::string col_name={}, std::function< std::string(const ValueT &)> cvt=static_cast< std::string(*)(const ValueT &)>(utility::to_string))
 
template<typename... Ts>
std::size_t insert_row (std::tuple< Ts... > row_tuple, std::tuple< std::function< std::string(const Ts &)>... > cvts=std::make_tuple(static_cast< std::string(*)(const Ts &)>(utility::to_string)...))
 
std::size_t insert_row (std::vector< std::string > row)
 
void drop_icol (std::size_t col_index)
 
void drop_col (const std::string &col_name)
 
void drop_row (const std::size_t row_idx)
 
auto raw_icol (std::size_t col_index)
 
auto raw_icol (std::size_t col_index) const
 
auto raw_col (const std::string &col_name)
 
auto raw_col (const std::string &col_name) const
 
template<typename T >
auto icol (std::size_t col_index, std::function< T(const std::string &)> cvt=utility::string_to< T >) const
 
template<typename T >
auto col (const std::string &col_name, std::function< T(const std::string &)> cvt=utility::string_to< T >) const
 
auto raw_cols ()
 
auto raw_cols () const
 
auto raw_icols (std::vector< std::size_t > col_indexes)
 
auto raw_icols (std::vector< std::size_t > col_indexes) const
 
auto raw_cols (const std::vector< std::string > &col_names)
 
auto raw_cols (const std::vector< std::string > &col_names) const
 
template<typename... Ts>
auto icols (std::vector< std::size_t > col_indexes, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename... Ts>
auto cols (const std::vector< std::string > &col_names, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
auto raw_rows ()
 
auto raw_rows () const
 
auto raw_irows (std::vector< std::size_t > col_indexes)
 
auto raw_irows (std::vector< std::size_t > col_indexes) const
 
auto raw_rows (const std::vector< std::string > &col_names)
 
auto raw_rows (const std::vector< std::string > &col_names) const
 
template<typename... Ts>
auto irows (std::vector< std::size_t > col_indexes, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename... Ts>
auto rows (const std::vector< std::string > &col_names, std::tuple< std::function< Ts(const std::string &)>... > cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename IndexT , typename ColT >
auto index_icol (std::size_t key_col_index, std::size_t val_col_index, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::function< ColT(const std::string &)> val_col_cvt=utility::string_to< ColT >) const
 
template<typename IndexT , typename ColT >
auto index_col (const std::string &key_col_name, const std::string &val_col_name, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::function< ColT(const std::string &)> val_col_cvt=utility::string_to< ColT >) const
 
template<typename IndexT , typename... Ts>
auto index_icols (std::size_t key_col_index, std::vector< std::size_t > val_col_indexes, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::tuple< std::function< Ts(const std::string &)>... > val_col_cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
template<typename IndexT , typename... Ts>
auto index_cols (const std::string &key_col_name, const std::vector< std::string > &val_col_names, std::function< IndexT(const std::string &)> key_col_cvt=utility::string_to< IndexT >, std::tuple< std::function< Ts(const std::string &)>... > val_col_cvts=std::make_tuple(utility::string_to< Ts >...)) const
 
std::size_t n_cols () const
 Return the number of columns.
 
std::size_t n_rows () const
 Return the number of rows (excluding header).
 
void header (std::vector< std::string > new_header)
 
std::vector< std::string > header () const
 Return the names of columns.
 
DataTable & data ()
 Return a reference to the raw data table.
 
const DataTable & data () const
 Return a const reference to the raw data table.
 

Detailed Description

template<typename DataTable = std::vector<std::vector<std::string>>>
class cxtream::dataframe< DataTable >

Tabular object with convenient data access methods.

By default, all fields are stored as std::string and they are cast to the requested type on demand.

Definition at line 38 of file dataframe.hpp.

Constructor & Destructor Documentation

◆ dataframe() [1/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename T >
cxtream::dataframe< DataTable >::dataframe ( std::vector< std::vector< T >>  columns,
std::vector< std::string >  header = {} 
)
inline

Constructs the dataset from a vector of columns of the same type.

Example:

dataframe<> df{
// columns
std::vector<std::vector<int>>{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}},
// header
std::vector<std::string>{"A", "B", "C"}
};
Exceptions
std::invalid_argument 1) If the header is provided, but some of the column names are empty. 2) If the column sizes mismatch. 3) If the provided header does not match the number of provided columns.

Definition at line 60 of file dataframe.hpp.

◆ dataframe() [2/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename... Ts>
cxtream::dataframe< DataTable >::dataframe ( std::tuple< std::vector< Ts >... >  columns,
std::vector< std::string >  header = {} 
)
inline

Constructs the dataset from a tuple of columns of possibly different types.

Example:

dataframe<> df{
// columns
std::make_tuple(
std::vector<int>{1, 2, 3},
std::vector<std::string>{"a1", "a2", "a3"},
std::vector<std::string>{"1.1", "1.2", "1.3"}
),
// header
std::vector<std::string>{"Id", "A", "B"}
};
Exceptions
std::invalid_argument 1) If the header is provided, but some of the column names are empty. 2) If the column sizes mismatch. 3) If the provided header does not match the number of provided columns.

Definition at line 91 of file dataframe.hpp.

Member Function Documentation

◆ col()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename T >
auto cxtream::dataframe< DataTable >::col ( const std::string &  col_name,
std::function< T(const std::string &)>  cvt = utility::string_to<T> 
) const
inline

Return a typed view of a column.

By default, this function does not provide direct access to the stored data. Instead, each field is converted to the type T and a copy is returned.

Example:

std::vector<long> data = df.col<long>("long column");
Returns
A range of T.
Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 296 of file dataframe.hpp.

◆ cols()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename... Ts>
auto cxtream::dataframe< DataTable >::cols ( const std::vector< std::string > &  col_names,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple columns.

Example:

std::tuple<std::vector<int>, std::vector<double>> data =
df.cols<int, double>({"column 1", "column 2"});
Returns
A tuple of ranges of Ts.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 425 of file dataframe.hpp.

◆ drop_col()

template<typename DataTable = std::vector<std::vector<std::string>>>
void cxtream::dataframe< DataTable >::drop_col ( const std::string &  col_name)
inline

Drop a column with the given name.

Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 187 of file dataframe.hpp.

◆ drop_icol()

template<typename DataTable = std::vector<std::vector<std::string>>>
void cxtream::dataframe< DataTable >::drop_icol ( std::size_t  col_index)
inline

Drop a column with the given index.

Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 171 of file dataframe.hpp.

◆ drop_row()

template<typename DataTable = std::vector<std::vector<std::string>>>
void cxtream::dataframe< DataTable >::drop_row ( const std::size_t  row_idx)
inline

Drop a row.

Exceptions
std::out_of_range If the row is not in the dataframe.

Definition at line 196 of file dataframe.hpp.

◆ header()

template<typename DataTable = std::vector<std::vector<std::string>>>
void cxtream::dataframe< DataTable >::header ( std::vector< std::string >  new_header)
inline

Set the column names.

Exceptions
std::invalid_argument 1) If some of the column names are empty. 2) If the header does not match the number of columns.

Definition at line 687 of file dataframe.hpp.

◆ icol()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename T >
auto cxtream::dataframe< DataTable >::icol ( std::size_t  col_index,
std::function< T(const std::string &)>  cvt = utility::string_to<T> 
) const
inline

Return a typed view of a column.

By default, this function does not provide direct access to the stored data. Instead, each field is converted to the type T and a copy is returned.

Example:

std::vector<long> data = df.icol<long>(3);
Returns
A range of T.
Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 277 of file dataframe.hpp.

◆ icols()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename... Ts>
auto cxtream::dataframe< DataTable >::icols ( std::vector< std::size_t >  col_indexes,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple columns.

Example:

std::tuple<std::vector<int>, std::vector<double>> data = df.icols<int, double>({1, 2});
Returns
A tuple of ranges of Ts.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 403 of file dataframe.hpp.

◆ index_col()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename IndexT , typename ColT >
auto cxtream::dataframe< DataTable >::index_col ( const std::string &  key_col_name,
const std::string &  val_col_name,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::function< ColT(const std::string &)>  val_col_cvt = utility::string_to<ColT> 
) const
inline

Return an indexed typed view of a single column.

std::unordered_map<int, double> mapper = df.index_col<int, double>("first", "second");

This function is the same as index_icol(), but columns are selected by name.

Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 599 of file dataframe.hpp.

◆ index_cols()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename IndexT , typename... Ts>
auto cxtream::dataframe< DataTable >::index_cols ( const std::string &  key_col_name,
const std::vector< std::string > &  val_col_names,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::tuple< std::function< Ts(const std::string &)>... >  val_col_cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return an indexed typed view of multiple columns.

See index_icol().

std::unordered_map<int, std::tuple<long, double>> mapper =
df.index_cols<int, long, double>("id", {"col1", "col2"});

This function is similar to index_icols(), but columns are selected by name.

Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 652 of file dataframe.hpp.

◆ index_icol()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename IndexT , typename ColT >
auto cxtream::dataframe< DataTable >::index_icol ( std::size_t  key_col_index,
std::size_t  val_col_index,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::function< ColT(const std::string &)>  val_col_cvt = utility::string_to<ColT> 
) const
inline

Return an indexed typed view of a single column.

This function returns a range of tuples, where the first tuple element is from the key column and the second element is from the value column. This range can be used to construct a map or a hashmap.

Example:

std::unordered_map<int, double> mapper = df.index_icol<int, double>(0, 1);
Parameters
key_col_index: Index of the column to be used as key.
val_col_index: Index of the column to be used as value.
key_col_cvt: Function that is used to convert the keys from std::string to IndexT.
val_col_cvt: Function that is used to convert the values from std::string to ColT.
Returns
A range of tuples <key, value>.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 578 of file dataframe.hpp.

◆ index_icols()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename IndexT , typename... Ts>
auto cxtream::dataframe< DataTable >::index_icols ( std::size_t  key_col_index,
std::vector< std::size_t >  val_col_indexes,
std::function< IndexT(const std::string &)>  key_col_cvt = utility::string_to<IndexT>,
std::tuple< std::function< Ts(const std::string &)>... >  val_col_cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return an indexed typed view of multiple columns.

See index_icol().

std::unordered_map<int, std::tuple<long, double>> mapper =
df.index_icols<int, long, double>(0, {1, 2});

This function is similar to index_icol(), but value type is a tuple of Ts.

Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 628 of file dataframe.hpp.

◆ insert_col()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename Rng , typename ValueT = ranges::range_value_type_t<Rng>>
std::size_t cxtream::dataframe< DataTable >::insert_col ( Rng &&  rng,
std::string  col_name = {},
std::function< std::string(const ValueT &)>  cvt = static_cast<std::string (*)(const ValueT&)>(utility::to_string) 
)
inline

Inserts a new column to the dataframe.

Example:

df.insert_col(std::vector<int>{5, 6, 7}, "C");
Exceptions
std::invalid_argument 1) If the dataframe has a header but no column name was provided. 2) If the column size is not equal to n_rows.

Definition at line 114 of file dataframe.hpp.

◆ insert_row() [1/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename... Ts>
std::size_t cxtream::dataframe< DataTable >::insert_row ( std::tuple< Ts... >  row_tuple,
std::tuple< std::function< std::string(const Ts &)>... >  cvts = std::make_tuple(static_cast<std::string (*)(const Ts&)>(utility::to_string)...) 
)
inline

Inserts a new typed row to the dataframe.

Example:

df.insert_row(std::make_tuple(4, "a3", true));
Returns
The index of the new row.
Exceptions
std::invalid_argument If the row size is not equal to n_cols.

Definition at line 135 of file dataframe.hpp.

◆ insert_row() [2/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
std::size_t cxtream::dataframe< DataTable >::insert_row ( std::vector< std::string >  row)
inline

Inserts a new raw row to the dataframe.

Example:

df.insert_row({"field 1", "field 2", "field 3"});
Returns
The index of the new row.
Exceptions
std::invalid_argument If the row size is not equal to n_cols.

Definition at line 157 of file dataframe.hpp.

◆ irows()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename... Ts>
auto cxtream::dataframe< DataTable >::irows ( std::vector< std::size_t >  col_indexes,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple rows.

This function provides the same data as icols() but transposed.

Example:

std::vector<std::tuple<int, double>> data =
df.irows<int, double>({0, 2});
Returns
A range of tuples of Ts.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 528 of file dataframe.hpp.

◆ raw_col() [1/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_col ( const std::string &  col_name)
inline

Return a raw view of a column.

The data can be directly changed by writing to the view.

Example:

df.raw_col("long column")[2] = "new_value";
Returns
A range of std::string&.
Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 244 of file dataframe.hpp.

◆ raw_col() [2/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_col ( const std::string &  col_name) const
inline

Return a raw view of a column.

This is just a const overload of the non-const raw_col().

Returns
A range of const std::string&.
Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 256 of file dataframe.hpp.

◆ raw_cols() [1/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_cols ( )
inline

Return a raw view of all columns.

The data can be directly changed by writing to the view.

Example:

// get the third row from the sixth column
std::string field = df.raw_cols()[5][2];
Returns
A range of ranges of std::string&.

Definition at line 316 of file dataframe.hpp.

◆ raw_cols() [2/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_cols ( ) const
inline

Return a raw view of all columns.

This is just a const overload of the non-const argument-less raw_cols().

Returns
A range of ranges of const std::string&.

Definition at line 326 of file dataframe.hpp.

◆ raw_cols() [3/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_cols ( const std::vector< std::string > &  col_names)
inline

Return a raw view of multiple columns.

The data can be directly changed by writing to the view.

Example:

// get the sixth row from the column named "column 2"
std::string field = df.raw_cols({"column 1", "column 2"})[1][5];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 373 of file dataframe.hpp.

◆ raw_cols() [4/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_cols ( const std::vector< std::string > &  col_names) const
inline

Return a raw view of multiple columns.

This is just a const overload of the non-const raw_cols().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 385 of file dataframe.hpp.

◆ raw_icol() [1/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_icol ( std::size_t  col_index)
inline

Return a raw view of a column.

The data can be directly changed by writing to the view.

Example:

df.raw_icol(3)[2] = "new_value";
Returns
A range of std::string&.
Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 217 of file dataframe.hpp.

◆ raw_icol() [2/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_icol ( std::size_t  col_index) const
inline

Return a raw view of a column.

Returns
A range of const std::string&.
Exceptions
std::out_of_range If the column is not in the dataframe.

Definition at line 227 of file dataframe.hpp.

◆ raw_icols() [1/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_icols ( std::vector< std::size_t >  col_indexes)
inline

Return a raw view of multiple columns.

The data can be directly changed by writing to the view.

Example:

// get the third row from the sixth column (with index 5)
std::string field = df.raw_icols({1, 5})[1][2];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 343 of file dataframe.hpp.

◆ raw_icols() [2/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_icols ( std::vector< std::size_t >  col_indexes) const
inline

Return a raw view of multiple columns.

This is just a const overload of the non-const raw_icols().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 355 of file dataframe.hpp.

◆ raw_irows() [1/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_irows ( std::vector< std::size_t >  col_indexes)
inline

Return a raw view of multiple rows.

Example:

// get the third row from the sixth column (with index 5)
std::string field = df.raw_irows({3, 5})[2][1];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 467 of file dataframe.hpp.

◆ raw_irows() [2/2]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_irows ( std::vector< std::size_t >  col_indexes) const
inline

Return a raw view of multiple rows.

This is just a const overload of the non-const raw_irows().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 479 of file dataframe.hpp.

◆ raw_rows() [1/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_rows ( )
inline

Return a raw view of all rows.

Example:

// get the third row from the sixth column
std::string field = df.raw_rows()[2][5];
Returns
A range of ranges of std::string&.

Definition at line 442 of file dataframe.hpp.

◆ raw_rows() [2/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_rows ( ) const
inline

Return a raw view of all rows.

This is just a const overload of the non-const argument-less raw_rows().

Returns
A range of ranges of const std::string&.

Definition at line 452 of file dataframe.hpp.

◆ raw_rows() [3/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_rows ( const std::vector< std::string > &  col_names)
inline

Return a raw view of multiple rows.

Example:

// get the third row from column named "col2"
std::string field = df.raw_rows({"col1", "col2"})[2][1];
Returns
A range of ranges of std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 495 of file dataframe.hpp.

◆ raw_rows() [4/4]

template<typename DataTable = std::vector<std::vector<std::string>>>
auto cxtream::dataframe< DataTable >::raw_rows ( const std::vector< std::string > &  col_names) const
inline

Return a raw view of multiple rows.

This is just a const overload of the non-const raw_rows().

Returns
A range of ranges of const std::string&.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 507 of file dataframe.hpp.

◆ rows()

template<typename DataTable = std::vector<std::vector<std::string>>>
template<typename... Ts>
auto cxtream::dataframe< DataTable >::rows ( const std::vector< std::string > &  col_names,
std::tuple< std::function< Ts(const std::string &)>... >  cvts = std::make_tuple(utility::string_to<Ts>...) 
) const
inline

Return a typed view of multiple rows.

This function provides the same data as cols() but transposed.

Example:

std::vector<std::tuple<int, double>> data =
df.rows<int, double>({"int_col", "double_col"});
Returns
A range of tuples of Ts.
Exceptions
std::out_of_range If any of the columns is not in the dataframe.

Definition at line 550 of file dataframe.hpp.


The documentation for this class was generated from the following file: