libMVL
Mappable vector library
Classes | Macros | Typedefs | Functions
libMVL.h File Reference

core libMVL functions and structures More...

#include <stdio.h>
#include <math.h>
Include dependency graph for libMVL.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  LIBMVL_PREAMBLE
 This structure is written at the beginning of MVL file. It contains the signature identifying MVL format, and a means to check the endianness of the MVL file. More...
 
struct  LIBMVL_POSTAMBLE
 This structure is written last to close MVL file. It contains an offset to MVL directory that can be used to retrieve offsets to LIBMVL_VECTOR structures stored in MVL file. More...
 
struct  LIBMVL_VECTOR_HEADER
 This structure describes the header of MVL vector. It is basically LIBMVL_VECTOR without the actual data. More...
 
struct  LIBMVL_VECTOR
 LIBMVL_VECTOR is the basic unit of information storage. More...
 
struct  LIBMVL_NAMED_LIST
 This structure describes a named list - an array of LIBMVL_OFFSET64 entries each with a character name or tag. More...
 
struct  LIBMVL_CONTEXT
 This structure describes MVL context - a collection of system data associated with a single MVL file. More...
 
struct  HASH_MAP
 This structure is used for constructing associative maps and also for describing index groupings. More...
 
struct  LIBMVL_PARTITION
 List of offsets partitioning the vector. First element is always 0, last element is vector size. More...
 
struct  LIBMVL_EXTENT_LIST
 List of extents - ranges of consecutive indices. Similar to partition, but they do not have to follow each other. More...
 
struct  LIBMVL_EXTENT_INDEX
 An index into a table-like set of vectors with equal number of elements. More...
 
struct  LIBMVL_VEC_STATS
 Vector statistics. More...
 

Macros

#define LIBMVL_VECTOR_UINT8   1
 
#define LIBMVL_VECTOR_INT32   2
 
#define LIBMVL_VECTOR_INT64   3
 
#define LIBMVL_VECTOR_FLOAT   4
 
#define LIBMVL_VECTOR_DOUBLE   5
 
#define LIBMVL_VECTOR_OFFSET64   100
 
#define LIBMVL_VECTOR_CSTRING   101
 
#define LIBMVL_PACKED_LIST64   102
 
#define LIBMVL_NO_METADATA   0
 Use this constant to specify that no metadata should be written.
 
#define LIBMVL_NULL_OFFSET   0
 Null offsets into memory mapped data are always invalid because that is where the preamble is. This is usually used to indicate that the offset does not point to valid data.
 
#define MVL_WVEC(ctx, type, ...)   mvl_write_vector_inline(ctx, type, MVL_NUMARGS(__VA_ARGS__), 0, __VA_ARGS__)
 
#define mvl_vector_type(data)   (((LIBMVL_VECTOR_HEADER *)(data))->type)
 Return type of data from a pointer to LIBMVL_VECTOR.
 
#define mvl_vector_length(data)   (((LIBMVL_VECTOR_HEADER *)(data))->length)
 Return number of elements from a pointer to LIBMVL_VECTOR.
 
#define mvl_vector_data(data)   ((((LIBMVL_VECTOR *)(data))->u))
 Return base data from a pointer to LIBMVL_VECTOR. More...
 
#define mvl_vector_data_uint8(data)   ((unsigned char *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
 
#define mvl_vector_data_int32(data)   ((int *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
 
#define mvl_vector_data_int64(data)   ((long long int *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
 
#define mvl_vector_data_float(data)   ((float *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
 
#define mvl_vector_data_double(data)   ((double *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
 
#define mvl_vector_data_offset(data)   ((LIBMVL_OFFSET64 *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
 
#define mvl_vector_metadata_offset(data)   ((((LIBMVL_VECTOR_HEADER *)(data))->metadata))
 Return offset to metadata of given LIBMVL_VECTOR.
 
#define MVL_NA_STRING   "\000\000NA"
 It is convenient to be able to mark strings as missing value, similar to NaN for floating point type. In MVL this is done with the special string of length 4 consisting of two NUL characters followed by letters "NA". More...
 
#define LIBMVL_SORT_LEXICOGRAPHIC   1 /* Ascending */
 
#define LIBMVL_SORT_LEXICOGRAPHIC_DESC   2 /* Descending */
 
#define MVL_SEED_HASH_VALUE   0xabcdef
 
#define LIBMVL_ACCUMULATE_HASH   0
 Flags passed to mvl_hash_indices() and mvl_hash_range() More...
 
#define LIBMVL_INIT_HASH   1
 
#define LIBMVL_FINALIZE_HASH   2
 
#define LIBMVL_COMPLETE_HASH   (LIBMVL_INIT_HASH | LIBMVL_FINALIZE_HASH)
 
#define MVL_FLAG_OWN_HASH   (1<<0)
 Flags describing HASH_MAP state. More...
 
#define MVL_FLAG_OWN_HASH_MAP   (1<<1)
 
#define MVL_FLAG_OWN_FIRST   (1<<2)
 
#define MVL_FLAG_OWN_NEXT   (1<<3)
 
#define MVL_EXTENT_INDEX   1
 Index types. More...
 

Typedefs

typedef unsigned long long LIBMVL_OFFSET64
 MVL unsigned 64-bit type used for describing offsets into loaded data.
 

Functions

static int mvl_element_size (int type)
 Return the element size in bytes for a particular MVL type. More...
 
LIBMVL_CONTEXT * mvl_create_context (void)
 Create MVL context. More...
 
void mvl_free_context (LIBMVL_CONTEXT *ctx)
 Release memory associated with MVL context. More...
 
const char * mvl_strerror (LIBMVL_CONTEXT *ctx)
 Obtain description of error code. More...
 
LIBMVL_OFFSET64 mvl_write_vector (LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
 Write complete MVL vector. More...
 
LIBMVL_OFFSET64 mvl_start_write_vector (LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 expected_length, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
 Begin write of MVL vector. This is only needed if the vector has to be written in parts, such as due to memory constraints. More...
 
void mvl_rewrite_vector (LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 base_offset, LIBMVL_OFFSET64 idx, long length, const void *data)
 Write more data to MVL vector that has been previously created with mvl_start_write_vector() More...
 
LIBMVL_OFFSET64 mvl_write_concat_vectors (LIBMVL_CONTEXT *ctx, int type, long nvec, const long *lengths, void **data, LIBMVL_OFFSET64 metadata)
 Write complete MVL vector concatenating data from many vectors or arrays. More...
 
LIBMVL_OFFSET64 mvl_indexed_copy_vector (LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 index_count, const LIBMVL_OFFSET64 *indices, const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 data_length, LIBMVL_OFFSET64 metadata, LIBMVL_OFFSET64 max_buffer)
 Write MVL vector that contains data at specific indices. The indices can repeat, and can themselves be stored in memory mapped MVL file. More...
 
LIBMVL_OFFSET64 mvl_write_string (LIBMVL_CONTEXT *ctx, long length, const char *data, LIBMVL_OFFSET64 metadata)
 Write a single C string. In particular, this is handy for providing metadata tags. More...
 
LIBMVL_OFFSET64 mvl_write_cached_string (LIBMVL_CONTEXT *ctx, long length, const char *data)
 Write a single C string if it has not been written before, otherwise return offset to previously written object. In particular, this is handy for providing metadata tags. More...
 
LIBMVL_OFFSET64 mvl_write_packed_list (LIBMVL_CONTEXT *ctx, long count, const long *str_size, unsigned char **str, LIBMVL_OFFSET64 metadata)
 Write an array of strings as a packed list data type. This is convenient for storing a lot of different strings. More...
 
void mvl_add_directory_entry (LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag)
 Add an entry to the top level directory of MVL file. More...
 
void mvl_add_directory_entry_n (LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag, LIBMVL_OFFSET64 tag_size)
 Add entry to the top level directory of MVL file. More...
 
LIBMVL_OFFSET64 mvl_write_directory (LIBMVL_CONTEXT *ctx)
 Write out MVL file directory with entries collected so far. If this is called multiple times only the latest written directory is retrieved when MVL file is opened. It is an error to write out an empty directory. More...
 
LIBMVL_NAMED_LIST * mvl_create_named_list (int size)
 Allocate and initialize structure for LIBMVL_NAMED_LIST. More...
 
void mvl_free_named_list (LIBMVL_NAMED_LIST *L)
 Free structure for LIBMVL_NAMED_LIST. More...
 
void mvl_recompute_named_list_hash (LIBMVL_NAMED_LIST *L)
 Recompute named list hash. More...
 
long mvl_add_list_entry (LIBMVL_NAMED_LIST *L, long tag_length, const char *tag, LIBMVL_OFFSET64 offset)
 Add entry to LIBMVL_NAMED_LIST. The entry is always appended to the end. More...
 
LIBMVL_OFFSET64 mvl_find_list_entry (LIBMVL_NAMED_LIST *L, long tag_length, const char *tag)
 Find existing entry inside LIBMVL_NAMED_LIST. If several identically named entries exist this function returns last written value. Hash table is used if present. More...
 
LIBMVL_OFFSET64 mvl_write_attributes_list (LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L)
 Write out R-style attribute list. More...
 
LIBMVL_NAMED_LIST * mvl_read_attributes_list (LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 metadata_offset)
 Read back MVL attributes list, typically used to describe metadata. This function also initializes the hash table for fast access. This function does not check that the offsets stored in the returned LIBMVL_NAMED_LIST data structure are valid; this should be done by the code that uses those offsets. More...
 
LIBMVL_NAMED_LIST * mvl_create_R_attributes_list (LIBMVL_CONTEXT *ctx, const char *R_class)
 Create R-style attribute list for class given by R_class, which could be, for example, "data.frame". More...
 
LIBMVL_OFFSET64 mvl_get_character_class_offset (LIBMVL_CONTEXT *ctx)
 Get offset to metadata describing R-style character class - an array of strings. This is convenient for writing columns of strings to be analyzed with R - just provide this offset as the metadata field of mvl_write_packed_list() More...
 
LIBMVL_OFFSET64 mvl_write_named_list (LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L)
 Write out named list. In R, this would be read back as list. More...
 
LIBMVL_OFFSET64 mvl_write_named_list2 (LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L, char *cl)
 Write out named list. In R, this would be read back as list with class attribute set to "cl". More...
 
LIBMVL_OFFSET64 mvl_write_named_list_as_data_frame (LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L, int nrows, LIBMVL_OFFSET64 rownames)
 Write out named list in the style of R data frames. It is assumed that all entries of L are vectors with the same number of elements. More...
 
LIBMVL_NAMED_LIST * mvl_read_named_list (LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset)
 Read back MVL named list. This function also initializes the hash table for fast access. More...
 
void mvl_open (LIBMVL_CONTEXT *ctx, FILE *f)
 Prepare context for writing to file f. More...
 
void mvl_close (LIBMVL_CONTEXT *ctx)
 Write out MVL file directory and postamble and close file. More...
 
static LIBMVL_OFFSET64 mvl_vector_nentries (void *vec)
 Return number of entries in the vector. Currently this is the same as mvl_vector_length() for all types except LIBMVL_PACKED_LIST64. More...
 
static int mvl_validate_vector (LIBMVL_OFFSET64 offset, const void *data, LIBMVL_OFFSET64 data_size)
 This function returns 0 if the offset into data points to a valid vector, or a negative error code otherwise. More...
 
static LIBMVL_VECTOR * mvl_vector_from_offset (void *data, LIBMVL_OFFSET64 offset)
 A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR structure. More...
 
static LIBMVL_VECTOR * mvl_validated_vector_from_offset (void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset)
 A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR structure. More...
 
static double mvl_as_double (const LIBMVL_VECTOR *vec, long idx)
 Return idx vector entry as a double. More...
 
static double mvl_as_double_default (const LIBMVL_VECTOR *vec, long idx, double def)
 Return idx vector entry as a double, with default for missing values. More...
 
static LIBMVL_OFFSET64 mvl_as_offset (const LIBMVL_VECTOR *vec, long idx)
 Return idx vector entry as an offset. More...
 
static double mvl_named_list_get_double (LIBMVL_NAMED_LIST *L, const void *data, long tag_length, const char *tag, long idx)
 Find an entry in a named list and return its idx value as a double. More...
 
static double mvl_named_list_get_double_default (LIBMVL_NAMED_LIST *L, const void *data, long tag_length, const char *tag, long idx, double def)
 Find an entry in a named list and return its idx value as a double. More...
 
static LIBMVL_OFFSET64 mvl_named_list_get_offset (LIBMVL_NAMED_LIST *L, const void *data, long tag_length, const char *tag, long idx)
 Find an entry in a named list and return its idx value as an offset. More...
 
static int mvl_packed_list_is_na (const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 idx)
 Check whether packed list entry is a special string that indicates a missing value. More...
 
static LIBMVL_OFFSET64 mvl_packed_list_get_entry_bytelength (const LIBMVL_VECTOR *vec, LIBMVL_OFFSET64 idx)
 Get length in bytes of string element idx from a packed list. More...
 
static const unsigned char * mvl_packed_list_get_entry (const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 idx)
 Get pointer to the start of string element idx from a packed list. More...
 
static int mvl_packed_list_validate_entry (const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 idx)
 Validate string element idx of a packed list. More...
 
LIBMVL_OFFSET64 mvl_find_directory_entry (LIBMVL_CONTEXT *ctx, const char *tag)
 Find entry in MVL file directory. More...
 
void mvl_load_image (LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 length, const void *data)
 Initialize MVL context to operate with memory mapped area data. More...
 
int mvl_sort_indices (LIBMVL_OFFSET64 indices_count, LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, int sort_function)
 Given a table-like set of vectors of equal length arrange indices so that the columns are sorted lexicographically. More...
 
static LIBMVL_OFFSET64 mvl_randomize_bits64 (LIBMVL_OFFSET64 x)
 Randomize bits of 64-bit numbers, typically after accumulating a hash value. More...
 
static unsigned mvl_randomize_bits32 (unsigned x)
 Randomize bits of 32-bit numbers, typically after accumulating a hash value. More...
 
static LIBMVL_OFFSET64 mvl_accumulate_hash64 (LIBMVL_OFFSET64 x, const unsigned char *data, LIBMVL_OFFSET64 count)
 Accumulate hash from a piece of data. More...
 
static LIBMVL_OFFSET64 mvl_accumulate_int32_hash64 (LIBMVL_OFFSET64 x, const int *data, LIBMVL_OFFSET64 count)
 Accumulate hash from an array of 32-bit integers. The integers are hashed by value, not representation, so one gets the same hash from value of 100 whether it is stored as 32-bits or 64-bits. More...
 
static LIBMVL_OFFSET64 mvl_accumulate_int64_hash64 (LIBMVL_OFFSET64 x, const long long int *data, LIBMVL_OFFSET64 count)
 Accumulate hash from an array of 64-bit integers. The integers are hashed by value, not representation, so one gets the same hash from value of 100 whether it is stored as 32-bits or 64-bits. More...
 
static LIBMVL_OFFSET64 mvl_accumulate_float_hash64 (LIBMVL_OFFSET64 x, const float *data, LIBMVL_OFFSET64 count)
 Accumulate hash from an array of 32-bit floats. The floats are hashed by value, not representation, so one gets the same hash from value of 100.0 whether it is stored as float or promoted to double. Note that this does not work in reverse - many doubles can be truncated to the same float. More...
 
static LIBMVL_OFFSET64 mvl_accumulate_double_hash64 (LIBMVL_OFFSET64 x, const double *data, LIBMVL_OFFSET64 count)
 Accumulate hash from an array of 64-bit floats. The floats are hashed by value, not representation, so one gets the same hash from value of 100.0 whether it is stored as float or promoted to double. Note that this does not work in reverse - many doubles can be truncated to the same float. More...
 
int mvl_hash_indices (LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
 This function is used to compute 64 bit hash of vector values. Array hash[] is passed in and contains the result of the computation. More...
 
int mvl_hash_range (LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
 This function is used to compute 64 bit hash of vector values. Array hash[] is passed in and contains the result of the computation. More...
 
LIBMVL_OFFSET64 mvl_compute_hash_map_size (LIBMVL_OFFSET64 hash_count)
 Compute suggested size of hash map given the number of entries to hash. Hash map size should always be a power of 2. More...
 
HASH_MAP * mvl_allocate_hash_map (LIBMVL_OFFSET64 max_index_count)
 Create HASH_MAP structure. More...
 
void mvl_free_hash_map (HASH_MAP *hash_map)
 Free allocated HASH_MAP. More...
 
void mvl_compute_hash_map (HASH_MAP *hm)
 Compute hash map. This assumes that hm->hash array has been populated with hm->hash_count hashes computed with mvl_hash_indices(). More...
 
LIBMVL_OFFSET64 mvl_hash_match_count (LIBMVL_OFFSET64 key_count, const LIBMVL_OFFSET64 *key_hash, HASH_MAP *hm)
 Find count of matches between hashes of two sets. More...
 
int mvl_find_matches (LIBMVL_OFFSET64 key_indices_count, const LIBMVL_OFFSET64 *key_indices, LIBMVL_OFFSET64 key_vec_count, LIBMVL_VECTOR **key_vec, void **key_vec_data, LIBMVL_OFFSET64 *key_hash, LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, HASH_MAP *hm, LIBMVL_OFFSET64 *key_last, LIBMVL_OFFSET64 pairs_size, LIBMVL_OFFSET64 *key_match_indices, LIBMVL_OFFSET64 *match_indices)
 Compute pairs of merge indices. This is similar to JOIN operation in SQL. More...
 
void mvl_find_groups (LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, HASH_MAP *hm)
 This function transforms HASH_MAP into a list of groups. Similar to GROUP BY clause in SQL. More...
 
void mvl_extend_partition (LIBMVL_PARTITION *el, LIBMVL_OFFSET64 nelem)
 Increase storage of previously allocated partition. More...
 
void mvl_find_repeats (LIBMVL_PARTITION *partition, LIBMVL_OFFSET64 count, LIBMVL_VECTOR **vec, void **data)
 Compute list of extents describing stretches of data with identical values. More...
 
void mvl_free_partition_arrays (LIBMVL_PARTITION *el)
 Free arrays of previously allocated partition. This function does not free the structure itself. More...
 
void mvl_init_extent_list (LIBMVL_EXTENT_LIST *el)
 Initialize freshly allocated extent list structure. More...
 
void mvl_free_extent_list_arrays (LIBMVL_EXTENT_LIST *el)
 Free arrays of previously allocated extent list. This function does not free the structure itself. More...
 
void mvl_extend_extent_list (LIBMVL_EXTENT_LIST *el, LIBMVL_OFFSET64 nelem)
 Increase storage of previously allocated extent list. More...
 
void mvl_init_extent_index (LIBMVL_EXTENT_INDEX *ei)
 Initialize freshly allocated extent index structure. More...
 
void mvl_free_extent_index_arrays (LIBMVL_EXTENT_INDEX *ei)
 Free arrays of previously allocated extent index. This function does not free the structure itself. More...
 
int mvl_compute_extent_index (LIBMVL_EXTENT_INDEX *ei, LIBMVL_OFFSET64 count, LIBMVL_VECTOR **vec, void **data, LIBMVL_OFFSET64 *data_length)
 Compute an extent index. More...
 
LIBMVL_OFFSET64 mvl_write_extent_index (LIBMVL_CONTEXT *ctx, LIBMVL_EXTENT_INDEX *ei)
 Write extent index to MVL file. More...
 
int mvl_load_extent_index (LIBMVL_CONTEXT *ctx, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset, LIBMVL_EXTENT_INDEX *ei)
 Load extent index from memory mapped MVL file.
 
static void mvl_empty_extent_list (LIBMVL_EXTENT_LIST *el)
 Alter extent list to contain no extents without freeing memory. More...
 
static void mvl_get_extents (LIBMVL_EXTENT_INDEX *ei, LIBMVL_OFFSET64 hash, LIBMVL_EXTENT_LIST *el)
 Find extents in index corresponding to a given hash. More...
 
void mvl_compute_vec_stats (const LIBMVL_VECTOR *vec, LIBMVL_VEC_STATS *stats)
 Compute vector statistics, such as a bounding box. More...
 
void mvl_normalize_vector (const LIBMVL_VECTOR *vec, const LIBMVL_VEC_STATS *stats, LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, double *out)
 Normalize vector. More...
 

Detailed Description

core libMVL functions and structures

Definition in file libMVL.h.

Macro Definition Documentation

◆ LIBMVL_ACCUMULATE_HASH

#define LIBMVL_ACCUMULATE_HASH   0

Flags passed to mvl_hash_indices() and mvl_hash_range()

Use LIBMVL_COMPLETE_HASH when computation is done in a single call, or spread out the computation over multiple calls. Initialization and finalization can also be done outside of mvl_hash_*() functions.

No initialization or finalization, just accumulate hash value

Definition at line 1037 of file libMVL.h.

◆ LIBMVL_COMPLETE_HASH

#define LIBMVL_COMPLETE_HASH   (LIBMVL_INIT_HASH | LIBMVL_FINALIZE_HASH)

Initialize, accumulate, finalize.

Definition at line 1040 of file libMVL.h.

◆ LIBMVL_FINALIZE_HASH

#define LIBMVL_FINALIZE_HASH   2


Accumulate hash value and then finalize

Definition at line 1039 of file libMVL.h.

◆ LIBMVL_INIT_HASH

#define LIBMVL_INIT_HASH   1


Initialize hash value, then accumulate

Definition at line 1038 of file libMVL.h.

◆ LIBMVL_PACKED_LIST64

#define LIBMVL_PACKED_LIST64   102

The main purpose of this type is to provide efficient storage for vectors of short strings. This is stored as LIBMVL_VECTOR_OFFSET64 with offset[0] pointing to the start of basic vector and subsequent offsets pointing to the start of the next string. For convenience the last entry points to the end of the last string.

Thus the number of strings in PACKED_LIST64 is length-1.

The usage of 64-bit offsets allows for arbitrarily long strings in the list, while requiring only minimal overhead for each string.

The type is separate from LIBMVL_VECTOR_OFFSET64 to facilitate automated tree traversal.

Definition at line 62 of file libMVL.h.

◆ LIBMVL_SORT_LEXICOGRAPHIC

#define LIBMVL_SORT_LEXICOGRAPHIC   1 /* Ascending */

Sort in ascending order

Definition at line 762 of file libMVL.h.

◆ LIBMVL_SORT_LEXICOGRAPHIC_DESC

#define LIBMVL_SORT_LEXICOGRAPHIC_DESC   2 /* Descending */

Sort in descending order

Definition at line 763 of file libMVL.h.

◆ LIBMVL_VECTOR_CSTRING

#define LIBMVL_VECTOR_CSTRING   101


MVL vector type for storing C-style strings. It is exactly like LIBMVL_VECTOR_UINT8, except that the data is considered valid up to length or the first 0 byte.

Definition at line 60 of file libMVL.h.

◆ LIBMVL_VECTOR_DOUBLE

#define LIBMVL_VECTOR_DOUBLE   5


MVL vector type for storing 64-bit floating point numbers

Definition at line 58 of file libMVL.h.

◆ LIBMVL_VECTOR_FLOAT

#define LIBMVL_VECTOR_FLOAT   4


MVL vector type for storing 32-bit floating point numbers

Definition at line 57 of file libMVL.h.

◆ LIBMVL_VECTOR_INT32

#define LIBMVL_VECTOR_INT32   2

MVL vector type for storing 32-bit signed integers

Definition at line 55 of file libMVL.h.

◆ LIBMVL_VECTOR_INT64

#define LIBMVL_VECTOR_INT64   3


MVL vector type for storing 64-bit signed integers

Definition at line 56 of file libMVL.h.

◆ LIBMVL_VECTOR_OFFSET64

#define LIBMVL_VECTOR_OFFSET64   100

MVL vector type for storing unsigned 64-bit offsets, typically considered as a list of other MVL vectors

Definition at line 59 of file libMVL.h.

◆ LIBMVL_VECTOR_UINT8

#define LIBMVL_VECTOR_UINT8   1

MVL vector type for storing bytes and strings. Can also be used as an opaque type

Definition at line 54 of file libMVL.h.

◆ MVL_EXTENT_INDEX

#define MVL_EXTENT_INDEX   1

Index types.

Definition at line 1235 of file libMVL.h.

◆ MVL_FLAG_OWN_FIRST

#define MVL_FLAG_OWN_FIRST   (1<<2)

HASH_MAP member first owns allocated memory

Definition at line 1069 of file libMVL.h.

◆ MVL_FLAG_OWN_HASH

#define MVL_FLAG_OWN_HASH   (1<<0)

Flags describing HASH_MAP state.

HASH_MAP member hash owns allocated memory

Definition at line 1067 of file libMVL.h.

◆ MVL_FLAG_OWN_HASH_MAP

#define MVL_FLAG_OWN_HASH_MAP   (1<<1)

HASH_MAP member hash_map owns allocated memory

Definition at line 1068 of file libMVL.h.

◆ MVL_FLAG_OWN_NEXT

#define MVL_FLAG_OWN_NEXT   (1<<3)

HASH_MAP member next owns allocated memory

Definition at line 1070 of file libMVL.h.

◆ MVL_NA_STRING

#define MVL_NA_STRING   "\000\000NA"

It is convenient to be able to mark strings as missing value, similar to NaN for floating point type. In MVL this is done with the special string of length 4 consisting of two NUL characters followed by letters "NA".

Definition at line 668 of file libMVL.h.

◆ MVL_SEED_HASH_VALUE

#define MVL_SEED_HASH_VALUE   0xabcdef

Recommended value to be used to initialize hashes. Note that initial value should not be 0

Definition at line 826 of file libMVL.h.

◆ mvl_vector_data

#define mvl_vector_data (   data)    ((((LIBMVL_VECTOR *)(data))->u))

Return base data from a pointer to LIBMVL_VECTOR.

Definition at line 410 of file libMVL.h.

◆ mvl_vector_data_double

#define mvl_vector_data_double (   data)    ((double *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))

Access DOUBLE array of LIBMVL_VECTOR

Definition at line 443 of file libMVL.h.

◆ mvl_vector_data_float

#define mvl_vector_data_float (   data)    ((float *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))

Access FLOAT array of LIBMVL_VECTOR

Definition at line 442 of file libMVL.h.

◆ mvl_vector_data_int32

#define mvl_vector_data_int32 (   data)    ((int *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))

Access INT32 array of LIBMVL_VECTOR

Definition at line 440 of file libMVL.h.

◆ mvl_vector_data_int64

#define mvl_vector_data_int64 (   data)    ((long long int *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))

Access INT64 array of LIBMVL_VECTOR

Definition at line 441 of file libMVL.h.

◆ mvl_vector_data_offset

#define mvl_vector_data_offset (   data)    ((LIBMVL_OFFSET64 *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))

Access LIBMVL_OFFSET64 array of LIBMVL_VECTOR

Definition at line 444 of file libMVL.h.

◆ mvl_vector_data_uint8

#define mvl_vector_data_uint8 (   data)    ((unsigned char *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))

Access UINT8 array of LIBMVL_VECTOR

Definition at line 439 of file libMVL.h.

◆ MVL_WVEC

#define MVL_WVEC (   ctx,
  type,
  ... 
)    mvl_write_vector_inline(ctx, type, MVL_NUMARGS(__VA_ARGS__), 0, __VA_ARGS__)

A convenience macro used for creating and writing vectors with a small number of entries inline. Commonly used for writing configuration data.

Example: MVL_WVEC(ctx, LIBMVL_VECTOR_FLOAT, 1.0, 4.0, 9.0, 16.0)

Definition at line 328 of file libMVL.h.

Function Documentation

◆ mvl_accumulate_double_hash64()

static LIBMVL_OFFSET64 mvl_accumulate_double_hash64 ( LIBMVL_OFFSET64  x,
const double *  data,
LIBMVL_OFFSET64  count 
)
inlinestatic

Accumulate hash from an array of 64-bit floats. The floats are hashed by value, not representation, so one gets the same hash from value of 100.0 whether it is stored as float or promoted to double. Note that this does not work in reverse - many doubles can be truncated to the same float.

This function allows computing the hash of data in several stages.

Parameters
x: previous hash value
data: array of 64-bit floats
count: length of data
Returns
new hash value

Definition at line 1004 of file libMVL.h.

◆ mvl_accumulate_float_hash64()

static LIBMVL_OFFSET64 mvl_accumulate_float_hash64 ( LIBMVL_OFFSET64  x,
const float *  data,
LIBMVL_OFFSET64  count 
)
inlinestatic

Accumulate hash from an array of 32-bit floats. The floats are hashed by value, not representation, so one gets the same hash from value of 100.0 whether it is stored as float or promoted to double. Note that this does not work in reverse - many doubles can be truncated to the same float.

This function allows computing the hash of data in several stages.

Parameters
x: previous hash value
data: array of 32-bit floats
count: length of data
Returns
new hash value

Definition at line 972 of file libMVL.h.

◆ mvl_accumulate_hash64()

static LIBMVL_OFFSET64 mvl_accumulate_hash64 ( LIBMVL_OFFSET64  x,
const unsigned char *  data,
LIBMVL_OFFSET64  count 
)
inlinestatic

Accumulate hash from a piece of data.

This function allows computing the hash of data in several stages.

Parameters
x: previous hash value
data: array of character data
count: length of data
Returns
new hash value

Definition at line 839 of file libMVL.h.

◆ mvl_accumulate_int32_hash64()

static LIBMVL_OFFSET64 mvl_accumulate_int32_hash64 ( LIBMVL_OFFSET64  x,
const int *  data,
LIBMVL_OFFSET64  count 
)
inlinestatic

Accumulate hash from an array of 32-bit integers. The integers are hashed by value, not representation, so one gets the same hash from value of 100 whether it is stored as 32-bits or 64-bits.

This function allows computing the hash of data in several stages.

Parameters
x: previous hash value
data: array of 32-bit integers
count: length of data
Returns
new hash value

Definition at line 909 of file libMVL.h.

◆ mvl_accumulate_int64_hash64()

static LIBMVL_OFFSET64 mvl_accumulate_int64_hash64 ( LIBMVL_OFFSET64  x,
const long long int *  data,
LIBMVL_OFFSET64  count 
)
inlinestatic

Accumulate hash from an array of 64-bit integers. The integers are hashed by value, not representation, so one gets the same hash from value of 100 whether it is stored as 32-bits or 64-bits.

This function allows computing the hash of data in several stages.

Parameters
x: previous hash value
data: array of 64-bit integers
count: length of data
Returns
new hash value

Definition at line 940 of file libMVL.h.

◆ mvl_add_directory_entry()

void mvl_add_directory_entry ( LIBMVL_CONTEXT *  ctx,
LIBMVL_OFFSET64  offset,
const char *  tag 
)

Add an entry to the top level directory of MVL file.

Parameters
ctx: MVL context pointer that has been initialized for writing
offset: directory entry value - typically an offset pointing to previously written MVL object
tag: C string describing directory entry. When necessary, these can repeat, in which case the last written entry is retrieved first.

Definition at line 835 of file libMVL.c.

◆ mvl_add_directory_entry_n()

void mvl_add_directory_entry_n ( LIBMVL_CONTEXT *  ctx,
LIBMVL_OFFSET64  offset,
const char *  tag,
LIBMVL_OFFSET64  tag_size 
)

Add entry to the top level directory of MVL file.

Parameters
ctx: MVL context pointer that has been initialized for writing
offset: directory entry value - typically an offset pointing to previously written MVL object
tag: string describing directory entry. When necessary, these can repeat, in which case the last written entry is retrieved first.
tag_size: length of tag

Definition at line 859 of file libMVL.c.

◆ mvl_add_list_entry()

long mvl_add_list_entry ( LIBMVL_NAMED_LIST *  L,
long  tag_length,
const char *  tag,
LIBMVL_OFFSET64  offset 
)

Add entry to LIBMVL_NAMED_LIST. The entry is always appended to the end.

Parameters
L: pointer to previously allocated LIBMVL_NAMED_LIST
tag_length: size of tag
tag: string identifying entry - these can repeat.
offset: 64-bit value
Returns
index of entry inside named list

Definition at line 1001 of file libMVL.c.

◆ mvl_allocate_hash_map()

HASH_MAP* mvl_allocate_hash_map ( LIBMVL_OFFSET64  max_index_count)

Create HASH_MAP structure.

This creates default HASH_MAP structure with all members allocated with new arrays. In some situations, such as to save memory it is possible to reuse existing arrays by specifying hm->flags appropriately. In such case, one should not use this constructor and instead create the structure manually.

Parameters
max_index_count: expected number of entries to hash
Returns
pointer to allocated HASH_MAP structure

Definition at line 2094 of file libMVL.c.

◆ mvl_as_double()

static double mvl_as_double ( const LIBMVL_VECTOR vec,
long  idx 
)
inlinestatic

Return idx vector entry as a double.

This function is meant as a convenience function for retrieving a few values, such as stored configuration parameters.

Parameters
veca pointer to LIBMVL_VECTOR
idxindex into a vector
Returns
vector value converted into a double, or a NAN if anything went wrong.

Definition at line 527 of file libMVL.h.

◆ mvl_as_double_default()

static double mvl_as_double_default ( const LIBMVL_VECTOR vec,
long  idx,
double  def 
)
inlinestatic

Return idx vector entry as a double, with default for missing values.

This function is meant as a convenience function for retrieving a few values, such as stored configuration parameters.

Parameters
veca pointer to LIBMVL_VECTOR
idxindex into a vector
defdefault value to return in case of out of bounds indices.
Returns
vector value converted into a double, or def if anything went wrong.

Definition at line 554 of file libMVL.h.

◆ mvl_as_offset()

static LIBMVL_OFFSET64 mvl_as_offset ( const LIBMVL_VECTOR vec,
long  idx 
)
inlinestatic

Return idx vector entry as an offset.

This function is meant as a convenience function for retrieving a few values, such as stored configuration parameters. Only LIBMVL_VECTOR_OFFSET64 vectors are supported

Parameters
veca pointer to LIBMVL_VECTOR
idxindex into a vector
Returns
vector value as an offset, or LIBMVL_NULL_OFFSET if anything went wrong.

Definition at line 581 of file libMVL.h.

◆ mvl_close()

void mvl_close ( LIBMVL_CONTEXT ctx)

Write out MVL file directory and postamble and close file.

Parameters
ctxMVL context pointer

Definition at line 1347 of file libMVL.c.

◆ mvl_compute_extent_index()

int mvl_compute_extent_index ( LIBMVL_EXTENT_INDEX ei,
LIBMVL_OFFSET64  count,
LIBMVL_VECTOR **  vec,
void **  data,
LIBMVL_OFFSET64 data_length 
)

Compute an extent index.

Parameters
eia pointer to extent index structure
countthe number of LIBMVL_VECTORS considered as columns in a table
vecan array of pointers to LIBMVL_VECTORS considered as columns in a table
dataan array of pointers to memory mapped areas those LIBMVL_VECTORs derive from. This allows computing hash from vectors drawn from different MVL files
Returns
an integer error code, or 0 on success

Definition at line 2645 of file libMVL.c.

◆ mvl_compute_hash_map()

void mvl_compute_hash_map ( HASH_MAP hm)

Compute hash map. This assumes that hm->hash array has been populated with hm->hash_count hashes computed with mvl_hash_indices().

Parameters
hma pointer to HASH_MAP structure

Definition at line 2135 of file libMVL.c.

◆ mvl_compute_hash_map_size()

LIBMVL_OFFSET64 mvl_compute_hash_map_size ( LIBMVL_OFFSET64  hash_count)

Compute suggested size of hash map given the number of entries to hash. Hash map size should always be a power of 2.

Parameters
hash_countexpected number of items to hash
Returns
suggested hash map size

Definition at line 2075 of file libMVL.c.

◆ mvl_compute_vec_stats()

void mvl_compute_vec_stats ( const LIBMVL_VECTOR vec,
LIBMVL_VEC_STATS stats 
)

Compute vector statistics, such as a bounding box.

Parameters
veca pointer to LIBMVL_VECTOR
statsa pointer to previously allocated LIBMVL_VEC_STATS structure

Definition at line 2809 of file libMVL.c.

◆ mvl_create_context()

LIBMVL_CONTEXT* mvl_create_context ( void  )

Create MVL context.

Returns
A pointer to allocated LIBMVL_CONTEXT structure

Definition at line 150 of file libMVL.c.

◆ mvl_create_named_list()

LIBMVL_NAMED_LIST* mvl_create_named_list ( int  size)

Allocate and initialize structure for LIBMVL_NAMED_LIST.

Parameters
sizethis can be set to large values if the final size of named list is known
Returns
pointer to structure for LIBMVL_NAMED_LIST

Definition at line 927 of file libMVL.c.

◆ mvl_create_R_attributes_list()

LIBMVL_NAMED_LIST* mvl_create_R_attributes_list ( LIBMVL_CONTEXT ctx,
const char *  R_class 
)

Create R-style attribute list for class given by R_class, which could be, for example, "data.frame".

Parameters
ctxMVL context pointer that has been initialized for writing
R_classstring identifying R class, such as "data.frame"
Returns
pointer to LIBMVL_NAMED_LIST with allocated parameters

Definition at line 1082 of file libMVL.c.

◆ mvl_element_size()

static int mvl_element_size ( int  type)
inlinestatic

Return the element size in bytes for a particular MVL type.

Parameters
typeMVL type, such as LIBMVL_VECTOR_FLOAT
Returns
size in bytes

Definition at line 74 of file libMVL.h.

◆ mvl_empty_extent_list()

static void mvl_empty_extent_list ( LIBMVL_EXTENT_LIST el)
inlinestatic

Alter extent list to contain no extents without freeing memory.

Parameters
elpointer to extent list structure to empty

Definition at line 1184 of file libMVL.h.

◆ mvl_extend_extent_list()

void mvl_extend_extent_list ( LIBMVL_EXTENT_LIST el,
LIBMVL_OFFSET64  nelem 
)

Increase storage of previously allocated extent list.

Parameters
elextent list structure
nelemMake sure it can contain at least that many elements

Definition at line 2573 of file libMVL.c.

◆ mvl_extend_partition()

void mvl_extend_partition ( LIBMVL_PARTITION el,
LIBMVL_OFFSET64  nelem 
)

Increase storage of previously allocated partition.

Parameters
elPartition structure
nelemMake sure it can contain at least that many elements

Definition at line 2451 of file libMVL.c.

◆ mvl_find_directory_entry()

LIBMVL_OFFSET64 mvl_find_directory_entry ( LIBMVL_CONTEXT ctx,
const char *  tag 
)

Find entry in MVL file directory.

Parameters
ctxMVL context pointer
tagcharacter string identifying entry
Returns
offset into file the entry points to

Definition at line 1384 of file libMVL.c.

◆ mvl_find_groups()

void mvl_find_groups ( LIBMVL_OFFSET64  indices_count,
const LIBMVL_OFFSET64 indices,
LIBMVL_OFFSET64  vec_count,
LIBMVL_VECTOR **  vec,
void **  vec_data,
HASH_MAP hm 
)

This function transforms HASH_MAP into a list of groups. Similar to GROUP BY clause in SQL.

The original HASH_MAP describes groups of rows with identical hashes. However, there is a (remote) possibility of collision where different rows have the same hash. This function resolves this ambiguity. After the call, hm->hash_map becomes invalid, but hm->first and hm->next describe groups of exactly identical rows.

Parameters
indices_countnumber of elements in indices array
indicesan array of indices used to create HASH_MAP hm
vec_countthe number of LIBMVL_VECTORS considered as columns in a table
vecan array of pointers to LIBMVL_VECTORS considered as columns in a table
vec_dataan array of pointers to memory mapped areas those LIBMVL_VECTORs derive from. This allows computing hash from vectors drawn from different MVL files
hma previously computed (with mvl_compute_hash_map()) HASH_MAP

Definition at line 2382 of file libMVL.c.

◆ mvl_find_list_entry()

LIBMVL_OFFSET64 mvl_find_list_entry ( LIBMVL_NAMED_LIST L,
long  tag_length,
const char *  tag 
)

Find existing entry inside LIBMVL_NAMED_LIST. If several identically named entries exist this function returns last written value. Hash table is used if present.

Parameters
Lpointer to previously allocated LIBMVL_NAMED_LIST
tag_lengthsize of tag
tagstring identifying entry - these can repeat.
Returns
entry value

Definition at line 1048 of file libMVL.c.

◆ mvl_find_matches()

int mvl_find_matches ( LIBMVL_OFFSET64  key_indices_count,
const LIBMVL_OFFSET64 key_indices,
LIBMVL_OFFSET64  key_vec_count,
LIBMVL_VECTOR **  key_vec,
void **  key_vec_data,
LIBMVL_OFFSET64 key_hash,
LIBMVL_OFFSET64  indices_count,
const LIBMVL_OFFSET64 indices,
LIBMVL_OFFSET64  vec_count,
LIBMVL_VECTOR **  vec,
void **  vec_data,
HASH_MAP hm,
LIBMVL_OFFSET64 key_last,
LIBMVL_OFFSET64  pairs_size,
LIBMVL_OFFSET64 key_match_indices,
LIBMVL_OFFSET64 match_indices 
)

Compute pairs of merge indices. This is similar to JOIN operation in SQL.

This function takes two table like sets of vectors as input. The vectors in each table set have to be of equal number of elements. We also take two index arrays specifying rows in each table set. We then find pairs of indices where the rows are identical.

The output is returned in a pair of preallocated arrays key_match_indices and match_indices. The pairs are arranged in stretches of identical "key" rows. Those stretches are described by key_last array.

Parameters
key_indices_countnumber of entries in key_indices array
key_indicesan array with indices into "key" table-like vector set
key_vec_countnumber of vectors in "key" table set
key_vecan array of vectors in "key" table set
key_vec_dataan array of pointers to memory mapped areas those "key" vectors derive from. This allows computing hash from vectors drawn from different MVL files
key_hashan array of hashes of "key" vectors computed with mvl_hash_indices()
indices_countnumber of entries in indices array
indicesan array with indices into "main" table-like vector set
vec_countnumber of vectors in "main" table set
vecan array of vectors in "main" table set
vec_dataan array of pointers to memory mapped areas those "main" vectors derive from. This allows computing hash from vectors drawn from different MVL files
hma previously computed HASH_MAP of "main" table set
key_lastthis is an output array of size key_indices_count that describes stretches of matches with identical "key" rows. Thus for "key" row i, the corresponding stretch is key_last[i-1] to key_last[i]-1
pairs_sizethe size of allocated key_match_indices and match_indices arrays. This value can be computed with mvl_hash_match_count().
key_match_indicesan array of "key" indices from each pair
match_indicesan array of "main" indices from each pair
Returns
0 if everything went well, otherwise a negative error code

Definition at line 2306 of file libMVL.c.

◆ mvl_find_repeats()

void mvl_find_repeats ( LIBMVL_PARTITION el,
LIBMVL_OFFSET64  count,
LIBMVL_VECTOR **  vec,
void **  data 
)

Compute list of extents describing stretches of data with identical values.

Parameters
elpointer to previously allocated LIBMVL_PARTITION structure
countNumber of vectors in vec
vecArray of vectors with identical number of elements
dataMapped data areas (needed to compare strings)

Definition at line 2469 of file libMVL.c.

◆ mvl_free_context()

void mvl_free_context ( LIBMVL_CONTEXT ctx)

Release memory associated with MVL context.

Parameters
ctxpointer to context previously allocated with mvl_create_context()

Definition at line 186 of file libMVL.c.

◆ mvl_free_extent_index_arrays()

void mvl_free_extent_index_arrays ( LIBMVL_EXTENT_INDEX ei)

Free arrays of previously allocated extent index. This function does not free the structure itself.

Parameters
eia pointer to LIBMVL_EXTENT_INDEX structure

Definition at line 2611 of file libMVL.c.

◆ mvl_free_extent_list_arrays()

void mvl_free_extent_list_arrays ( LIBMVL_EXTENT_LIST el)

Free arrays of previously allocated extent list. This function does not free the structure itself.

Parameters
ela pointer to LIBMVL_EXTENT_LIST structure

Definition at line 2557 of file libMVL.c.

◆ mvl_free_hash_map()

void mvl_free_hash_map ( HASH_MAP hash_map)

Free allocated HASH_MAP.

Parameters
hash_mapa pointer to previously allocated hash_map structure

Definition at line 2118 of file libMVL.c.

◆ mvl_free_named_list()

void mvl_free_named_list ( LIBMVL_NAMED_LIST L)

Free structure for LIBMVL_NAMED_LIST.

Parameters
Lpointer to previously allocated LIBMVL_NAMED_LIST

Definition at line 949 of file libMVL.c.

◆ mvl_free_partition_arrays()

void mvl_free_partition_arrays ( LIBMVL_PARTITION el)

free arrays of previously allocated partition. This function does not free the structure itself.

Parameters
ela pointer to LIBMVL_PARTITION structure

Definition at line 2533 of file libMVL.c.

◆ mvl_get_character_class_offset()

LIBMVL_OFFSET64 mvl_get_character_class_offset ( LIBMVL_CONTEXT ctx)

Get offset to metadata describing R-style character class - an array of strings. This is convenient for writing columns of strings to be analyzed with R - just provide this offset as the metadata field of mvl_write_packed_list()

Parameters
ctxMVL context pointer that has been initialized for writing
Returns
an offset into the file, suitable for specifying as MVL object metadata

Definition at line 819 of file libMVL.c.

◆ mvl_get_extents()

static void mvl_get_extents ( LIBMVL_EXTENT_INDEX ei,
LIBMVL_OFFSET64  hash,
LIBMVL_EXTENT_LIST el 
)
inlinestatic

Find extents in index corresponding to a given hash.

Parameters
eipointer to populated extent index structure
hash64-bit hash value to query
elpointer to extent list structure to add extents to

Definition at line 1196 of file libMVL.h.

◆ mvl_hash_indices()

int mvl_hash_indices ( LIBMVL_OFFSET64  indices_count,
const LIBMVL_OFFSET64 indices,
LIBMVL_OFFSET64 hash,
LIBMVL_OFFSET64  vec_count,
LIBMVL_VECTOR **  vec,
void **  vec_data,
LIBMVL_OFFSET64 vec_data_length,
int  flags 
)

This function is used to compute 64 bit hash of vector values. The array hash[] is passed in and contains the result of the computation.

Integer values are hashed by value, so that 100 produces the same hash whether it is stored as INT32 or INT64.

Floats and doubles are trickier - we can guarantee that the hash of a float promoted to a double is the same as the hash of the original float, but not the reverse.

Parameters
indices_counttotal number of indices
indicesan array of indices into provided vectors
hasha previously allocated array of length indices_count that the computed hashes will be written into
vec_countthe number of LIBMVL_VECTORS considered as columns in a table
vecan array of pointers to LIBMVL_VECTORS considered as columns in a table
vec_dataan array of pointers to memory mapped areas those LIBMVL_VECTORs derive from. This allows computing hash from vectors drawn from different MVL files
vec_data_lengthan array of lengths of memory mapped areas those LIBMVL_VECTORs derive from.
flagsflags specifying whether to initialize or finalize hash

Definition at line 1882 of file libMVL.c.

◆ mvl_hash_match_count()

LIBMVL_OFFSET64 mvl_hash_match_count ( LIBMVL_OFFSET64  key_count,
const LIBMVL_OFFSET64 key_hash,
HASH_MAP hm 
)

Find count of matches between hashes of two sets.

This function is useful to find the upper limit on the number of possible matches, so one can allocate arrays for the result or plan computation in some other way.

Parameters
key_countnumber of key hashes
key_hashan array of key hashes to query
hma pointer to HASH_MAP structure
Returns
number of matches

Definition at line 2202 of file libMVL.c.

◆ mvl_hash_range()

int mvl_hash_range ( LIBMVL_OFFSET64  i0,
LIBMVL_OFFSET64  i1,
LIBMVL_OFFSET64 hash,
LIBMVL_OFFSET64  vec_count,
LIBMVL_VECTOR **  vec,
void **  vec_data,
LIBMVL_OFFSET64 vec_data_length,
int  flags 
)

This function is used to compute 64 bit hash of vector values. The array hash[] is passed in and contains the result of the computation.

Integer values are hashed by value, so that 100 produces the same hash whether it is stored as INT32 or INT64.

Floats and doubles are trickier - we can guarantee that the hash of a float promoted to a double is the same as the hash of the original float, but not the reverse.

Parameters
i0starting index to hash
i1first index to not hash
hasha previously allocated array of length (i1-i0) that the computed hashes will be written into
vec_countthe number of LIBMVL_VECTORS considered as columns in a table
vecan array of pointers to LIBMVL_VECTORS considered as columns in a table
vec_dataan array of pointers to memory mapped areas those LIBMVL_VECTORs derive from. This allows computing hash from vectors drawn from different MVL files
vec_data_lengthan array of lengths of memory mapped areas those LIBMVL_VECTORs derive from.
flagsflags specifying whether to initialize or finalize hash

Definition at line 1984 of file libMVL.c.

◆ mvl_indexed_copy_vector()

LIBMVL_OFFSET64 mvl_indexed_copy_vector ( LIBMVL_CONTEXT ctx,
LIBMVL_OFFSET64  index_count,
const LIBMVL_OFFSET64 indices,
const LIBMVL_VECTOR vec,
const void *  data,
LIBMVL_OFFSET64  data_length,
LIBMVL_OFFSET64  metadata,
LIBMVL_OFFSET64  max_buffer 
)

Write MVL vector that contains data at specific indices. The indices can repeat, and can themselves be stored in memory mapped MVL file.

Parameters
ctxMVL context pointer that has been initialized for writing
index_countnumber of indices to process, this will determine the length of the new vector
indicesarray of indices into vector vec
veca pointer to fully formed MVL vector, such as from mapped MVL file
datapointer to data of previously mapped MVL library
data_lengthlength of data of previously mapped MVL library
metadataan optional offset to previously written metadata. Specify LIBMVL_NO_METADATA if not needed
max_buffermaximum size of buffer to hold in-flight data. Recommend to set to at least 10MB for efficiency.
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 473 of file libMVL.c.

◆ mvl_init_extent_index()

void mvl_init_extent_index ( LIBMVL_EXTENT_INDEX ei)

Initialize freshly allocated extent index structure.

Parameters
eia pointer to LIBMVL_EXTENT_INDEX structure

Definition at line 2601 of file libMVL.c.

◆ mvl_init_extent_list()

void mvl_init_extent_list ( LIBMVL_EXTENT_LIST el)

Initialize freshly allocated extent list structure.

Parameters
ela pointer to LIBMVL_EXTENT_LIST structure

Definition at line 2545 of file libMVL.c.

◆ mvl_load_image()

void mvl_load_image ( LIBMVL_CONTEXT ctx,
LIBMVL_OFFSET64  length,
const void *  data 
)

Initialize MVL context to operate with memory mapped area data.

Parameters
ctxMVL context pointer
lengthsize of memory mapped data, in bytes
datapointer to the beginning of memory mapped area

Definition at line 1399 of file libMVL.c.

◆ mvl_named_list_get_double()

static double mvl_named_list_get_double ( LIBMVL_NAMED_LIST L,
const void *  data,
long  tag_length,
const char *  tag,
long  idx 
)
inlinestatic

Find an entry in a named list and return its idx value as a double.

This function is meant as a convenience function for retrieving a few values stored in a named list, such as stored configuration parameters. It effectively performs double indexing L[tag][idx]

Parameters
La pointer to previously retrieved LIBMVL_NAMED_LIST
dataa pointer to beginning of memory mapped MVL file
tag_lengthlength of character tag, or -1 to compute automatically
tagcharacter tag
idxindex into the entry
Returns
vector value converted into a double, or a NAN if anything went wrong.

Definition at line 605 of file libMVL.h.

◆ mvl_named_list_get_double_default()

static double mvl_named_list_get_double_default ( LIBMVL_NAMED_LIST L,
const void *  data,
long  tag_length,
const char *  tag,
long  idx,
double  def 
)
inlinestatic

Find an entry in a named list and return its idx value as a double, with a default for errors.

This function is meant as a convenience function for retrieving a few values stored in a named list, such as stored configuration parameters. It effectively performs double indexing L[tag][idx]

Parameters
La pointer to previously retrieved LIBMVL_NAMED_LIST
dataa pointer to beginning of memory mapped MVL file
tag_lengthlength of character tag, or -1 to compute automatically
tagcharacter tag
idxindex into the entry
defdefault value to return in case of errors
Returns
vector value converted into a double, or def if anything went wrong.

Definition at line 629 of file libMVL.h.

◆ mvl_named_list_get_offset()

static LIBMVL_OFFSET64 mvl_named_list_get_offset ( LIBMVL_NAMED_LIST L,
const void *  data,
long  tag_length,
const char *  tag,
long  idx 
)
inlinestatic

Find an entry in a named list and return its idx value as an offset.

This function is meant as a convenience function for retrieving a few values stored in a named list, such as stored configuration parameters. It effectively performs double indexing L[tag][idx]

Parameters
La pointer to previously retrieved LIBMVL_NAMED_LIST
dataa pointer to beginning of memory mapped MVL file
tag_lengthlength of character tag, or -1 to compute automatically
tagcharacter tag
idxindex into the entry
Returns
vector value, or LIBMVL_NULL_OFFSET if anything went wrong.

Definition at line 652 of file libMVL.h.

◆ mvl_normalize_vector()

void mvl_normalize_vector ( const LIBMVL_VECTOR vec,
const LIBMVL_VEC_STATS stats,
LIBMVL_OFFSET64  i0,
LIBMVL_OFFSET64  i1,
double *  out 
)

normalize vector

This function converts numeric vectors into normalized double precision entries. Indices i0 and i1 specify the stretch of indices to normalize. This facilitates processing of very long vectors in pieces.

Parameters
veca pointer to LIBMVL_VECTOR
statspreviously allocated LIBMVL_VEC_STATS structure
i0start index of stretch to process
i1stop index of stretch to process
outarray of normalized entries of size i1-i0. First entry corresponds to index i0

Definition at line 2960 of file libMVL.c.

◆ mvl_open()

void mvl_open ( LIBMVL_CONTEXT ctx,
FILE *  f 
)

Prepare context for writing to file f.

Parameters
ctxMVL context pointer
fpointer to previously opened stdio.h FILE structure

Definition at line 1338 of file libMVL.c.

◆ mvl_packed_list_get_entry()

static const unsigned char* mvl_packed_list_get_entry ( const LIBMVL_VECTOR vec,
const void *  data,
LIBMVL_OFFSET64  idx 
)
inlinestatic

Get pointer to the start of string element idx from a packed list.

Parameters
veca pointer to LIBMVL_VECTOR with type LIBMVL_PACKED_LIST64
dataa pointer to beginning of memory mapped MVL file
idxentry index
Returns
a pointer to the beginning of the data.

Definition at line 719 of file libMVL.h.

◆ mvl_packed_list_get_entry_bytelength()

static LIBMVL_OFFSET64 mvl_packed_list_get_entry_bytelength ( const LIBMVL_VECTOR vec,
LIBMVL_OFFSET64  idx 
)
inlinestatic

Get length in bytes of string element idx from a packed list.

Parameters
veca pointer to LIBMVL_VECTOR with type LIBMVL_PACKED_LIST64
idxentry index
Returns
string length in bytes

Definition at line 701 of file libMVL.h.

◆ mvl_packed_list_is_na()

static int mvl_packed_list_is_na ( const LIBMVL_VECTOR vec,
const void *  data,
LIBMVL_OFFSET64  idx 
)
inlinestatic

Check whether packed list entry is a special string that indicates a missing value.

Parameters
veca pointer to LIBMVL_VECTOR with type LIBMVL_PACKED_LIST64
dataa pointer to beginning of memory mapped MVL file
idxentry index
Returns
1 if the entry is NA - a missing value, 0 otherwise

Definition at line 684 of file libMVL.h.

◆ mvl_packed_list_validate_entry()

static int mvl_packed_list_validate_entry ( const LIBMVL_VECTOR vec,
const void *  data,
LIBMVL_OFFSET64  data_size,
LIBMVL_OFFSET64  idx 
)
inlinestatic

Validate string element idx of a packed list against the bounds of the memory mapped data.

Parameters
veca pointer to LIBMVL_VECTOR with type LIBMVL_PACKED_LIST64
dataa pointer to beginning of memory mapped MVL file
idxentry index
Returns
an integer validation status (this function returns int, not a pointer).

Definition at line 736 of file libMVL.h.

◆ mvl_randomize_bits32()

static unsigned mvl_randomize_bits32 ( unsigned  x)
inlinestatic

Randomize bits of 32-bit numbers, typically after accumulating a hash value.

Parameters
xinput value
Returns
Randomized value

Definition at line 813 of file libMVL.h.

◆ mvl_randomize_bits64()

static LIBMVL_OFFSET64 mvl_randomize_bits64 ( LIBMVL_OFFSET64  x)
inlinestatic

Randomize bits of 64-bit numbers, typically after accumulating a hash value.

Parameters
xinput value
Returns
Randomized value

Definition at line 796 of file libMVL.h.

◆ mvl_read_attributes_list()

LIBMVL_NAMED_LIST* mvl_read_attributes_list ( LIBMVL_CONTEXT ctx,
const void *  data,
LIBMVL_OFFSET64  data_size,
LIBMVL_OFFSET64  metadata_offset 
)

Read back MVL attributes list, typically used to describe metadata. This function also initializes the hash table for fast access. This function does not check that the offsets stored in the returned LIBMVL_NAMED_LIST data structure are valid; this should be done by the code that uses those offsets.

Parameters
ctxMVL context pointer
datamemory mapped data
data_sizesize of memory mapped data
metadata_offsetmetadata offset pointing to the previously written attributes
Returns
NULL if there is no metadata, otherwise LIBMVL_NAMED_LIST populated with attributes

Definition at line 1191 of file libMVL.c.

◆ mvl_read_named_list()

LIBMVL_NAMED_LIST* mvl_read_named_list ( LIBMVL_CONTEXT ctx,
const void *  data,
LIBMVL_OFFSET64  data_size,
LIBMVL_OFFSET64  offset 
)

Read back MVL named list. This function also initializes the hash table for fast access.

Parameters
ctxMVL context pointer
datamemory mapped data
data_sizesize of memory mapped data
offsetoffset into data where LIBMVL_NAMED_LIST begins
Returns
NULL on error, otherwise LIBMVL_NAMED_LIST

Definition at line 1250 of file libMVL.c.

◆ mvl_recompute_named_list_hash()

void mvl_recompute_named_list_hash ( LIBMVL_NAMED_LIST L)

Recompute named list hash.

Parameters
Lpointer to previously allocated LIBMVL_NAMED_LIST

Definition at line 964 of file libMVL.c.

◆ mvl_rewrite_vector()

void mvl_rewrite_vector ( LIBMVL_CONTEXT ctx,
int  type,
LIBMVL_OFFSET64  base_offset,
LIBMVL_OFFSET64  idx,
long  length,
const void *  data 
)

Write more data to MVL vector that has been previously created with mvl_start_write_vector()

Parameters
ctxMVL context pointer that has been initialized for writing
typeMVL data type
base_offsetthe offset returned by mvl_start_write_vector()
idxindex of first element pointed to by data
lengthnumber of elements to write
datapointer to data

Definition at line 452 of file libMVL.c.

◆ mvl_sort_indices()

int mvl_sort_indices ( LIBMVL_OFFSET64  indices_count,
LIBMVL_OFFSET64 indices,
LIBMVL_OFFSET64  vec_count,
LIBMVL_VECTOR **  vec,
void **  vec_data,
int  sort_function 
)

Given a table-like set of vectors of equal length arrange indices so that the columns are sorted lexicographically.

Parameters
indices_counttotal number of indices
indicesan array of indices into provided vectors
vec_countthe number of LIBMVL_VECTORS considered as columns in a table
vecan array of pointers to LIBMVL_VECTORS considered as columns in a table
vec_dataan array of pointers to memory mapped areas those LIBMVL_VECTORs derive from. This allows computing hash from vectors drawn from different MVL files
sort_functionone of LIBMVL_SORT_LEXICOGRAPHIC or LIBMVL_SORT_LEXICOGRAPHIC_DESC to specify sort direction

Definition at line 354 of file libMVL_sort.cc.

◆ mvl_start_write_vector()

LIBMVL_OFFSET64 mvl_start_write_vector ( LIBMVL_CONTEXT ctx,
int  type,
LIBMVL_OFFSET64  expected_length,
LIBMVL_OFFSET64  length,
const void *  data,
LIBMVL_OFFSET64  metadata 
)

Begin write of MVL vector. This is only needed if the vector has to be written in parts, such as due to memory constraints.

Parameters
ctxMVL context pointer that has been initialized for writing
typeMVL data type
expected_lengthnumber of elements in the fully written vector
lengthnumber of elements to write
datapointer to data
metadataan optional offset to previously written metadata. Specify LIBMVL_NO_METADATA if not needed
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 369 of file libMVL.c.

◆ mvl_strerror()

const char* mvl_strerror ( LIBMVL_CONTEXT ctx)

Obtain description of error code.

Parameters
ctxpointer to context previously allocated with mvl_create_context()
Returns
pointer to C string whose memory is owned by the context

Definition at line 213 of file libMVL.c.

◆ mvl_validate_vector()

static int mvl_validate_vector ( LIBMVL_OFFSET64  offset,
const void *  data,
LIBMVL_OFFSET64  data_size 
)
inlinestatic

This function returns 0 if the offset into data points to a valid vector, or a negative error code otherwise.

Parameters
offsetan offset into memory mapped data where the LIBMVL_VECTOR is located
datapointer to beginning of memory mapped data
data_sizean upper limit for valid offsets - usually the size of mapped MVL file. if data_size is set to ~0LLU the checks are bypassed

Definition at line 457 of file libMVL.h.

◆ mvl_validated_vector_from_offset()

static LIBMVL_VECTOR* mvl_validated_vector_from_offset ( void *  data,
LIBMVL_OFFSET64  data_size,
LIBMVL_OFFSET64  offset 
)
inlinestatic

A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR structure.

This function validates vector structure, but not the contents of the vector.

Parameters
datapointer to memory mapped MVL file
offset64-bit offset into MVL file
Returns
pointer to LIBMVL_VECTOR structure stored in MVL file

Definition at line 508 of file libMVL.h.

◆ mvl_vector_from_offset()

static LIBMVL_VECTOR* mvl_vector_from_offset ( void *  data,
LIBMVL_OFFSET64  offset 
)
inlinestatic

A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR structure.

It assumes that the offset is valid, to validate it see mvl_validate_vector()

Parameters
datapointer to memory mapped MVL file
offset64-bit offset into MVL file
Returns
pointer to LIBMVL_VECTOR structure stored in MVL file

Definition at line 495 of file libMVL.h.

◆ mvl_vector_nentries()

static LIBMVL_OFFSET64 mvl_vector_nentries ( void *  vec)
inlinestatic

Return number of entries in the vector. Currently this is the same as mvl_vector_length() for all types except LIBMVL_PACKED_LIST64.

Parameters
vecpointer to start of the vector

Definition at line 392 of file libMVL.h.

◆ mvl_write_attributes_list()

LIBMVL_OFFSET64 mvl_write_attributes_list ( LIBMVL_CONTEXT ctx,
LIBMVL_NAMED_LIST L 
)

Write out R-style attribute list.

Parameters
ctxMVL context pointer that has been initialized for writing
Lpreviously created attributes list
Returns
an offset into the file, suitable for use as vector metadata

Definition at line 1096 of file libMVL.c.

◆ mvl_write_cached_string()

LIBMVL_OFFSET64 mvl_write_cached_string ( LIBMVL_CONTEXT ctx,
long  length,
const char *  data 
)

Write a single C string if it has not been written before, otherwise return offset to previously written object. In particular, this is handy for providing metadata tags.

Parameters
ctxMVL context pointer that has been initialized for writing
lengthstring length. Set to -1 to be computed automatically.
datastring data
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 701 of file libMVL.c.

◆ mvl_write_concat_vectors()

LIBMVL_OFFSET64 mvl_write_concat_vectors ( LIBMVL_CONTEXT ctx,
int  type,
long  nvec,
const long *  lengths,
void **  data,
LIBMVL_OFFSET64  metadata 
)

Write complete MVL vector concatenating data from many vectors or arrays.

Parameters
ctxMVL context pointer that has been initialized for writing
typeMVL data type
nvecnumber of arrays to concatenate
lengthsarray of lengths of individual vectors
dataarray of pointers to vector data
metadataan optional offset to previously written metadata. Specify LIBMVL_NO_METADATA if not needed
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 636 of file libMVL.c.

◆ mvl_write_directory()

LIBMVL_OFFSET64 mvl_write_directory ( LIBMVL_CONTEXT ctx)

Write out MVL file directory with entries collected so far. If this is called multiple times only the latest written directory is retrieved when MVL file is opened. It is an error to write out an empty directory.

Parameters
ctxMVL context pointer that has been initialized for writing
Returns
an offset into the file where the directory was written

Definition at line 880 of file libMVL.c.

◆ mvl_write_extent_index()

LIBMVL_OFFSET64 mvl_write_extent_index ( LIBMVL_CONTEXT ctx,
LIBMVL_EXTENT_INDEX ei 
)

Write extent index to MVL file.

Definition at line 2695 of file libMVL.c.

◆ mvl_write_named_list()

LIBMVL_OFFSET64 mvl_write_named_list ( LIBMVL_CONTEXT ctx,
LIBMVL_NAMED_LIST L 
)

Write out named list. In R, this would be read back as list.

Parameters
ctxMVL context pointer that has been initialized for writing
Lpreviously created named list
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 1119 of file libMVL.c.

◆ mvl_write_named_list2()

LIBMVL_OFFSET64 mvl_write_named_list2 ( LIBMVL_CONTEXT ctx,
LIBMVL_NAMED_LIST L,
char *  cl 
)

Write out named list. In R, this would be read back as a list with its class attribute set to the value of cl.

Parameters
ctx: MVL context pointer that has been initialized for writing
L: previously created named list
cl: character string describing list class
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 1141 of file libMVL.c.

◆ mvl_write_named_list_as_data_frame()

LIBMVL_OFFSET64 mvl_write_named_list_as_data_frame ( LIBMVL_CONTEXT ctx,
LIBMVL_NAMED_LIST L,
int  nrows,
LIBMVL_OFFSET64  rownames 
)

Write out named list in the style of R data frames. It is assumed that all entries of L are vectors with the same number of elements.

Parameters
ctx: MVL context pointer that has been initialized for writing
L: previously created named list
nrows: number of elements in each entry of L. Note that packed lists should have length of nrows+1.
rownames: names of individual rows. Set to 0 to omit.
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 1164 of file libMVL.c.

◆ mvl_write_packed_list()

LIBMVL_OFFSET64 mvl_write_packed_list ( LIBMVL_CONTEXT ctx,
long  count,
const long *  str_size,
unsigned char **  str,
LIBMVL_OFFSET64  metadata 
)

Write an array of strings as a packed list data type. This is convenient for storing a lot of different strings.

Parameters
ctx: MVL context pointer that has been initialized for writing
count: number of strings to store
str_size: array of lengths of individual strings. If this is NULL, string lengths are computed automatically. In addition, if any string length is -1, it is also computed automatically.
str: pointer to array of strings
metadata: an optional offset to previously written metadata. Specify LIBMVL_NO_METADATA if not needed
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 785 of file libMVL.c.

◆ mvl_write_string()

LIBMVL_OFFSET64 mvl_write_string ( LIBMVL_CONTEXT ctx,
long  length,
const char *  data,
LIBMVL_OFFSET64  metadata 
)

Write a single C string. In particular, this is handy for providing metadata tags.

Parameters
ctx: MVL context pointer that has been initialized for writing
length: string length. Set to -1 to be computed automatically.
data: string data
metadata: an optional offset to previously written metadata. Specify LIBMVL_NO_METADATA if not needed
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 689 of file libMVL.c.

◆ mvl_write_vector()

LIBMVL_OFFSET64 mvl_write_vector ( LIBMVL_CONTEXT ctx,
int  type,
LIBMVL_OFFSET64  length,
const void *  data,
LIBMVL_OFFSET64  metadata 
)

Write complete MVL vector.

Parameters
ctx: MVL context pointer that has been initialized for writing
type: MVL data type
length: number of elements to write
data: pointer to data
metadata: an optional offset to previously written metadata. Specify LIBMVL_NO_METADATA if not needed
Returns
an offset into the file, suitable for adding to MVL file directory, or to other MVL objects

Definition at line 319 of file libMVL.c.