libMVL
Mappable vector library
|
Go to the documentation of this file.
23 #define LIBMVL_SIGNATURE "MVL0"
24 #define LIBMVL_ENDIANNESS_FLAG 1.0
54 #define LIBMVL_VECTOR_UINT8 1
55 #define LIBMVL_VECTOR_INT32 2
56 #define LIBMVL_VECTOR_INT64 3
57 #define LIBMVL_VECTOR_FLOAT 4
58 #define LIBMVL_VECTOR_DOUBLE 5
59 #define LIBMVL_VECTOR_OFFSET64 100
60 #define LIBMVL_VECTOR_CSTRING 101
62 #define LIBMVL_PACKED_LIST64 102
63 #define LIBMVL_VECTOR_CHECKSUM 103
66 #define LIBMVL_VECTOR_POSTAMBLE1 1000
67 #define LIBMVL_VECTOR_POSTAMBLE2 1001
88 case LIBMVL_VECTOR_CHECKSUM:
105 unsigned int alignment;
132 #define LIBMVL_CHECKSUM_ALGORITHM_INTERNAL1_HASH64 1
139 #define LIBMVL_FULL_CHECKSUMS_DIRECTORY_KEY "MVL_FULL_CHECKSUMS"
147 int checksum_algorithm;
155 #ifndef MVL_STATIC_MEMBERS
173 #define MVL_STATIC_MEMBERS 1
176 #if MVL_STATIC_MEMBERS
285 #define MVL_CONTEXT_DATA(ctx) (ctx->data)
290 #define MVL_CONTEXT_DATA_SIZE(ctx) (ctx->data_size)
293 #define LIBMVL_CTX_FLAG_HAVE_POSIX_FALLOCATE (1<<0)
294 #define LIBMVL_CTX_FLAG_HAVE_FTELLO (1<<1)
296 #define LIBMVL_ERR_FAIL_PREAMBLE -1
297 #define LIBMVL_ERR_FAIL_POSTAMBLE -2
298 #define LIBMVL_ERR_UNKNOWN_TYPE -3
299 #define LIBMVL_ERR_FAIL_VECTOR -4
300 #define LIBMVL_ERR_INCOMPLETE_WRITE -5
301 #define LIBMVL_ERR_INVALID_SIGNATURE -6
302 #define LIBMVL_ERR_WRONG_ENDIANNESS -7
303 #define LIBMVL_ERR_EMPTY_DIRECTORY -8
304 #define LIBMVL_ERR_INVALID_DIRECTORY -9
305 #define LIBMVL_ERR_FTELL -10
306 #define LIBMVL_ERR_CORRUPT_POSTAMBLE -11
307 #define LIBMVL_ERR_INVALID_ATTR_LIST -12
308 #define LIBMVL_ERR_INVALID_OFFSET -13
309 #define LIBMVL_ERR_INVALID_ATTR -14
310 #define LIBMVL_ERR_CANNOT_SEEK -15
311 #define LIBMVL_ERR_INVALID_PARAMETER -16
312 #define LIBMVL_ERR_INVALID_LENGTH -17
313 #define LIBMVL_ERR_INVALID_EXTENT_INDEX -18
314 #define LIBMVL_ERR_CORRUPT_PACKED_LIST -19
315 #define LIBMVL_ERR_UNALIGNED_POINTER -20
316 #define LIBMVL_ERR_UNALIGNED_OFFSET -21
317 #define LIBMVL_ERR_INVALID_HEADER -22
318 #define LIBMVL_ERR_UNKNOWN_CHECKSUM_ALGORITHM -23
319 #define LIBMVL_ERR_CHECKSUM_FAILED -24
320 #define LIBMVL_ERR_NO_CHECKSUMS -25
321 #define LIBMVL_ERR_NO_DATA -26
322 #define LIBMVL_ERR_MVL_FILE_TOO_SHORT -27
350 #define LIBMVL_NO_METADATA 0
354 #define LIBMVL_NULL_OFFSET 0
404 #define MVL_NUMARGS(...) (sizeof((int[]){__VA_ARGS__})/sizeof(int))
412 #define MVL_WVEC(ctx, type, ...) mvl_write_vector_inline(ctx, type, MVL_NUMARGS(__VA_ARGS__), 0, __VA_ARGS__)
467 #define mvl_vector_type(data) (((LIBMVL_VECTOR_HEADER *)(data))->type)
471 #define mvl_vector_length(data) (((LIBMVL_VECTOR_HEADER *)(data))->length)
484 #if MVL_STATIC_MEMBERS
494 #define mvl_vector_data(data) ((((LIBMVL_VECTOR *)(data))->u))
496 #define mvl_vector_data(data) (*(((LIBMVL_VECTOR *)(data))))
523 #define mvl_vector_data_uint8(data) ((unsigned char *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
524 #define mvl_vector_data_int32(data) ((int *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
525 #define mvl_vector_data_int64(data) ((long long int *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
526 #define mvl_vector_data_float(data) ((float *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
527 #define mvl_vector_data_double(data) ((double *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
528 #define mvl_vector_data_offset(data) ((LIBMVL_OFFSET64 *)(((const char *) data)+sizeof(LIBMVL_VECTOR_HEADER)))
532 #define mvl_vector_metadata_offset(data) ((((LIBMVL_VECTOR_HEADER *)(data))->metadata))
557 if(offset2 <
sizeof(
LIBMVL_VECTOR_HEADER) || offset2>data_size)
return(LIBMVL_ERR_INVALID_OFFSET);
590 return(offset==0 ? NULL : (
LIBMVL_VECTOR *)(&(((
unsigned char*)data)[offset])));
703 if(ofs==0)
return(NAN);
727 if(ofs==0)
return(def);
761 #define MVL_NA_STRING "\000\000NA"
762 #define MVL_NA_STRING_LENGTH 4
767 if((s[0]==0 && s[1]==0 && s[2]==
'N' && s[3]==
'A'))
return 1;
782 if((idx+1>=len) || (idx<0))
return 1;
785 return(mvl_string_is_na(&(((
const char *)(data))[start]), stop-start));
799 if((idx+1>=len) || (idx<0))
return -1;
817 if((idx+1>=len) || (idx<0))
return NULL;
819 return(&(((
const unsigned char *)(data))[start]));
834 if((idx+1>=len) || (idx<0))
return -2;
837 if(start>data_size)
return(-3);
838 if(stop>data_size)
return(-4);
854 #define LIBMVL_SORT_LEXICOGRAPHIC 1
855 #define LIBMVL_SORT_LEXICOGRAPHIC_DESC 2
891 x*=18397683724573214587LLU;
893 x*=13397683724573242421LLU;
918 #define MVL_SEED_HASH_VALUE 0xabcdef
934 for(i=0;i<count;i++) {
936 x*=13397683724573242421LLU;
946 static inline void mvl_accumulate_hash64x8(
LIBMVL_OFFSET64 *x,
const unsigned char *data0,
const unsigned char *data1,
const unsigned char *data2,
const unsigned char *data3,
const unsigned char *data4,
const unsigned char *data5,
const unsigned char *data6,
const unsigned char *data7,
LIBMVL_OFFSET64 count)
959 for(i=0;i<count;i++) {
961 x ## k=( (x ## k) +(data ## k)[i]); \
962 (x ## k)*=13397683724573242421LLU; \
963 (x ## k) ^= (x ## k)>>33; \
1005 unsigned *d_ext=(
unsigned *)&d;
1006 for(i=0;i<count;i++) {
1009 x*=13397683724573242421LLU;
1012 x*=13397683724573242421LLU;
1036 unsigned *d_ext=(
unsigned *)&d;
1037 for(i=0;i<count;i++) {
1040 x*=13397683724573242421LLU;
1043 x*=13397683724573242421LLU;
1068 unsigned *d_ext=(
unsigned *)&d;
1069 for(i=0;i<count;i++) {
1072 x*=13397683724573242421LLU;
1075 x*=13397683724573242421LLU;
1100 unsigned *d_ext=(
unsigned *)&d;
1101 for(i=0;i<count;i++) {
1104 x*=13397683724573242421LLU;
1107 x*=13397683724573242421LLU;
1129 #define LIBMVL_ACCUMULATE_HASH 0
1130 #define LIBMVL_INIT_HASH 1
1131 #define LIBMVL_FINALIZE_HASH 2
1132 #define LIBMVL_COMPLETE_HASH (LIBMVL_INIT_HASH | LIBMVL_FINALIZE_HASH)
1159 #define MVL_FLAG_OWN_HASH (1<<0)
1160 #define MVL_FLAG_OWN_HASH_MAP (1<<1)
1161 #define MVL_FLAG_OWN_FIRST (1<<2)
1162 #define MVL_FLAG_OWN_NEXT (1<<3)
1163 #define MVL_FLAG_OWN_VEC_TYPES (1<<4)
1234 #ifndef LIBMVL_EXTENT_INLINE_SIZE
1235 #define LIBMVL_EXTENT_INLINE_SIZE 4
1296 if(hash==ei->hash_map.
hash[idx]) {
1302 idx=ei->hash_map.
next[idx];
1327 #define MVL_EXTENT_INDEX 1
1328 #define MVL_SPATIAL_INDEX1 2
void mvl_normalize_vector(const LIBMVL_VECTOR *vec, const LIBMVL_VEC_STATS *stats, LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, double *out)
normalize vector
double nrepeat
number of stretches with identical elements
void mvl_init_extent_list(LIBMVL_EXTENT_LIST *el)
Initialize freshly allocated extent list structure.
static void mvl_empty_extent_list(LIBMVL_EXTENT_LIST *el)
Alter extent list to contain no extents without freeing memory.
LIBMVL_OFFSET64 mvl_write_named_list(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L)
Write out named list. In R, this would be read back as list.
const char * mvl_strerror(LIBMVL_CONTEXT *ctx)
Obtain description of error code.
static void mvl_clear_error(LIBMVL_CONTEXT *ctx)
Clear error code.
LIBMVL_OFFSET64 * offset
First extent element.
LIBMVL_OFFSET64 mvl_write_named_list_as_data_frame(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L, int nrows, LIBMVL_OFFSET64 rownames)
Write out named list in the style of R data frames. It is assumed that all entries of L are vectors w...
LIBMVL_OFFSET64 * stop
First element just past the extent end.
LIBMVL_OFFSET64 mvl_indexed_copy_vector(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 index_count, const LIBMVL_OFFSET64 *indices, const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 data_length, LIBMVL_OFFSET64 metadata, LIBMVL_OFFSET64 max_buffer)
Write MVL vector that contains data at specific indices. The indices can repeat, and can themselves b...
int mvl_verify_full_checksum_vector(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size)
Compute and verify checksums for the entire area covered by checksum vector.
static LIBMVL_OFFSET64 mvl_accumulate_hash64(LIBMVL_OFFSET64 x, const unsigned char *data, LIBMVL_OFFSET64 count)
Accumulate hash from a piece of data.
void mvl_free_hash_map(HASH_MAP *hash_map)
Free allocated HASH_MAP.
#define mvl_vector_data_double(data)
double center
a value in the "middle" of the vector
LIBMVL_OFFSET64 hash_map_size
size of hash_map array, should be power of 2
static LIBMVL_OFFSET64 mvl_randomize_bits64(LIBMVL_OFFSET64 x)
Randomize bits of 64-bit numbers, typically after accumulating a hash value.
static double mvl_named_list_get_double(LIBMVL_NAMED_LIST *L, const void *data, long tag_length, const char *tag, long idx)
Find an entry in a named list and return its idx value as a double.
void mvl_close(LIBMVL_CONTEXT *ctx)
Write out MVL file directory and postamble and close file.
LIBMVL_OFFSET64 * start
First extent element.
static double mvl_as_double_default(const LIBMVL_VECTOR *vec, long idx, double def)
Return idx vector entry as a double, with default for missing values.
This structure describes a named list - an array of LIBMVL_OFFSET64 entries each with a character nam...
LIBMVL_OFFSET64 first_count
Number of valid entries in first array - this is populated by mvl_find_groups()
LIBMVL_OFFSET64 mvl_compute_hash_map_size(LIBMVL_OFFSET64 hash_count)
Compute suggested size of hash map given the number of entries to hash. Hash map size should always b...
int mvl_find_matches(LIBMVL_OFFSET64 key_indices_count, const LIBMVL_OFFSET64 *key_indices, LIBMVL_OFFSET64 key_vec_count, LIBMVL_VECTOR **key_vec, void **key_vec_data, LIBMVL_OFFSET64 *key_vec_data_length, LIBMVL_OFFSET64 *key_hash, LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, HASH_MAP *hm, LIBMVL_OFFSET64 *key_last, LIBMVL_OFFSET64 pairs_size, LIBMVL_OFFSET64 *key_match_indices, LIBMVL_OFFSET64 *match_indices)
Compute pairs of merge indices. This is similar to JOIN operation in SQL.
static LIBMVL_OFFSET64 mvl_packed_list_get_entry_bytelength(const LIBMVL_VECTOR *vec, LIBMVL_OFFSET64 idx)
Get length in bytes of string element idx from a packed list.
LIBMVL_OFFSET64 count
extent has count valid elements
static LIBMVL_OFFSET64 mvl_named_list_get_offset(LIBMVL_NAMED_LIST *L, const void *data, long tag_length, const char *tag, long idx)
Find an entry in a named list and return its idx value as an offset.
static int mvl_element_size(int type)
Return the element size in bytes for a particular MVL type.
void mvl_load_image(LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 length)
Initialize MVL context to operate with memory mapped area data.
static const unsigned char * mvl_packed_list_get_entry(const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 idx)
Get pointer to the start of string element idx from a packed list.
void mvl_find_groups(LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, HASH_MAP *hm)
This function transforms HASH_MAP into a list of groups. Similar to GROUP BY clause in SQL.
static LIBMVL_VECTOR * mvl_validated_vector_from_offset(void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset)
A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR s...
void mvl_compute_hash_map(HASH_MAP *hm)
Compute hash map. This assumes that hm->hash array has been populated with hm->hash_count hashes comp...
LIBMVL_OFFSET64 * next
array of next indices in each group. ~0LLU indicates end of group
LIBMVL_OFFSET64 mvl_write_concat_vectors(LIBMVL_CONTEXT *ctx, int type, long nvec, const long *lengths, void **data, LIBMVL_OFFSET64 metadata)
Write complete MVL vector concatenating data from many vectors or arrays.
int mvl_load_extent_index(LIBMVL_CONTEXT *ctx, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset, LIBMVL_EXTENT_INDEX *ei)
Load extent index from memory mapped MVL file.
void mvl_free_partition_arrays(LIBMVL_PARTITION *el)
free arrays of previously allocated partition. This function does not free the structure itself.
double scale
normalization scale
#define LIBMVL_VECTOR_DOUBLE
static LIBMVL_OFFSET64 mvl_accumulate_double_hash64(LIBMVL_OFFSET64 x, const double *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 64-bit floats. The floats are hashed by value, not representation,...
HASH_MAP * mvl_allocate_hash_map(LIBMVL_OFFSET64 max_index_count)
Create HASH_MAP structure.
LIBMVL_OFFSET64 mvl_find_list_entry(LIBMVL_NAMED_LIST *L, long tag_length, const char *tag)
Find existing entry inside LIBMVL_NAMED_LIST. If several identically named entries exist this functio...
This structure is written last to close MVL file. It contains an offset to MVL directory that can be ...
#define mvl_vector_length(data)
Return number of elements from a pointer to LIBMVL_VECTOR.
static int mvl_packed_list_validate_entry(const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 idx)
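Check that string element idx of a packed list lies within the bounds of the data; a negative value is returned when the index or the entry boundaries are out of range.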
LIBMVL_OFFSET64 mvl_find_directory_entry(LIBMVL_CONTEXT *ctx, const char *tag)
Find entry in MVL file directory.
int * vec_types
Types of vectors used to produce hashes.
static LIBMVL_OFFSET64 mvl_accumulate_int64_hash64(LIBMVL_OFFSET64 x, const long long int *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 64-bit integers. The integers are hashed by value,...
void mvl_extend_extent_list(LIBMVL_EXTENT_LIST *el, LIBMVL_OFFSET64 nelem)
Increase storage of previously allocated extent list.
static double mvl_named_list_get_double_default(LIBMVL_NAMED_LIST *L, const void *data, long tag_length, const char *tag, long idx, double def)
Find an entry in a named list and return its idx value as a double.
LIBMVL_OFFSET64 mvl_hash_match_count(LIBMVL_OFFSET64 key_count, const LIBMVL_OFFSET64 *key_hash, HASH_MAP *hm)
Find count of matches between hashes of two sets.
LIBMVL_OFFSET64 mvl_write_attributes_list(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L)
Write out R-style attribute list.
LIBMVL_OFFSET64 mvl_write_named_list2(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L, char *cl)
Write out named list. In R, this would be read back as list with class attribute set to "cl".
void mvl_add_directory_entry(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag)
Add an entry to the top level directory of MVL file.
static LIBMVL_VECTOR * mvl_vector_from_offset(void *data, LIBMVL_OFFSET64 offset)
A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR s...
#define mvl_vector_data_offset(data)
This structure is used for constructing associative maps and also for describing index groupings.
static LIBMVL_OFFSET64 mvl_as_offset(const LIBMVL_VECTOR *vec, long idx)
Return idx vector entry as an offset.
LIBMVL_OFFSET64 mvl_write_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
Write complete MVL vector.
static LIBMVL_OFFSET64 mvl_accumulate_int32_hash64(LIBMVL_OFFSET64 x, const int *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 32-bit integers. The integers are hashed by value,...
LIBMVL_OFFSET64 vec_count
Number of vectors used to produce hashes.
void mvl_free_named_list(LIBMVL_NAMED_LIST *L)
Free structure for LIBMVL_NAMED_LIST.
LIBMVL_OFFSET64 mvl_write_cached_string(LIBMVL_CONTEXT *ctx, long length, const char *data)
Write a single C string if it has not been written before, otherwise return offset to previously writ...
#define LIBMVL_PACKED_LIST64
LIBMVL_OFFSET64 size
Space allocated for start and stop arrays.
#define LIBMVL_VECTOR_FLOAT
#define mvl_vector_type(data)
Return type of data from a pointer to LIBMVL_VECTOR.
void mvl_free_extent_index_arrays(LIBMVL_EXTENT_INDEX *ei)
free arrays of previously allocated extent index. This function does not free the structure itself.
LIBMVL_OFFSET64 mvl_write_string(LIBMVL_CONTEXT *ctx, long length, const char *data, LIBMVL_OFFSET64 metadata)
Write a single C string. In particular, this is handy for providing metadata tags.
void mvl_free_extent_list_arrays(LIBMVL_EXTENT_LIST *el)
free arrays of previously allocated extent list. This function does not free the structure itself.
#define mvl_vector_data_int64(data)
LIBMVL_NAMED_LIST * mvl_read_attributes_list(LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 metadata_offset)
Read back MVL attributes list, typically used to describe metadata. This function also initializes ha...
static LIBMVL_OFFSET64 mvl_accumulate_float_hash64(LIBMVL_OFFSET64 x, const float *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 32-bit floats. The floats are hashed by value, not representation,...
int mvl_hash_range(LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
This function is used to compute 64-bit hash of vector values. Array hash[] is passed in and contains ...
double min
minimum value of vector entries
LIBMVL_VECTOR is the basic unit of information storage.
LIBMVL_OFFSET64 hash_size
size of hash, first and next arrays
LIBMVL_OFFSET64 size
Space allocated for start and stop arrays.
LIBMVL_OFFSET64 flags
flags describing HASH_MAP state
double max
maximum value of vector entries
void mvl_extend_partition(LIBMVL_PARTITION *el, LIBMVL_OFFSET64 nelem)
Increase storage of previously allocated partition.
void mvl_compute_vec_stats(const LIBMVL_VECTOR *vec, LIBMVL_VEC_STATS *stats)
Compute vector statistics, such as a bounding box.
int mvl_verify_checksum_vector3(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, void *start, void *stop)
Compute and verify checksums for a given area. It works just like mvl_verify_checksum_vector() but ta...
LIBMVL_NAMED_LIST * mvl_create_named_list(int size)
Allocate and initialize structure for LIBMVL_NAMED_LIST.
LIBMVL_OFFSET64 mvl_write_packed_list(LIBMVL_CONTEXT *ctx, long count, const long *str_size, unsigned char **str, LIBMVL_OFFSET64 metadata)
Write an array of strings as a packed list data type. This is convenient for storing a lot of differe...
#define LIBMVL_VECTOR_UINT8
static LIBMVL_OFFSET64 mvl_vector_nentries(void *vec)
Return number of entries in the vector. Currently this is the same as mvl_vector_length() for all typ...
static int mvl_get_error(LIBMVL_CONTEXT *ctx)
Obtain integer error code.
#define LIBMVL_VECTOR_INT64
int mvl_compute_extent_index(LIBMVL_EXTENT_INDEX *ei, LIBMVL_OFFSET64 count, LIBMVL_VECTOR **vec, void **data, LIBMVL_OFFSET64 *data_length)
Compute an extent index.
#define MVL_CONTEXT_DATA(ctx)
An index into a table-like set of vectors with equal number of elements.
LIBMVL_OFFSET64 hash_count
Number of valid entries in hash, hash_count < hash_size and hash_count < hash_map_size.
#define LIBMVL_VECTOR_INT32
int mvl_verify_checksum_vector(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 start, LIBMVL_OFFSET64 stop)
Compute and verify checksums for a given area.
LIBMVL_CONTEXT * mvl_create_context(void)
Create MVL context.
List of offsets partitioning the vector. First element is always 0, last element is vector size.
long mvl_add_list_entry(LIBMVL_NAMED_LIST *L, long tag_length, const char *tag, LIBMVL_OFFSET64 offset)
Add entry to LIBMVL_NAMED_LIST. The entry is always appended to the end.
LIBMVL_OFFSET64 mvl_write_extent_index(LIBMVL_CONTEXT *ctx, LIBMVL_EXTENT_INDEX *ei)
Write extent index to MVL file.
LIBMVL_OFFSET64 * first
array of indices in each group
LIBMVL_OFFSET64 * hash
Input hashes, used by mvl_compute_hash_map()
int mvl_hash_indices(LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
This function is used to compute 64-bit hash of vector values. Array hash[] is passed in and contains ...
void mvl_find_repeats(LIBMVL_PARTITION *partition, LIBMVL_OFFSET64 count, LIBMVL_VECTOR **vec, void **data, LIBMVL_OFFSET64 *data_length)
Compute list of extents describing stretches of data with identical values.
This structure is written at the beginning of MVL file. It contains the signature identifying MVL for...
LIBMVL_OFFSET64 mvl_write_hash64_checksum_vector(LIBMVL_CONTEXT *ctx, void *base, LIBMVL_OFFSET64 checksum_area_start, LIBMVL_OFFSET64 checksum_area_stop, LIBMVL_OFFSET64 checksum_block_size)
Compute and write checksums for a given area.
static double mvl_as_double(const LIBMVL_VECTOR *vec, long idx)
Return idx vector entry as a double.
LIBMVL_NAMED_LIST * mvl_read_named_list(LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset)
Read back MVL named list. This function also initializes the hash table for fast access.
LIBMVL_NAMED_LIST * mvl_create_R_attributes_list(LIBMVL_CONTEXT *ctx, const char *R_class)
Create R-style attribute list for class given by R_class, which could be, for example,...
void mvl_open(LIBMVL_CONTEXT *ctx, FILE *f)
Prepare context for writing to file f.
#define LIBMVL_VECTOR_CSTRING
#define mvl_vector_data_int32(data)
This structure describes MVL context - a collection of system data associated with a single MVL file.
LIBMVL_OFFSET64 mvl_write_directory(LIBMVL_CONTEXT *ctx)
Write out MVL file directory with entries collected so far. If this is called multiple times only the...
void mvl_add_directory_entry_n(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag, LIBMVL_OFFSET64 tag_size)
Add entry to the top level directory of MVL file.
static unsigned mvl_randomize_bits32(unsigned x)
Randomize bits of 32-bit numbers, typically after accumulating a hash value.
void mvl_init_extent_index(LIBMVL_EXTENT_INDEX *ei)
Initialize freshly allocated extent index structure.
static void mvl_get_extents(LIBMVL_EXTENT_INDEX *ei, LIBMVL_OFFSET64 hash, LIBMVL_EXTENT_LIST *el)
Find extents in index corresponding to a given hash.
int mvl_sort_indices(LIBMVL_OFFSET64 indices_count, LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, int sort_function)
Given a table-like set of vectors of equal length arrange indices so that the columns are sorted lexi...
LIBMVL_OFFSET64 count
extent has count valid elements
double average_repeat_length
average length of stretch with identical elements
void mvl_rewrite_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 base_offset, LIBMVL_OFFSET64 idx, long length, const void *data)
Write more data to MVL vector that has been previously created with mvl_start_write_vector().
static int mvl_packed_list_is_na(const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 idx)
Check whether packed list entry is a special string that indicates a missing value.
static int mvl_validate_vector(LIBMVL_OFFSET64 offset, const void *data, LIBMVL_OFFSET64 data_size)
This function returns 0 if the offset into data points to a valid vector, or a negative error code ot...
LIBMVL_OFFSET64 mvl_get_character_class_offset(LIBMVL_CONTEXT *ctx)
Get offset to metadata describing R-style character class - an array of strings. This is convenient f...
unsigned long long LIBMVL_OFFSET64
MVL unsigned 64-bit type used for describing offsets into loaded data.
void mvl_free_context(LIBMVL_CONTEXT *ctx)
Release memory associated with MVL context.
#define mvl_vector_data_float(data)
static int mvl_validate_vector2(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset)
A convenience version of mvl_validate_vector() that uses data and data_size from MVL context....
#define LIBMVL_VECTOR_OFFSET64
List of extents - ranges of consecutive indices. Similar to partition, but they do not have to foll...
int mvl_verify_checksum_vector2(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 vector_offset)
Compute and verify checksums for the entire area occupied by given LIBMVL_VECTOR. Metadata is not che...
LIBMVL_OFFSET64 * hash_map
This is an associative table mapping hash & (hash_map_size-1) into indices in the "first" array.
LIBMVL_OFFSET64 mvl_start_write_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 expected_length, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
Begin write of MVL vector. This is only needed if the vector has to be written in parts,...
void mvl_recompute_named_list_hash(LIBMVL_NAMED_LIST *L)
Recompute named list hash.
#define MVL_CONTEXT_DATA_SIZE(ctx)