libMVL
Mappable vector library
libMVL.c
Go to the documentation of this file.
1 
2 /* (c) Vladimir Dergachev 2019-2021 */
3 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <stdarg.h>
13 #include <fcntl.h>
14 #ifndef __WIN32__
15 #include <alloca.h>
16 #else
17 #include <malloc.h>
18 #endif
19 
20 #ifdef RMVL_PACKAGE
21 #include <R.h>
22 #include <Rinternals.h>
23 #endif
24 
25 #include "libMVL.h"
26 
27 static void *do_malloc(LIBMVL_OFFSET64 a, LIBMVL_OFFSET64 b)
28 {
29 void *r;
30 int i=0;
31 LIBMVL_OFFSET64 total_size;
32 
33 /* basic sanity checks */
34 if(a<1)a=1;
35 if(b<1)b=1;
36 total_size=a*b;
37 if(total_size<1)total_size=1;
38 
39 if(total_size/b<a) {
40  #ifdef USING_R
41  Rprintf("libMVL: *** INTERNAL ERROR: Could not allocate %llu chunks of %llu bytes each because of overflow %llu total)\n",a,b,a*b);
42  #else
43  fprintf(stderr,"libMVL: *** INTERNAL ERROR: Could not allocate %llu chunks of %llu bytes each because of overflow %llu total\n",a,b,a*b);
44  #endif
45  return(NULL);
46  }
47 
48 r=malloc(total_size);
49 while(r==NULL){
50 #ifdef USING_R
51  Rprintf("libMVL: Could not allocate %llu chunks of %llu bytes each (%llu bytes total)\n",a,b,a*b);
52 #else
53  fprintf(stderr,"libMVL: Could not allocate %llu chunks of %llu bytes each (%llu bytes total)\n",a,b,a*b);
54 #endif
55 // if(i>args_info.memory_allocation_retries_arg)exit(-1);
56  sleep(10);
57  r=malloc(total_size);
58  i++;
59  }
60 //if(a*b>10e6)madvise(r, a*b, MADV_HUGEPAGE);
61 return r;
62 }
63 
64 static inline char *memndup(const char *s, LIBMVL_OFFSET64 len)
65 {
66 char *p;
67 int i;
68 p=do_malloc(len+1, 1);
69 for(i=0;i<len;i++)p[i]=s[i];
70 p[len]=0;
71 return(p);
72 }
73 
74 #ifdef __APPLE__
75 
76 #else
77 
78 #define HAVE_FTELLO
79 
80 #endif
81 
82 off_t do_ftello(FILE *f)
83 {
84 #ifdef HAVE_FTELLO
85 return(ftello(f));
86 #else
87 /* ftello() is broken on MacOS >= 10.15 */
88 fflush(f);
89 return(lseek(fileno(f), 0, SEEK_CUR));
90 #endif
91 }
92 
93 #ifndef HAVE_POSIX_FALLOCATE
94 
95 #if _POSIX_C_SOURCE >= 200112L
96 #define HAVE_POSIX_FALLOCATE 1
97 #else
98 #define HAVE_POSIX_FALLOCATE 0
99 #endif
100 
101 #endif
102 
103 static int do_fallocate(FILE *f, LIBMVL_OFFSET64 offset, LIBMVL_OFFSET64 len)
104 {
105 #if HAVE_POSIX_FALLOCATE
106 
107 return(posix_fallocate(fileno(f), offset, len));
108 
109 #else
110 
111 off_t cur, end, i;
112 int err;
113 #ifndef FALLOCATE_BUF_SIZE
114 #define FALLOCATE_BUF_SIZE 512
115 #endif
116 char buf[FALLOCATE_BUF_SIZE];
117 
118 cur=do_ftello(f);
119 if(cur<0)return(cur);
120 
121 if((err=fseeko(f, 0, SEEK_END))<0) {
122  return(err);
123  }
124 
125 end=do_ftello(f);
126 if(end<0)return(end);
127 
128 if(end>=(offset+len))return(0);
129 
130 memset(buf, 0, FALLOCATE_BUF_SIZE);
131 
132 for(i=end;i<offset+len;i+=FALLOCATE_BUF_SIZE) {
133  size_t cnt=offset+len-i;
134  if(cnt>FALLOCATE_BUF_SIZE)cnt=FALLOCATE_BUF_SIZE;
135  fwrite(buf, 1, cnt, f);
136  }
137 
138 if((err=fseeko(f, cur, SEEK_SET))<0) {
139  return(err);
140  }
141 return(0);
142 #endif
143 }
144 
145 
151 {
152 LIBMVL_CONTEXT *ctx;
153 //ctx=calloc(1, sizeof(*ctx));
154 ctx=do_malloc(1, sizeof(*ctx));
155 if(ctx==NULL)return(ctx);
156 
157 memset(ctx, 0, sizeof(*ctx));
158 
159 ctx->error=0;
160 ctx->abort_on_error=1;
161 ctx->alignment=32;
162 
163 //ctx->directory=do_malloc(ctx->dir_size, sizeof(*ctx->directory));
164 ctx->directory=mvl_create_named_list(100);
165 mvl_recompute_named_list_hash(ctx->directory);
166 ctx->directory_offset=-1;
167 
168 ctx->full_checksums_offset=LIBMVL_NULL_OFFSET;
169 
170 ctx->character_class_offset=0;
171 
172 ctx->cached_strings=mvl_create_named_list(32);
173 
174 ctx->flags=0;
175 
176 #ifdef HAVE_POSIX_FALLOCATE
177 ctx->flags|=LIBMVL_CTX_FLAG_HAVE_POSIX_FALLOCATE;
178 #endif
179 
180 #ifdef HAVE_FTELLO
181 ctx->flags|=LIBMVL_CTX_FLAG_HAVE_FTELLO;
182 #endif
183 
184 return(ctx);
185 }
186 
191 {
192 mvl_free_named_list(ctx->directory);
193 // for(LIBMVL_OFFSET64 i=0;i<ctx->dir_free;i++)
194 // free(ctx->directory[i].tag);
195 // free(ctx->directory);
196 mvl_free_named_list(ctx->cached_strings);
197 free(ctx);
198 }
199 
200 void mvl_set_error(LIBMVL_CONTEXT *ctx, int error)
201 {
202 ctx->error=error;
203 if(ctx->abort_on_error) {
204 #ifdef USING_R
205  Rprintf("*** ERROR: libMVL code %d: %s\n", error, mvl_strerror(ctx));
206 #else
207  fprintf(stderr, "*** ERROR: libMVL code %d: %s\n", error, mvl_strerror(ctx));
208  exit(-1);
209 #endif
210  }
211 }
212 
217 const char * mvl_strerror(LIBMVL_CONTEXT *ctx)
218 {
219 switch(ctx->error) {
220  case 0:
221  return("no error");
222  case LIBMVL_ERR_FAIL_PREAMBLE:
223  return("invalid preamble");
224  case LIBMVL_ERR_FAIL_POSTAMBLE:
225  return("invalid postamble");
226  case LIBMVL_ERR_UNKNOWN_TYPE :
227  return("unknown type");
228  case LIBMVL_ERR_FAIL_VECTOR:
229  return("unknown type");
230  case LIBMVL_ERR_INCOMPLETE_WRITE:
231  return("incomplete write");
232  case LIBMVL_ERR_INVALID_SIGNATURE:
233  return("invalid signature");
234  case LIBMVL_ERR_WRONG_ENDIANNESS:
235  return("wrong endianness");
236  case LIBMVL_ERR_EMPTY_DIRECTORY:
237  return("empty MVL directory");
238  case LIBMVL_ERR_INVALID_DIRECTORY:
239  return("invalid MVL directory");
240  case LIBMVL_ERR_FTELL:
241  return("call to ftell() failed");
242  case LIBMVL_ERR_CORRUPT_POSTAMBLE:
243  return("corrupt postamble");
244  case LIBMVL_ERR_INVALID_ATTR_LIST:
245  return("invalid attribute list");
246  case LIBMVL_ERR_INVALID_OFFSET:
247  return("invalid offset");
248  case LIBMVL_ERR_INVALID_ATTR:
249  return("invalid attributes");
250  case LIBMVL_ERR_CANNOT_SEEK:
251  return("seek() call failed");
252  case LIBMVL_ERR_INVALID_PARAMETER:
253  return("invalid parameter");
254  case LIBMVL_ERR_INVALID_LENGTH:
255  return("invalid length");
256  case LIBMVL_ERR_INVALID_EXTENT_INDEX:
257  return("invalid extent index");
258  case LIBMVL_ERR_UNALIGNED_POINTER:
259  return("pointer is not properly aligned");
260  case LIBMVL_ERR_UNALIGNED_OFFSET:
261  return("an offset or size parameter is not properly aligned");
262  case LIBMVL_ERR_INVALID_HEADER:
263  return("invalid or inappropriate vector header");
264  case LIBMVL_ERR_UNKNOWN_CHECKSUM_ALGORITHM:
265  return("unknown checksum algorithm");
266  case LIBMVL_ERR_CHECKSUM_FAILED:
267  return("checksum did not match, corrupt data likely");
268  case LIBMVL_ERR_NO_CHECKSUMS:
269  return("no checksums found, cannot verify");
270  case LIBMVL_ERR_NO_DATA:
271  return("data is NULL and mvl_load_image() has not been called on MVL context");
272  case LIBMVL_ERR_MVL_FILE_TOO_SHORT:
273  return("MVL file length is too short, indicating a corrupt or wrong file");
274  default:
275  return("unknown error");
276 
277  }
278 }
279 
280 void mvl_write(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 length, const void *data)
281 {
283 n=fwrite(data, 1, length, ctx->f);
284 if(n<length)mvl_set_error(ctx, LIBMVL_ERR_INCOMPLETE_WRITE);
285 }
286 
287 void mvl_rewrite(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, LIBMVL_OFFSET64 length, const void *data)
288 {
290 off_t cur;
291 cur=do_ftello(ctx->f);
292 if(cur<0) {
293  mvl_set_error(ctx, LIBMVL_ERR_FTELL);
294  return;
295  }
296 if(fseeko(ctx->f, offset, SEEK_SET)<0) {
297  mvl_set_error(ctx, LIBMVL_ERR_CANNOT_SEEK);
298  return;
299  }
300 n=fwrite(data, 1, length, ctx->f);
301 if(n<length)mvl_set_error(ctx, LIBMVL_ERR_INCOMPLETE_WRITE);
302 if(fseeko(ctx->f, cur, SEEK_SET)<0) {
303  mvl_set_error(ctx, LIBMVL_ERR_CANNOT_SEEK);
304  return;
305  }
306 }
307 
308 void mvl_write_preamble(LIBMVL_CONTEXT *ctx)
309 {
310 memset(&(ctx->tmp_preamble), 0, sizeof(ctx->tmp_preamble));
311 memcpy(ctx->tmp_preamble.signature, LIBMVL_SIGNATURE, 4);
312 ctx->tmp_preamble.endianness=LIBMVL_ENDIANNESS_FLAG;
313 ctx->tmp_preamble.alignment=ctx->alignment;
314 mvl_write(ctx, sizeof(ctx->tmp_preamble), &ctx->tmp_preamble);
315 }
316 
317 void mvl_write_postamble(LIBMVL_CONTEXT *ctx)
318 {
319 memset(&(ctx->tmp_postamble), 0, sizeof(ctx->tmp_postamble));
320 ctx->tmp_postamble.directory=ctx->directory_offset;
321 #ifdef MVL_OLD_DIRECTORY
322 ctx->tmp_postamble.type=LIBMVL_VECTOR_POSTAMBLE1;
323 #else
324 ctx->tmp_postamble.type=LIBMVL_VECTOR_POSTAMBLE2;
325 #endif
326 mvl_write(ctx, sizeof(ctx->tmp_postamble), &ctx->tmp_postamble);
327 }
328 
337 LIBMVL_OFFSET64 mvl_write_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
338 {
339 LIBMVL_OFFSET64 byte_length;
340 int padding;
341 unsigned char *zeros;
342 off_t offset;
343 
344 memset(&(ctx->tmp_vh), 0, sizeof(ctx->tmp_vh));
345 
346 byte_length=length*mvl_element_size(type);
347 if(byte_length<=0) {
348  mvl_set_error(ctx, LIBMVL_ERR_UNKNOWN_TYPE);
349  return(LIBMVL_NULL_OFFSET);
350  }
351 padding=ctx->alignment-((byte_length+sizeof(ctx->tmp_vh)) & (ctx->alignment-1));
352 padding=padding & (ctx->alignment-1);
353 
354 ctx->tmp_vh.length=length;
355 ctx->tmp_vh.type=type;
356 ctx->tmp_vh.metadata=metadata;
357 
358 offset=do_ftello(ctx->f);
359 
360 if(offset<0) {
361  perror("mvl_write_vector");
362  mvl_set_error(ctx, LIBMVL_ERR_FTELL);
363  return(LIBMVL_NULL_OFFSET);
364  }
365 
366 mvl_write(ctx, sizeof(ctx->tmp_vh), &ctx->tmp_vh);
367 mvl_write(ctx, byte_length, data);
368 
369 if(padding>0) {
370  zeros=alloca(padding);
371  memset(zeros, 0, padding);
372  mvl_write(ctx, padding, zeros);
373  }
374 
375 return(offset);
376 }
377 
387 LIBMVL_OFFSET64 mvl_start_write_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 expected_length, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
388 {
389 LIBMVL_OFFSET64 byte_length, total_byte_length;
390 int padding;
391 unsigned char *zeros;
392 off_t offset;
393 
394 if(length>expected_length) {
395  mvl_set_error(ctx, LIBMVL_ERR_INVALID_PARAMETER);
396  return(LIBMVL_NULL_OFFSET);
397  }
398 
399 
400 memset(&(ctx->tmp_vh), 0, sizeof(ctx->tmp_vh));
401 
402 switch(type) {
404  case LIBMVL_VECTOR_UINT8:
405  byte_length=length;
406  total_byte_length=expected_length;
407  break;
408  case LIBMVL_VECTOR_INT32:
409  case LIBMVL_VECTOR_FLOAT:
410  byte_length=length*4;
411  total_byte_length=expected_length*4;
412  break;
413  case LIBMVL_VECTOR_INT64:
417  byte_length=length*8;
418  total_byte_length=expected_length*8;
419  break;
420  default:
421  mvl_set_error(ctx, LIBMVL_ERR_UNKNOWN_TYPE);
422  return(LIBMVL_NULL_OFFSET);
423  }
424 padding=ctx->alignment-((total_byte_length+sizeof(ctx->tmp_vh)) & (ctx->alignment-1));
425 padding=padding & (ctx->alignment-1);
426 
427 ctx->tmp_vh.length=expected_length;
428 ctx->tmp_vh.type=type;
429 ctx->tmp_vh.metadata=metadata;
430 
431 offset=do_ftello(ctx->f);
432 
433 if(offset<0) {
434  perror("mvl_write_vector");
435  mvl_set_error(ctx, LIBMVL_ERR_FTELL);
436  return(LIBMVL_NULL_OFFSET);
437  }
438 
439 if(do_fallocate(ctx->f, offset, sizeof(ctx->tmp_vh)+total_byte_length+padding)) {
440  mvl_set_error(ctx, LIBMVL_ERR_INCOMPLETE_WRITE);
441  return(LIBMVL_NULL_OFFSET);
442  }
443 
444 mvl_write(ctx, sizeof(ctx->tmp_vh), &ctx->tmp_vh);
445 if(byte_length>0)mvl_write(ctx, byte_length, data);
446 if(total_byte_length>byte_length) {
447  if(fseeko(ctx->f, total_byte_length-byte_length, SEEK_CUR)<0) {
448  mvl_set_error(ctx, LIBMVL_ERR_CANNOT_SEEK);
449  return(LIBMVL_NULL_OFFSET);
450  }
451  }
452 
453 if(padding>0) {
454  zeros=alloca(padding);
455  memset(zeros, 0, padding);
456  mvl_write(ctx, padding, zeros);
457  }
458 
459 return(offset);
460 }
461 
470 void mvl_rewrite_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 base_offset, LIBMVL_OFFSET64 idx, long length, const void *data)
471 {
472 LIBMVL_OFFSET64 byte_length, elt_size;
473 
474 elt_size=mvl_element_size(type);
475 byte_length=length*elt_size;
476 
477 if(byte_length>0)mvl_rewrite(ctx, base_offset+elt_size*idx+sizeof(ctx->tmp_vh), byte_length, data);
478 }
479 
491 LIBMVL_OFFSET64 mvl_indexed_copy_vector(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 index_count, const LIBMVL_OFFSET64 *indices, const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 data_length, LIBMVL_OFFSET64 metadata, LIBMVL_OFFSET64 max_buffer)
492 {
493 LIBMVL_OFFSET64 char_length, vec_length, i, m, k, i_start, char_start, char_buf_length, vec_buf_length, N;
494 LIBMVL_OFFSET64 offset, char_offset;
495 unsigned char *char_buffer;
496 void *vec_buffer;
497 
498 switch(mvl_vector_type(vec)) {
500  vec_length=index_count+1;
501  char_length=0;
502  for(i=0;i<index_count;i++) {
503  if(mvl_packed_list_validate_entry(vec, data, data_length, indices[i])) {
504  mvl_set_error(ctx, LIBMVL_ERR_CORRUPT_PACKED_LIST);
505  return 0;
506  }
507  char_length+=mvl_packed_list_get_entry_bytelength(vec, indices[i]);
508  }
509  break;
510  default:
511  vec_length=index_count;
512  char_length=0;
513  }
514 
515 vec_buf_length=vec_length;
516 if(vec_buf_length*mvl_element_size(mvl_vector_type(vec))>max_buffer) {
517  vec_buf_length=max_buffer/mvl_element_size(mvl_vector_type(vec));
518  }
519 if(vec_buf_length<50)vec_buf_length=50;
520 vec_buffer=do_malloc(vec_buf_length, mvl_element_size(mvl_vector_type(vec)));
521 
522 offset=mvl_start_write_vector(ctx, mvl_vector_type(vec), vec_length, 0, NULL, metadata);
523 
525  char_buf_length=char_length;
526  if(char_buf_length>max_buffer)char_buf_length=max_buffer;
527  if(char_buf_length<100)char_buf_length=100;
528  char_buffer=do_malloc(char_buf_length, 1);
529  char_offset=mvl_start_write_vector(ctx, LIBMVL_VECTOR_UINT8, char_length, 0, NULL, LIBMVL_NO_METADATA);
530  i=char_offset+sizeof(LIBMVL_VECTOR_HEADER);
531  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, 0, 1, &i);
532  } else {
533  char_buf_length=0;
534  char_buffer=NULL;
535  }
536 
537 i_start=0;
538 char_start=0;
539 while(i_start<index_count) {
540  switch(mvl_vector_type(vec)) {
541  case LIBMVL_PACKED_LIST64: {
542  LIBMVL_OFFSET64 *po=(LIBMVL_OFFSET64 *)vec_buffer;
543  i=mvl_packed_list_get_entry_bytelength(vec, indices[i_start]);
544  //fprintf(stderr, "packed_list i_start=%lld char_start=%lld\n", i_start, char_start);
545  if(i>=char_buf_length) {
546  mvl_rewrite_vector(ctx, LIBMVL_VECTOR_UINT8, char_offset, char_start, i, mvl_packed_list_get_entry(vec, data, indices[i_start]));
547  po[0]=char_start+char_offset+sizeof(LIBMVL_VECTOR_HEADER);
548  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, i_start+1, 1, po);
549  i_start++;
550  break;
551  }
552  N=0;
553  for(i=0;(i<char_buf_length) && (N<vec_buf_length) && (i_start+N<index_count);i+=mvl_packed_list_get_entry_bytelength(vec, indices[i_start+N]), N++) {
554  }
555  if(i>char_buf_length)N--;
556 // fprintf(stderr, "buffer plan N=%lld vec_buf_length=%lld i=%lld char_buf_length=%lld\n", N, vec_buf_length, i, char_buf_length);
557  //fprintf(stderr, "packed_list i=%lld N=%lld char_buf_len=%lld vec_buf_len=%lld index_count=%lld\n", i, N, char_buf_length, vec_buf_length, index_count);
558 // if(N>vec_buf_length) {
559 // fprintf(stderr, "*** INTERNAL ERROR: buffer overrun N=%lld vec_buf_length=%lld\n", N, vec_buf_length);
560 // }
561  k=0;
562  for(i=0;i<N;i++) {
563  m=mvl_packed_list_get_entry_bytelength(vec, indices[i_start+i]);
564  memcpy(&(char_buffer[k]), mvl_packed_list_get_entry(vec, data, indices[i_start+i]), m);
565  k+=m;
566  po[i]=char_offset+char_start+sizeof(LIBMVL_VECTOR_HEADER)+k;
567  }
568 // if(k>char_buf_length) {
569 // fprintf(stderr, "*** INTERNAL ERROR: buffer overrun k=%lld char_buf_length=%lld N=%lld vec_buf_length=%lld\n", k, char_buf_length, N, vec_buf_length);
570 // }
571  //fprintf(stderr, "packed_list i=%lld N=%lld k=%lld char_buf_len=%lld vec_buf_len=%lld\n", i, N, k, char_buf_length, vec_buf_length);
572  mvl_rewrite_vector(ctx, LIBMVL_VECTOR_UINT8, char_offset, char_start, k, char_buffer);
573  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, i_start+1, N, po);
574  i_start+=N;
575  char_start+=k;
576  break;
577  }
578  case LIBMVL_VECTOR_UINT8: {
579  unsigned char *pb=(unsigned char *)vec_buffer;
580  N=index_count-i_start;
581  if(N>vec_buf_length)N=vec_buf_length;
582  for(i=0;i<N;i++) {
583  pb[i]=mvl_vector_data_uint8(vec)[indices[i+i_start]];
584  }
585  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, i_start, N, pb);
586  i_start+=N;
587  break;
588  }
589  case LIBMVL_VECTOR_INT32: {
590  int *pi=(int *)vec_buffer;
591  N=index_count-i_start;
592  if(N>vec_buf_length)N=vec_buf_length;
593  for(i=0;i<N;i++) {
594  pi[i]=mvl_vector_data_int32(vec)[indices[i+i_start]];
595  }
596  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, i_start, N, pi);
597  i_start+=N;
598  break;
599  }
600  case LIBMVL_VECTOR_INT64: {
601  long long *pi=(long long *)vec_buffer;
602  N=index_count-i_start;
603  if(N>vec_buf_length)N=vec_buf_length;
604  for(i=0;i<N;i++) {
605  pi[i]=mvl_vector_data_int64(vec)[indices[i+i_start]];
606  }
607  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, i_start, N, pi);
608  i_start+=N;
609  break;
610  }
611  case LIBMVL_VECTOR_FLOAT: {
612  float *pf=(float *)vec_buffer;
613  N=index_count-i_start;
614  if(N>vec_buf_length)N=vec_buf_length;
615  for(i=0;i<N;i++) {
616  pf[i]=mvl_vector_data_float(vec)[indices[i+i_start]];
617  }
618  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, i_start, N, pf);
619  i_start+=N;
620  break;
621  }
622  case LIBMVL_VECTOR_DOUBLE: {
623  double *pd=(double *)vec_buffer;
624  N=index_count-i_start;
625  if(N>vec_buf_length)N=vec_buf_length;
626  for(i=0;i<N;i++) {
627  pd[i]=mvl_vector_data_double(vec)[indices[i+i_start]];
628  }
629  mvl_rewrite_vector(ctx, mvl_vector_type(vec), offset, i_start, N, pd);
630  i_start+=N;
631  break;
632  }
633 
634  default:
635  mvl_set_error(ctx, LIBMVL_ERR_UNKNOWN_TYPE);
636  i_start=index_count;
637  }
638  }
639 
640 free(vec_buffer);
641 free(char_buffer);
642 return(offset);
643 }
644 
654 LIBMVL_OFFSET64 mvl_write_concat_vectors(LIBMVL_CONTEXT *ctx, int type, long nvec, const long *lengths, void **data, LIBMVL_OFFSET64 metadata)
655 {
656 LIBMVL_OFFSET64 byte_length, length;
657 int padding, item_size;
658 unsigned char *zeros;
659 off_t offset;
660 int i;
661 
662 length=0;
663 for(i=0;i<nvec;i++)length+=lengths[i];
664 
665 memset(&(ctx->tmp_vh), 0, sizeof(ctx->tmp_vh));
666 
667 item_size=mvl_element_size(type);
668 if(item_size<=0) {
669  mvl_set_error(ctx, LIBMVL_ERR_UNKNOWN_TYPE);
670  return(LIBMVL_NULL_OFFSET);
671  }
672 byte_length=length*item_size;
673 padding=ctx->alignment-((byte_length+sizeof(ctx->tmp_vh)) & (ctx->alignment-1));
674 padding=padding & (ctx->alignment-1);
675 
676 ctx->tmp_vh.length=length;
677 ctx->tmp_vh.type=type;
678 ctx->tmp_vh.metadata=metadata;
679 
680 offset=do_ftello(ctx->f);
681 
682 if((long long)offset<0) {
683  perror("mvl_write_vector");
684  mvl_set_error(ctx, LIBMVL_ERR_FTELL);
685  }
686 
687 mvl_write(ctx, sizeof(ctx->tmp_vh), &ctx->tmp_vh);
688 for(i=0;i<nvec;i++)
689  mvl_write(ctx, lengths[i]*item_size, data[i]);
690 
691 if(padding>0) {
692  zeros=alloca(padding);
693  memset(zeros, 0, padding);
694  mvl_write(ctx, padding, zeros);
695  }
696 
697 return(offset);
698 }
699 
707 LIBMVL_OFFSET64 mvl_write_string(LIBMVL_CONTEXT *ctx, long length, const char *data, LIBMVL_OFFSET64 metadata)
708 {
709 if(length<0)length=strlen(data);
710 return(mvl_write_vector(ctx, LIBMVL_VECTOR_CSTRING, length, data, metadata));
711 }
712 
719 LIBMVL_OFFSET64 mvl_write_cached_string(LIBMVL_CONTEXT *ctx, long length, const char *data)
720 {
721 LIBMVL_OFFSET64 ofs;
722 if(length<0)length=strlen(data);
723 ofs=mvl_find_list_entry(ctx->cached_strings, length, data);
724 if(ofs!=LIBMVL_NULL_OFFSET)return(ofs);
725 
727 mvl_add_list_entry(ctx->cached_strings, length, data, ofs);
728 return(ofs);
729 }
730 
731 LIBMVL_OFFSET64 mvl_write_vector_inline(LIBMVL_CONTEXT *ctx, int type, int count, LIBMVL_OFFSET64 metadata, ...)
732 {
733 int i;
734 va_list ap;
735 
736 va_start(ap, metadata);
737 
738 switch(type) {
740  case LIBMVL_VECTOR_UINT8: {
741  char *data;
742  data=alloca(count);
743  for(i=0;i<count;i++)data[i]=va_arg(ap, int);
744  va_end(ap);
745  return(mvl_write_vector(ctx, type, count, data, metadata));
746  break;
747  }
748  case LIBMVL_VECTOR_INT32: {
749  int *data;
750  data=alloca(count*sizeof(*data));
751  for(i=0;i<count;i++)data[i]=va_arg(ap, int);
752  va_end(ap);
753  return(mvl_write_vector(ctx, type, count, data, metadata));
754  break;
755  }
756  case LIBMVL_VECTOR_FLOAT: {
757  float *data;
758  data=alloca(count*sizeof(*data));
759  for(i=0;i<count;i++)data[i]=va_arg(ap, double);
760  va_end(ap);
761  return(mvl_write_vector(ctx, type, count, data, metadata));
762  break;
763  }
764  case LIBMVL_VECTOR_INT64: {
765  long long *data;
766  data=alloca(count*sizeof(*data));
767  for(i=0;i<count;i++)data[i]=va_arg(ap, long long);
768  va_end(ap);
769  return(mvl_write_vector(ctx, type, count, data, metadata));
770  break;
771  }
772  case LIBMVL_VECTOR_DOUBLE: {
773  double *data;
774  data=alloca(count*sizeof(*data));
775  for(i=0;i<count;i++)data[i]=va_arg(ap, double);
776  va_end(ap);
777  return(mvl_write_vector(ctx, type, count, data, metadata));
778  break;
779  }
780  case LIBMVL_VECTOR_OFFSET64: {
781  LIBMVL_OFFSET64 *data;
782  data=alloca(count*sizeof(*data));
783  for(i=0;i<count;i++)data[i]=va_arg(ap, LIBMVL_OFFSET64);
784  va_end(ap);
785  return(mvl_write_vector(ctx, type, count, data, metadata));
786  break;
787  }
788  default:
789  mvl_set_error(ctx, LIBMVL_ERR_UNKNOWN_TYPE);
790  return(LIBMVL_NULL_OFFSET);
791  }
792 
793 }
794 
803 LIBMVL_OFFSET64 mvl_write_packed_list(LIBMVL_CONTEXT *ctx, long count, const long *str_size, unsigned char **str, LIBMVL_OFFSET64 metadata)
804 {
805 LIBMVL_OFFSET64 *ofsv, ofs1, ofs2, len1;
806 long *str_size2;
807 long i;
808 ofsv=do_malloc(count+1, sizeof(*ofsv));
809 str_size2=do_malloc(count, sizeof(*str_size2));
810 
811 len1=0;
812 for(i=0;i<count;i++) {
813  if((str_size==NULL) || (str_size[i]<0)) {
814  str_size2[i]=strlen((char *)str[i]);
815  } else {
816  str_size2[i]=str_size[i];
817  }
818  len1+=str_size2[i];
819  }
820 ofs1=mvl_write_concat_vectors(ctx, LIBMVL_VECTOR_UINT8, count, str_size2, (void **)str, LIBMVL_NO_METADATA);
821 
822 ofsv[0]=ofs1+sizeof(LIBMVL_VECTOR_HEADER);
823 for(i=0;i<count;i++) {
824  ofsv[i+1]=ofsv[i]+str_size2[i];
825  }
826 
827 ofs2=mvl_write_vector(ctx, LIBMVL_PACKED_LIST64, count+1, ofsv, metadata);
828 free(ofsv);
829 free(str_size2);
830 return(ofs2);
831 }
832 
833 /* The function mvl_write_hash64_checksum_vector() writes out computed hashes in blocks of LIBMVL_INTERNAL1_HASH64_BLOCKSIZE values.
834  * On some block-based storage devices it is advantageous to have the size of the written out blocks to be a multiple of device block size
835  * as this reduces actual I/O and wear.
836  * The default value below has been chosen to accomodate most known devices at the time of writing.
837  */
838 
839 #ifndef LIBMVL_INTERNAL1_HASH64_BLOCKSIZE
840 #define LIBMVL_INTERNAL1_HASH64_BLOCKSIZE 65536
841 #endif
842 
851 LIBMVL_OFFSET64 mvl_write_hash64_checksum_vector(LIBMVL_CONTEXT *ctx, void *data, LIBMVL_OFFSET64 checksum_area_start, LIBMVL_OFFSET64 checksum_area_stop, LIBMVL_OFFSET64 checksum_block_size)
852 {
854 LIBMVL_OFFSET64 *buffer;
855 LIBMVL_OFFSET64 byte_length, padding;
856 LIBMVL_OFFSET64 buffer_idx;
857 LIBMVL_OFFSET64 block, block_stop;
858 LIBMVL_OFFSET64 hash;
859 LIBMVL_OFFSET64 offset;
860 unsigned char *data8;
861 unsigned char *zeros;
862 
863 if(data==NULL) {
864  data=ctx->data;
865  }
866 
867 if(data==NULL) {
868  mvl_set_error(ctx, LIBMVL_ERR_NO_DATA);
869  return(LIBMVL_NULL_OFFSET);
870  }
871 
872 data8=(unsigned char *)data;
873 
874 if(((LIBMVL_OFFSET64)(data)) & 0x7) {
875  mvl_set_error(ctx, LIBMVL_ERR_UNALIGNED_POINTER);
876  return(LIBMVL_NULL_OFFSET);
877  }
878 
879 if((checksum_area_start & 0x7) || (checksum_block_size & 0x7) || (checksum_area_stop & 0x7)) {
880  mvl_set_error(ctx, LIBMVL_ERR_UNALIGNED_OFFSET);
881  return(LIBMVL_NULL_OFFSET);
882  }
883 
884 memset(hdr, 0, sizeof(*hdr));
885 hdr->type=LIBMVL_VECTOR_CHECKSUM;
886 hdr->checksum_algorithm=LIBMVL_CHECKSUM_ALGORITHM_INTERNAL1_HASH64;
887 hdr->checksum_area_start=checksum_area_start;
888 hdr->checksum_area_stop=checksum_area_stop;
889 hdr->checksum_block_size=checksum_block_size;
890 hdr->length=(checksum_area_stop-checksum_area_start+checksum_block_size-1)/checksum_block_size;
891 hdr->metadata=LIBMVL_NULL_OFFSET;
892 
893 byte_length=hdr->length*8;
894 padding=ctx->alignment-((byte_length+sizeof(ctx->tmp_vh)) & (ctx->alignment-1));
895 padding=padding & (ctx->alignment-1);
896 
897 
898 buffer=do_malloc(LIBMVL_INTERNAL1_HASH64_BLOCKSIZE, sizeof(*buffer));
899 
900 offset=do_ftello(ctx->f);
901 
902 if((long long)offset<0) {
903  perror("mvl_write_vector");
904  mvl_set_error(ctx, LIBMVL_ERR_FTELL);
905  }
906 
907 mvl_write(ctx, sizeof(ctx->tmp_vh), &ctx->tmp_vh);
908 
909 buffer_idx=0;
910 for(block=checksum_area_start;block<checksum_area_stop;block+=checksum_block_size) {
911  block_stop=block+checksum_block_size;
912  if(block_stop>checksum_area_stop)block_stop=checksum_area_stop;
913 
914  hash=MVL_SEED_HASH_VALUE;
915  hash=mvl_accumulate_int64_hash64(hash, (long long *)&(data8[block]), (block_stop-block)>>3);
916  hash=mvl_randomize_bits64(hash);
917 
918  buffer[buffer_idx]=hash;
919  buffer_idx++;
920 
921  if(buffer_idx>=LIBMVL_INTERNAL1_HASH64_BLOCKSIZE) {
922  mvl_write(ctx, buffer_idx*sizeof(*buffer), buffer);
923  buffer_idx=0;
924  }
925  }
926 
927 if(buffer_idx>0) {
928  mvl_write(ctx, buffer_idx*sizeof(*buffer), buffer);
929  buffer_idx=0;
930  }
931 
932 if(padding>0) {
933  zeros=alloca(padding);
934  memset(zeros, 0, padding);
935  mvl_write(ctx, padding, zeros);
936  }
937 
938 free(buffer);
939 return(offset);
940 }
941 
951 int mvl_verify_checksum_vector(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 start, LIBMVL_OFFSET64 stop)
952 {
954 LIBMVL_OFFSET64 block, buffer_idx, block_stop, hash, start2, stop2;
955 unsigned char *data8;
956 LIBMVL_OFFSET64 *buffer;
957 
958 if(data==NULL) {
959  data=ctx->data;
960  data_size=ctx->data_size;
961  }
962 
963 if(data==NULL) {
964  mvl_set_error(ctx, LIBMVL_ERR_NO_DATA);
965  return(-1);
966  }
967 
968 data8=(unsigned char *)data;
969 
970 if(checksum_vector==NULL) {
971  if(ctx->full_checksums_offset==LIBMVL_NULL_OFFSET) {
972  mvl_set_error(ctx, LIBMVL_ERR_NO_CHECKSUMS);
973  return(-2);
974  }
975  checksum_vector=(LIBMVL_VECTOR *) &(data8[ctx->full_checksums_offset]);
976  }
977 
978 hdr=(LIBMVL_CHECKSUM_VECTOR_HEADER *)(checksum_vector);
979 buffer=mvl_vector_data_offset(checksum_vector);
980 
981 if(hdr->type!=LIBMVL_VECTOR_CHECKSUM) {
982  mvl_set_error(ctx, LIBMVL_ERR_INVALID_HEADER);
983  return(-3);
984  }
985 
986 if(hdr->checksum_algorithm!=LIBMVL_CHECKSUM_ALGORITHM_INTERNAL1_HASH64) {
987  mvl_set_error(ctx, LIBMVL_ERR_UNKNOWN_CHECKSUM_ALGORITHM);
988  return(-4);
989  }
990 
991 if(stop<start) {
992  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
993  return(-5);
994  }
995 
996 if(stop==start) {
997  /* Nothing to check */
998  return(0);
999  }
1000 
1001 if(start<hdr->checksum_area_start || stop>hdr->checksum_area_stop) {
1002  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1003  return(-6);
1004  }
1005 
1006 if(hdr->length < ((hdr->checksum_area_stop-hdr->checksum_area_start+hdr->checksum_block_size-1) / hdr->checksum_block_size)) {
1007  mvl_set_error(ctx, LIBMVL_ERR_INVALID_HEADER);
1008  return(-7);
1009  }
1010 
1011 start2=start-((start-hdr->checksum_area_start) % hdr->checksum_block_size);
1012 
1013 block=(stop-hdr->checksum_area_start) % hdr->checksum_block_size;
1014 stop2=stop;
1015 if(block>0)stop2+=hdr->checksum_block_size-block;
1016 
1017 if(stop2>hdr->checksum_area_stop)stop2=hdr->checksum_area_stop;
1018 
1019 if(stop2>data_size) {
1020  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1021  return(-8);
1022  }
1023 
1024 buffer_idx=(start2-hdr->checksum_area_start) / hdr->checksum_block_size;
1025 
1026 for(block=start2;block<stop2;block+=hdr->checksum_block_size) {
1027 
1028  block_stop=block+hdr->checksum_block_size;
1029  if(block_stop>hdr->checksum_area_stop)block_stop=hdr->checksum_area_stop;
1030 
1031  hash=MVL_SEED_HASH_VALUE;
1032  hash=mvl_accumulate_int64_hash64(hash, (long long *)&(data8[block]), (block_stop-block)>>3);
1033  hash=mvl_randomize_bits64(hash);
1034 
1035  if(buffer[buffer_idx]!=hash) {
1036  mvl_set_error(ctx, LIBMVL_ERR_CHECKSUM_FAILED);
1037  return(-255);
1038  }
1039  buffer_idx++;
1040  }
1041 
1042 return(0);
1043 }
1044 
1052 int mvl_verify_full_checksum_vector(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size)
1053 {
1055 unsigned char *data8;
1056 
1057 if(data==NULL) {
1058  data=ctx->data;
1059  data_size=ctx->data_size;
1060  }
1061 
1062 if(data==NULL) {
1063  mvl_set_error(ctx, LIBMVL_ERR_NO_DATA);
1064  return(-1);
1065  }
1066 
1067 data8=(unsigned char *)data;
1068 
1069 if(checksum_vector==NULL) {
1070  if(ctx->full_checksums_offset==LIBMVL_NULL_OFFSET) {
1071  mvl_set_error(ctx, LIBMVL_ERR_NO_CHECKSUMS);
1072  return(-2);
1073  }
1074  checksum_vector=(LIBMVL_VECTOR *) &(data8[ctx->full_checksums_offset]);
1075  }
1076 
1077 hdr=(LIBMVL_CHECKSUM_VECTOR_HEADER *)(checksum_vector);
1078 
1079 if(hdr->type!=LIBMVL_VECTOR_CHECKSUM) {
1080  mvl_set_error(ctx, LIBMVL_ERR_INVALID_HEADER);
1081  return(-3);
1082  }
1083 
1084 return(mvl_verify_checksum_vector(ctx, checksum_vector, data, data_size, hdr->checksum_area_start, hdr->checksum_area_stop));
1085 }
1086 
1095 int mvl_verify_checksum_vector2(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 vector_offset)
1096 {
1097 int err;
1098 LIBMVL_OFFSET64 byte_length;
1100 char *data8;
1101 int element_size;
1102 int a;
1103 
1104 if(data==NULL) {
1105  data=ctx->data;
1106  data_size=ctx->data_size;
1107  }
1108 
1109 if(data==NULL) {
1110  mvl_set_error(ctx, LIBMVL_ERR_NO_DATA);
1111  return(-1);
1112  }
1113 
1114 data8=(char *)data;
1115 
1116 if((err=mvl_validate_vector(vector_offset, data, data_size))!=0) {
1117  mvl_set_error(ctx, err);
1118  return(-50);
1119  }
1120 
1121 if(checksum_vector==NULL) {
1122  if(ctx->full_checksums_offset==LIBMVL_NULL_OFFSET) {
1123  mvl_set_error(ctx, LIBMVL_ERR_NO_CHECKSUMS);
1124  return(-1);
1125  }
1126  checksum_vector=(LIBMVL_VECTOR *) &(data8[ctx->full_checksums_offset]);
1127  }
1128 
1129 vec=(LIBMVL_VECTOR_HEADER *)&(data8[vector_offset]);
1130 
1131 element_size=mvl_element_size(mvl_vector_type(vec));
1132 
1133 if(!element_size) {
1134  mvl_set_error(ctx, LIBMVL_ERR_UNKNOWN_TYPE);
1135  return(-51);
1136  }
1137 
1138 byte_length=element_size*mvl_vector_length(vec);
1139 
1140 if((a=mvl_verify_checksum_vector(ctx, checksum_vector, data, data_size, vector_offset, vector_offset+byte_length+sizeof(*vec))))return(a);
1141 
1143  return(mvl_verify_checksum_vector2(ctx, checksum_vector, data, data_size, mvl_vector_data_offset(vec)[0]-sizeof(LIBMVL_VECTOR_HEADER)));
1144  }
1145 
1146 return(0);
1147 }
1148 
1158 int mvl_verify_checksum_vector3(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, void * start, void * stop)
1159 {
1160 char *data8;
1161 char *start8=(char *)start;
1162 char *stop8=(char *)stop;
1163 
1164 if(data==NULL) {
1165  data=ctx->data;
1166  data_size=ctx->data_size;
1167  }
1168 
1169 if(data==NULL) {
1170  mvl_set_error(ctx, LIBMVL_ERR_NO_DATA);
1171  return(-1);
1172  }
1173 
1174 data8=(char *)data;
1175 
1176 if( (start8-data8 < 0) || (start8-data8>data_size) || (stop8-data8<0) || (stop8-data8>data_size)) {
1177  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1178  return(-40);
1179  }
1180 return(mvl_verify_checksum_vector(ctx, checksum_vector, data, data_size, start8-data8, stop8-data8));
1181 }
1182 
1188 {
1190 if(ctx->character_class_offset==0) {
1191  L=mvl_create_R_attributes_list(ctx, "character");
1192  ctx->character_class_offset=mvl_write_attributes_list(ctx, L);
1194  }
1195 return(ctx->character_class_offset);
1196 }
1197 
1203 void mvl_add_directory_entry(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag)
1204 {
1205 // LIBMVL_DIRECTORY_ENTRY *p;
1206 // if(ctx->dir_free>=ctx->dir_size) {
1207 // ctx->dir_size+=ctx->dir_size+10;
1208 //
1209 // p=do_malloc(ctx->dir_size, sizeof(*p));
1210 // if(ctx->dir_free>0)memcpy(p, ctx->directory, ctx->dir_free*sizeof(*p));
1211 // free(ctx->directory);
1212 // ctx->directory=p;
1213 // }
1214 // //fprintf(stderr, "Adding entry %d \"%s\"=0x%016x\n", ctx->dir_free, tag, offset);
1215 // ctx->directory[ctx->dir_free].offset=offset;
1216 // ctx->directory[ctx->dir_free].tag=strdup(tag);
1217 // ctx->dir_free++;
1218 mvl_add_list_entry(ctx->directory, -1, tag, offset);
1219 }
1220 
1227 void mvl_add_directory_entry_n(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag, LIBMVL_OFFSET64 tag_size)
1228 {
1229 // LIBMVL_DIRECTORY_ENTRY *p;
1230 // if(ctx->dir_free>=ctx->dir_size) {
1231 // ctx->dir_size+=ctx->dir_size+10;
1232 //
1233 // p=do_malloc(ctx->dir_size, sizeof(*p));
1234 // if(ctx->dir_free>0)memcpy(p, ctx->directory, ctx->dir_free*sizeof(*p));
1235 // free(ctx->directory);
1236 // ctx->directory=p;
1237 // }
1238 // ctx->directory[ctx->dir_free].offset=offset;
1239 // ctx->directory[ctx->dir_free].tag=memndup(tag, tag_size);
1240 // ctx->dir_free++;
1241 mvl_add_list_entry(ctx->directory, tag_size, tag, offset);
1242 }
1243 
1249 {
1250 LIBMVL_OFFSET64 *p;
1251 LIBMVL_OFFSET64 offset;
1252 off_t cur;
1253 
1254 if(ctx->directory->free<1) {
1255  mvl_set_error(ctx, LIBMVL_ERR_EMPTY_DIRECTORY);
1256  return(0);
1257  }
1258 
1259 #ifdef MVL_OLD_DIRECTORY
1260 
1261 p=do_malloc(ctx->directory->free*2, sizeof(*p));
1262 for(int i=0;i<ctx->directory->free;i++) {
1263 // p[i]=mvl_write_vector(ctx, LIBMVL_VECTOR_UINT8, strlen(ctx->directory[i].tag), ctx->directory[i].tag, LIBMVL_NO_METADATA);
1264 // p[i+ctx->dir_free]=ctx->directory[i].offset;
1265  p[i]=mvl_write_vector(ctx, LIBMVL_VECTOR_UINT8, ctx->directory->tag_length[i], ctx->directory->tag[i], LIBMVL_NO_METADATA);
1266  p[i+ctx->directory->free]=ctx->directory->offset[i];
1267  }
1268 
1269 
1270 cur=do_ftello(ctx->f);
1271 
1272 if((long long)cur<0) {
1273  perror("mvl_write_directory");
1274  mvl_set_error(ctx, LIBMVL_ERR_FTELL);
1275  }
1276 offset=cur;
1277 
1278 mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, 2*ctx->directory->free, p, LIBMVL_NO_METADATA);
1279 
1280 free(p);
1281 
1282 #else
1283 offset=mvl_write_named_list(ctx, ctx->directory);
1284 #endif
1285 
1286 ctx->directory_offset=offset;
1287 
1288 return(offset);
1289 }
1290 
1296 {
1298 L=do_malloc(1, sizeof(*L));
1299 L->size=size;
1300 L->free=0;
1301 if(L->size<10)L->size=10;
1302 
1303 L->offset=do_malloc(L->size, sizeof(*L->offset));
1304 L->tag_length=do_malloc(L->size, sizeof(*L->tag_length));
1305 L->tag=do_malloc(L->size, sizeof(*L->tag));
1306 
1307 L->hash_size=0;
1308 L->next_item=NULL;
1309 L->first_item=NULL;
1310 
1311 return(L);
1312 }
1313 
1318 {
1319 long i;
1320 for(i=0;i<L->free;i++)free(L->tag[i]);
1321 free(L->next_item);
1322 free(L->first_item);
1323 free(L->offset);
1324 free(L->tag);
1325 free(L->tag_length);
1326 free(L);
1327 }
1328 
1333 {
1334 LIBMVL_OFFSET64 mask;
1335 if(L->hash_size<L->size) {
1336  LIBMVL_OFFSET64 hs=1;
1337 
1338  while(hs<L->size && hs)hs=hs<<1;
1339 
1340  L->hash_size=hs;
1341  free(L->next_item);
1342  free(L->first_item);
1343 
1344  /* This can only happen if L->size is greater than 2^63 - unlikely */
1345  if(hs==0) {
1346  L->next_item=NULL;
1347  L->first_item=NULL;
1348  return;
1349  }
1350  L->next_item=do_malloc(L->hash_size, sizeof(*L->next_item));
1351  L->first_item=do_malloc(L->hash_size, sizeof(*L->first_item));
1352  }
1353 mask=L->hash_size-1;
1354 for(LIBMVL_OFFSET64 i=0;i<L->hash_size;i++)L->first_item[i]=-1;
1355 for(LIBMVL_OFFSET64 i=0;i<L->free;i++) {
1356  LIBMVL_OFFSET64 h=mvl_accumulate_hash64(MVL_SEED_HASH_VALUE, L->tag[i], L->tag_length[i]) & mask;
1357  L->next_item[i]=L->first_item[h];
1358  L->first_item[h]=i;
1359  }
1360 }
1361 
1369 long mvl_add_list_entry(LIBMVL_NAMED_LIST *L, long tag_length, const char *tag, LIBMVL_OFFSET64 offset)
1370 {
1371 void *p;
1372 long k;
1373 if(L->free>=L->size) {
1374  L->size=2*L->size+10;
1375 
1376  p=do_malloc(L->size, sizeof(*L->offset));
1377  if(L->free>0)memcpy(p, L->offset, L->free*sizeof(*L->offset));
1378  free(L->offset);
1379  L->offset=p;
1380 
1381  p=do_malloc(L->size, sizeof(*L->tag_length));
1382  if(L->free>0)memcpy(p, L->tag_length, L->free*sizeof(*L->tag_length));
1383  free(L->tag_length);
1384  L->tag_length=p;
1385 
1386  p=do_malloc(L->size, sizeof(*L->tag));
1387  if(L->free>0)memcpy(p, L->tag, L->free*sizeof(*L->tag));
1388  free(L->tag);
1389  L->tag=p;
1390  }
1391 
1392 if(L->hash_size && (L->free>=L->hash_size))mvl_recompute_named_list_hash(L);
1393 
1394 k=L->free;
1395 L->free++;
1396 L->offset[k]=offset;
1397 if(tag_length<0)tag_length=strlen(tag);
1398 L->tag_length[k]=tag_length;
1399 L->tag[k]=(unsigned char*)memndup(tag, tag_length);
1400 
1401 if(L->hash_size>0) {
1402  LIBMVL_OFFSET64 mask=L->hash_size-1;
1403  LIBMVL_OFFSET64 h=mvl_accumulate_hash64(MVL_SEED_HASH_VALUE, (const unsigned char*)tag, tag_length) & mask;
1404  L->next_item[k]=L->first_item[h];
1405  L->first_item[h]=k;
1406  }
1407 return(k);
1408 }
1409 
1416 LIBMVL_OFFSET64 mvl_find_list_entry(LIBMVL_NAMED_LIST *L, long tag_length, const char *tag)
1417 {
1418 long i, tl;
1419 tl=tag_length;
1420 if(tl<0)tl=strlen(tag);
1421 
1422 if(L->hash_size>0) {
1423  /* Hash table present */
1424  LIBMVL_OFFSET64 mask=L->hash_size-1;
1425  LIBMVL_OFFSET64 h=mvl_accumulate_hash64(MVL_SEED_HASH_VALUE, (const unsigned char*)tag, tl) & mask;
1426  for(i=L->first_item[h]; i>=0; i=L->next_item[i]) {
1427  if(L->tag_length[i]!=tl)continue;
1428  if(!memcmp(L->tag[i], tag, tl)) {
1429  return(L->offset[i]);
1430  }
1431  }
1432  return(LIBMVL_NULL_OFFSET);
1433  }
1434 
1435 for(i=0;i<L->free;i++) {
1436  if(L->tag_length[i]!=tl)continue;
1437  if(!memcmp(L->tag[i], tag, tl)) {
1438  return(L->offset[i]);
1439  }
1440  }
1441 return(LIBMVL_NULL_OFFSET);
1442 }
1443 
1444 
1451 {
1453 L=mvl_create_named_list(-1);
1454 mvl_add_list_entry(L, -1, "MVL_LAYOUT", mvl_write_cached_string(ctx, -1, "R"));
1455 mvl_add_list_entry(L, -1, "class", mvl_write_cached_string(ctx, -1, R_class));
1456 return(L);
1457 }
1458 
1465 {
1466 LIBMVL_OFFSET64 *offsets, attr_offset;
1467 long i;
1468 offsets=do_malloc(2*L->free, sizeof(*offsets));
1469 
1470 for(i=0;i<L->free;i++) {
1471  offsets[i]=mvl_write_cached_string(ctx, L->tag_length[i], (const char *)L->tag[i]);
1472  }
1473 memcpy(&(offsets[L->free]), L->offset, L->free*sizeof(*offsets));
1474 
1475 attr_offset=mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, 2*L->free, offsets, LIBMVL_NO_METADATA);
1476 
1477 free(offsets);
1478 
1479 return(attr_offset);
1480 }
1481 
1488 {
1489 LIBMVL_OFFSET64 list_offset;
1490 LIBMVL_NAMED_LIST *metadata;
1491 
1492 metadata=mvl_create_R_attributes_list(ctx, "list");
1493 //mvl_add_list_entry(metadata, -1, "names", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, L->free, offsets, LIBMVL_NO_METADATA));
1494 mvl_add_list_entry(metadata, -1, "names", mvl_write_packed_list(ctx, L->free, L->tag_length, L->tag, LIBMVL_NO_METADATA));
1495 
1496 list_offset=mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, L->free, L->offset, mvl_write_attributes_list(ctx, metadata));
1497 
1498 mvl_free_named_list(metadata);
1499 
1500 return(list_offset);
1501 }
1502 
1510 {
1511 LIBMVL_OFFSET64 list_offset;
1512 LIBMVL_NAMED_LIST *metadata;
1513 
1514 metadata=mvl_create_R_attributes_list(ctx, cl);
1515 //mvl_add_list_entry(metadata, -1, "names", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, L->free, offsets, LIBMVL_NO_METADATA));
1516 mvl_add_list_entry(metadata, -1, "names", mvl_write_packed_list(ctx, L->free, L->tag_length, L->tag, LIBMVL_NO_METADATA));
1517 
1518 list_offset=mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, L->free, L->offset, mvl_write_attributes_list(ctx, metadata));
1519 
1520 mvl_free_named_list(metadata);
1521 
1522 return(list_offset);
1523 }
1524 
1533 {
1534 LIBMVL_OFFSET64 list_offset;
1535 LIBMVL_NAMED_LIST *metadata;
1536 
1537 metadata=mvl_create_R_attributes_list(ctx, "data.frame");
1538 // mvl_add_list_entry(metadata, -1, "names", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, L->free, offsets, LIBMVL_NO_METADATA));
1539 mvl_add_list_entry(metadata, -1, "names", mvl_write_packed_list(ctx, L->free, L->tag_length, L->tag, LIBMVL_NO_METADATA));
1540 mvl_add_list_entry(metadata, -1, "dim", MVL_WVEC(ctx, LIBMVL_VECTOR_INT32, nrows, (int)L->free));
1541 if(rownames!=0)mvl_add_list_entry(metadata, -1, "rownames", rownames);
1542 
1543 
1544 list_offset=mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, L->free, L->offset, mvl_write_attributes_list(ctx, metadata));
1545 
1546 mvl_free_named_list(metadata);
1547 
1548 return(list_offset);
1549 }
1550 
1551 /* This is meant to operate on memory mapped files */
1560 {
1562 long i, nattr;
1563 char *p, *d;
1564 int err;
1565 
1566 if(metadata_offset==LIBMVL_NO_METADATA)return(NULL);
1567 
1568 if(data==NULL) {
1569  data=ctx->data;
1570  data_size=ctx->data_size;
1571  }
1572 
1573 if(data==NULL) {
1574  mvl_set_error(ctx, LIBMVL_ERR_NO_DATA);
1575  return(NULL);
1576  }
1577 
1578 if((err=mvl_validate_vector(metadata_offset, data, data_size))!=0) {
1579  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1580  return(NULL);
1581  }
1582 
1583 d=(char *)data;
1584 
1585 if(mvl_vector_type(&(d[metadata_offset]))!=LIBMVL_VECTOR_OFFSET64) {
1586  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1587  return(NULL);
1588  }
1589 
1590 p=&(d[metadata_offset]);
1591 
1592 nattr=mvl_vector_length(p);
1593 if(nattr==0)return(NULL);
1594 if((nattr<0) || (nattr & 1)) {
1595  mvl_set_error(ctx, LIBMVL_ERR_INVALID_ATTR_LIST);
1596  return(NULL);
1597  }
1598 nattr=nattr>>1;
1599 
1600 L=mvl_create_named_list(nattr);
1601 for(i=0;i<nattr;i++) {
1602  if((err=mvl_validate_vector(mvl_vector_data_offset(p)[i], data, data_size))!=0) {
1603  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1604  mvl_add_list_entry(L,
1605  9,
1606  "*CORRUPT*",
1607  mvl_vector_data_offset(p)[i+nattr]);
1608  } else {
1609  mvl_add_list_entry(L,
1611  (const char *)mvl_vector_data_uint8(&(d[mvl_vector_data_offset(p)[i]])),
1612  mvl_vector_data_offset(p)[i+nattr]);
1613  }
1614  }
1615 
1617 return(L);
1618 }
1619 
1620 /* This is meant to operate on memory mapped files */
1629 {
1630 LIBMVL_NAMED_LIST *L, *Lattr;
1631 char *d;
1632 LIBMVL_OFFSET64 names_ofs, tag_ofs;
1633 long i, nelem;
1634 int err;
1635 
1636 if(offset==LIBMVL_NULL_OFFSET)return(NULL);
1637 
1638 if(data==NULL) {
1639  data=ctx->data;
1640  data_size=ctx->data_size;
1641  }
1642 
1643 if(data==NULL) {
1644  mvl_set_error(ctx, LIBMVL_ERR_NO_DATA);
1645  return(NULL);
1646  }
1647 
1648 if((err=mvl_validate_vector(offset, data, data_size))!=0) {
1649  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1650  return(NULL);
1651  }
1652 
1653 d=(char *)data;
1654 
1655 if(mvl_vector_type(&(d[offset]))!=LIBMVL_VECTOR_OFFSET64){
1656  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1657  return(NULL);
1658  }
1659 
1660 Lattr=mvl_read_attributes_list(ctx, data, data_size, mvl_vector_metadata_offset(&(d[offset])));
1661 if(Lattr==NULL)return(NULL);
1662 names_ofs=mvl_find_list_entry(Lattr, -1, "names");
1663 
1664 if((err=mvl_validate_vector(names_ofs, data, data_size))!=0) {
1665  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1666  return(NULL);
1667  }
1668 
1669 nelem=mvl_vector_length(&(d[offset]));
1670 
1671 L=mvl_create_named_list(nelem);
1672 
1673 switch(mvl_vector_type(&(d[names_ofs]))) {
1675  if(nelem!=mvl_vector_length(&(d[names_ofs]))) {
1677  mvl_free_named_list(Lattr);
1678  mvl_set_error(ctx, LIBMVL_ERR_INVALID_ATTR);
1679  return(NULL);
1680  }
1681  for(i=0;i<nelem;i++) {
1682  tag_ofs=mvl_vector_data_offset(&(d[names_ofs]))[i];
1683 
1684  if((err=mvl_validate_vector(tag_ofs, data, data_size))!=0) {
1685  mvl_set_error(ctx, LIBMVL_ERR_INVALID_OFFSET);
1686  mvl_add_list_entry(L, 9, "*CORRUPT*", mvl_vector_data_offset(&(d[offset]))[i]);
1687  continue;
1688  }
1689 
1690  mvl_add_list_entry(L, mvl_vector_length(&(d[tag_ofs])), (const char *)mvl_vector_data_uint8(&(d[tag_ofs])), mvl_vector_data_offset(&(d[offset]))[i]);
1691  }
1692  break;
1693  case LIBMVL_PACKED_LIST64:
1694  if(nelem+1!=mvl_vector_length(&(d[names_ofs]))) {
1696  mvl_free_named_list(Lattr);
1697  mvl_set_error(ctx, LIBMVL_ERR_INVALID_ATTR);
1698  return(NULL);
1699  }
1700  for(i=0;i<nelem;i++) {
1701  if((err=mvl_packed_list_validate_entry((LIBMVL_VECTOR *)&(d[names_ofs]), d, data_size, i))!=0) {
1702  mvl_set_error(ctx, LIBMVL_ERR_CORRUPT_PACKED_LIST);
1703  mvl_add_list_entry(L, 9, "*CORRUPT*", mvl_vector_data_offset(&(d[offset]))[i]);
1704  continue;
1705  }
1706  mvl_add_list_entry(L, mvl_packed_list_get_entry_bytelength((LIBMVL_VECTOR *)&(d[names_ofs]), i), (const char *)mvl_packed_list_get_entry((LIBMVL_VECTOR *)&(d[names_ofs]), d, i), mvl_vector_data_offset(&(d[offset]))[i]);
1707  }
1708  break;
1709  default:
1711  mvl_free_named_list(Lattr);
1712  mvl_set_error(ctx, LIBMVL_ERR_INVALID_ATTR);
1713  return(NULL);
1714  }
1715 
1716 mvl_free_named_list(Lattr);
1717 
1719 return(L);
1720 }
1721 
1726 void mvl_open(LIBMVL_CONTEXT *ctx, FILE *f)
1727 {
1728 ctx->f=f;
1729 mvl_write_preamble(ctx);
1730 }
1731 
1736 {
1737 mvl_write_directory(ctx);
1738 mvl_write_postamble(ctx);
1739 fflush(ctx->f);
1740 ctx->f=NULL;
1741 }
1742 
1743 LIBMVL_OFFSET64 mvl_directory_length(const void *data)
1744 {
1746 if(p->type!=LIBMVL_VECTOR_OFFSET64) {
1747  return(0);
1748  }
1749 if(p->length &1) {
1750  return 0;
1751  }
1752 return(p->length>>1);
1753 }
1754 
1755 // LIBMVL_OFFSET64 mvl_directory_tag(const void *data, int i)
1756 // {
1757 // LIBMVL_VECTOR *p=(LIBMVL_VECTOR *)data;
1758 // return(mvl_vector_data_offset(p)[i]);
1759 // }
1760 //
1761 // LIBMVL_OFFSET64 mvl_directory_entry(void *data, int i)
1762 // {
1763 // LIBMVL_VECTOR *p=(LIBMVL_VECTOR *)data;
1764 // return(mvl_vector_data_offset(p)[i+(p->header.length>>1)]);
1765 // }
1766 
1773 {
1774 // int i;
1775 // for(i=ctx->dir_free-1;i>=0;i--) {
1776 // if(!strcmp(tag, ctx->directory[i].tag))return(ctx->directory[i].offset);
1777 // }
1778 // return(0);
1779 return(mvl_find_list_entry(ctx->directory, -1, tag));
1780 }
1781 
1787 void mvl_load_image(LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 length)
1788 {
1789 LIBMVL_PREAMBLE *pr=(LIBMVL_PREAMBLE *)data;
1790 LIBMVL_POSTAMBLE *pa=(LIBMVL_POSTAMBLE *)&(((unsigned char *)data)[length-sizeof(LIBMVL_POSTAMBLE)]);
1791 LIBMVL_VECTOR *dir, *a;
1792 LIBMVL_OFFSET64 k;
1793 int i;
1794 int err;
1795 
1796 if(strncmp(pr->signature, LIBMVL_SIGNATURE, 4)) {
1797  mvl_set_error(ctx, LIBMVL_ERR_INVALID_SIGNATURE);
1798  return;
1799  }
1800 
1801 if(pr->endianness!=LIBMVL_ENDIANNESS_FLAG) {
1802  mvl_set_error(ctx, LIBMVL_ERR_WRONG_ENDIANNESS);
1803  return;
1804  }
1805 
1806 if(length<sizeof(LIBMVL_POSTAMBLE)+sizeof(LIBMVL_PREAMBLE)) {
1807  mvl_set_error(ctx, LIBMVL_ERR_MVL_FILE_TOO_SHORT);
1808  return;
1809  }
1810 
1811 //fprintf(stderr, "Reading MVL directory at offset 0x%08llx\n", pa->directory);
1812 
1813 ctx->data=(unsigned char *)data;
1814 ctx->data_size=length;
1815 
1816 mvl_free_named_list(ctx->directory);
1817 
1818 switch(pa->type) {
1819  case LIBMVL_VECTOR_POSTAMBLE2:
1820  if((err=mvl_validate_vector(pa->directory, data, length))<0) {
1821  ctx->directory=mvl_create_named_list(100);
1822  mvl_set_error(ctx, LIBMVL_ERR_CORRUPT_POSTAMBLE);
1823  return;
1824  }
1825 
1826  ctx->directory=mvl_read_named_list(ctx, data, length, pa->directory);
1827  if(ctx->directory==NULL)
1828  ctx->directory=mvl_create_named_list(100);
1829  break;
1830 #ifdef MVL_READ_OLD_DIRECTORY
1831  case LIBMVL_VECTOR_POSTAMBLE1:
1832  dir=(LIBMVL_VECTOR *)&(((unsigned char *)data)[pa->directory]);
1833  k=dir->header.length>>1;
1834 
1835  ctx->directory=mvl_create_named_list(k);
1836 
1837  // for(i=0;i<ctx->dir_free;i++) {
1838  // free(ctx->directory[i].tag);
1839  // ctx->directory[i].tag=NULL;
1840  // ctx->directory[i].offset=0;
1841  // }
1842 
1843  // ctx->dir_free=dir->header.length>>1;
1844  // //fprintf(stderr, "Reading MVL with %ld directory entries\n", ctx->dir_free);
1845  // if(ctx->dir_free >= ctx->dir_size) {
1846  // ctx->dir_size=ctx->dir_free;
1847  // free(ctx->directory);
1848  // ctx->directory=do_malloc(ctx->dir_size, sizeof(*ctx->directory));
1849  // }
1850 
1851  // for(i=0;i<ctx->dir_free;i++) {
1852  // ctx->directory[i].offset=mvl_vector_data(dir).offset[i+ctx->dir_free];
1853  // a=(LIBMVL_VECTOR *)&(((unsigned char *)data)[mvl_vector_data(dir).offset[i]]);
1854  // ctx->directory[i].tag=memndup(mvl_vector_data(a).b, a->header.length);
1855  // }
1856  for(i=0;i<k;i++) {
1857  a=(LIBMVL_VECTOR *)&(((unsigned char *)data)[mvl_vector_data_offset(dir)[i]]);
1858 
1859  mvl_add_list_entry(ctx->directory, a->header.length, (const char *)mvl_vector_data_uint8(a), mvl_vector_data_offset(dir)[i+k]);
1860  }
1861  mvl_recompute_named_list_hash(ctx->directory);
1862  break;
1863 #endif
1864  default:
1865  ctx->directory=mvl_create_named_list(100);
1866  mvl_set_error(ctx, LIBMVL_ERR_CORRUPT_POSTAMBLE);
1867  return;
1868  }
1869 
1870 ctx->full_checksums_offset=mvl_find_directory_entry(ctx, LIBMVL_FULL_CHECKSUMS_DIRECTORY_KEY);
1871 if((ctx->full_checksums_offset!=LIBMVL_NULL_OFFSET) && (err=mvl_validate_vector(ctx->full_checksums_offset, data, length))) {
1872  mvl_set_error(ctx, err);
1873  ctx->full_checksums_offset=LIBMVL_NULL_OFFSET;
1874  }
1875 }
1876 
1877 typedef struct {
1878  LIBMVL_VECTOR **vec;
1879  void **data; /* This is needed for packed vectors */
1880  LIBMVL_OFFSET64 *data_length;
1881  LIBMVL_OFFSET64 nvec;
1882  } MVL_SORT_INFO;
1883 
1884 typedef struct {
1885  LIBMVL_OFFSET64 index;
1886  MVL_SORT_INFO *info;
1887  } MVL_SORT_UNIT;
1888 
1889 
1890 int mvl_equals(MVL_SORT_UNIT *a, MVL_SORT_UNIT *b)
1891 {
1892 LIBMVL_OFFSET64 i, ai, bi;
1893 LIBMVL_OFFSET64 N=a->info->nvec;
1894 LIBMVL_VECTOR *avec, *bvec;
1895 ai=a->index;
1896 bi=b->index;
1897 for(i=0;i<N;i++) {
1898  avec=a->info->vec[i];
1899  bvec=b->info->vec[i];
1900 
1901  switch(mvl_vector_type(avec)) {
1902  case LIBMVL_VECTOR_CSTRING:
1903  case LIBMVL_VECTOR_UINT8: {
1904  unsigned char ad, bd;
1905  if(mvl_vector_type(bvec)!=mvl_vector_type(avec))return 0;
1906  ad=mvl_vector_data_uint8(avec)[ai];
1907  bd=mvl_vector_data_uint8(bvec)[bi];
1908  if(ad!=bd)return 0;
1909  break;
1910  }
1911  case LIBMVL_VECTOR_INT32: {
1912  int ad;
1913  ad=mvl_vector_data_int32(avec)[ai];
1914  switch(mvl_vector_type(bvec)) {
1915  case LIBMVL_VECTOR_INT32: {
1916  int bd;
1917  bd=mvl_vector_data_int32(bvec)[bi];
1918  if(ad!=bd)return 0;
1919  break;
1920  }
1921  case LIBMVL_VECTOR_INT64: {
1922  long long bd;
1923  bd=mvl_vector_data_int64(bvec)[bi];
1924  if(ad!=bd)return 0;
1925  break;
1926  }
1927  default:
1928  return 0;
1929  break;
1930  }
1931  break;
1932  }
1933  case LIBMVL_VECTOR_INT64: {
1934  long long ad;
1935  ad=mvl_vector_data_int64(avec)[ai];
1936  switch(mvl_vector_type(bvec)) {
1937  case LIBMVL_VECTOR_INT32: {
1938  int bd;
1939  bd=mvl_vector_data_int32(bvec)[bi];
1940  if(ad!=bd)return 0;
1941  break;
1942  }
1943  case LIBMVL_VECTOR_INT64: {
1944  long long bd;
1945  bd=mvl_vector_data_int64(bvec)[bi];
1946  if(ad!=bd)return 0;
1947  break;
1948  }
1949  default:
1950  return 0;
1951  break;
1952  }
1953  break;
1954  }
1955  case LIBMVL_VECTOR_FLOAT: {
1956  float ad;
1957  ad=mvl_vector_data_float(avec)[ai];
1958  switch(mvl_vector_type(bvec)) {
1959  case LIBMVL_VECTOR_FLOAT: {
1960  float bd;
1961  bd=mvl_vector_data_float(bvec)[bi];
1962  if(ad!=bd)return 0;
1963  break;
1964  }
1965  case LIBMVL_VECTOR_DOUBLE: {
1966  double bd;
1967  bd=mvl_vector_data_double(bvec)[bi];
1968  if((double)ad!=bd)return 0;
1969  break;
1970  }
1971  default:
1972  return 0;
1973  break;
1974  }
1975  break;
1976  }
1977  case LIBMVL_VECTOR_DOUBLE: {
1978  double ad;
1979  ad=mvl_vector_data_double(avec)[ai];
1980  switch(mvl_vector_type(bvec)) {
1981  case LIBMVL_VECTOR_FLOAT: {
1982  double bd;
1983  bd=mvl_vector_data_float(bvec)[bi];
1984  if(ad!=bd)return 0;
1985  break;
1986  }
1987  case LIBMVL_VECTOR_DOUBLE: {
1988  double bd;
1989  bd=mvl_vector_data_double(bvec)[bi];
1990  if(ad!=bd)return 0;
1991  break;
1992  }
1993  default:
1994  return 0;
1995  break;
1996  }
1997  break;
1998  }
1999  case LIBMVL_VECTOR_OFFSET64: {
2000  LIBMVL_OFFSET64 ad, bd;
2001  if(mvl_vector_type(bvec)!=mvl_vector_type(avec))return 0;
2002  ad=mvl_vector_data_offset(avec)[ai];
2003  bd=mvl_vector_data_offset(bvec)[bi];
2004  if(ad!=bd)return 0;
2005  break;
2006  }
2007  case LIBMVL_PACKED_LIST64: {
2008  LIBMVL_OFFSET64 al, bl, nn;
2009  const unsigned char *ad, *bd;
2010  if(mvl_vector_type(bvec)!=mvl_vector_type(avec))return 0;
2011  if(mvl_packed_list_validate_entry(avec, a->info->data[i], a->info->data_length[i], ai))return 0;
2012  if(mvl_packed_list_validate_entry(bvec, b->info->data[i], b->info->data_length[i], bi))return 0;
2015  ad=mvl_packed_list_get_entry(avec, a->info->data[i], ai);
2016  bd=mvl_packed_list_get_entry(bvec, b->info->data[i], bi);
2017  if(al!=bl)return 0;
2018  nn=al;
2019  for(LIBMVL_OFFSET64 j=0;j<nn;j++) {
2020  if(ad[j]!=bd[j])return 0;
2021  }
2022  break;
2023  }
2024  default:
2025  return(0);
2026  }
2027  }
2028 return 1;
2029 }
2030 
2031 #if 0
2032 
2033 /* The comparison functions below use absolute index as a last resort in comparison which preserves order for identical entries.
2034  * This makes these functions unsuitable to check equality
2035  * They also assume that a->info==b->info
2036  */
2037 
2038 int mvl_lexicographic_cmp(MVL_SORT_UNIT *a, MVL_SORT_UNIT *b)
2039 {
2040 LIBMVL_OFFSET64 i, ai, bi;
2041 LIBMVL_OFFSET64 N=a->info->nvec;
2042 LIBMVL_VECTOR *vec;
2043 ai=a->index;
2044 bi=b->index;
2045 for(i=0;i<N;i++) {
2046  vec=a->info->vec[i];
2047 
2048  switch(mvl_vector_type(vec)) {
2049  case LIBMVL_VECTOR_CSTRING:
2050  case LIBMVL_VECTOR_UINT8: {
2051  unsigned char ad, bd;
2052  ad=mvl_vector_data_uint8(vec)[ai];
2053  bd=mvl_vector_data_uint8(vec)[bi];
2054  if(ad<bd)return -1;
2055  if(ad>bd)return 1;
2056  break;
2057  }
2058  case LIBMVL_VECTOR_INT32: {
2059  int ad, bd;
2060  ad=mvl_vector_data_int32(vec)[ai];
2061  bd=mvl_vector_data_int32(vec)[bi];
2062  if(ad<bd)return -1;
2063  if(ad>bd)return 1;
2064  break;
2065  }
2066  case LIBMVL_VECTOR_FLOAT: {
2067  float ad, bd;
2068  ad=mvl_vector_data_float(vec)[ai];
2069  bd=mvl_vector_data_float(vec)[bi];
2070  if(ad<bd)return -1;
2071  if(ad>bd)return 1;
2072  break;
2073  }
2074  case LIBMVL_VECTOR_INT64: {
2075  long long ad, bd;
2076  ad=mvl_vector_data_int64(vec)[ai];
2077  bd=mvl_vector_data_int64(vec)[bi];
2078  if(ad<bd)return -1;
2079  if(ad>bd)return 1;
2080  break;
2081  }
2082  case LIBMVL_VECTOR_DOUBLE: {
2083  double ad, bd;
2084  ad=mvl_vector_data_double(vec)[ai];
2085  bd=mvl_vector_data_double(vec)[bi];
2086  if(ad<bd)return -1;
2087  if(ad>bd)return 1;
2088  break;
2089  }
2090  case LIBMVL_VECTOR_OFFSET64: {
2091  LIBMVL_OFFSET64 ad, bd;
2092  ad=mvl_vector_data_offset(vec)[ai];
2093  bd=mvl_vector_data_offset(vec)[bi];
2094  if(ad<bd)return -1;
2095  if(ad>bd)return 1;
2096  break;
2097  }
2098  case LIBMVL_PACKED_LIST64: {
2099  LIBMVL_OFFSET64 al, bl, nn;
2100  const unsigned char *ad, *bd;
2103  ad=mvl_packed_list_get_entry(vec, a->info->data[i], ai);
2104  bd=mvl_packed_list_get_entry(vec, b->info->data[i], bi);
2105  nn=al;
2106  if(bl<nn)nn=bl;
2107  for(LIBMVL_OFFSET64 j=0;j<nn;j++) {
2108  if(ad[j]<bd[j])return -1;
2109  if(ad[j]>bd[j])return 1;
2110  }
2111  if(al<bl)return -1;
2112  if(al>bl)return 1;
2113  break;
2114  }
2115  default:
2116  if(ai<bi)return -1;
2117  if(ai>bi)return 1;
2118  return(0);
2119  }
2120  }
2121 if(ai<bi)return -1;
2122 if(ai>bi)return 1;
2123 return 0;
2124 }
2125 
2126 int mvl_lexicographic_desc_cmp(MVL_SORT_UNIT *a, MVL_SORT_UNIT *b)
2127 {
2128 LIBMVL_OFFSET64 i, ai, bi;
2129 LIBMVL_OFFSET64 N=a->info->nvec;
2130 LIBMVL_VECTOR *vec;
2131 ai=a->index;
2132 bi=b->index;
2133 for(i=0;i<N;i++) {
2134  vec=a->info->vec[i];
2135 
2136  switch(mvl_vector_type(vec)) {
2137  case LIBMVL_VECTOR_CSTRING:
2138  case LIBMVL_VECTOR_UINT8: {
2139  unsigned char ad, bd;
2140  ad=mvl_vector_data_uint8(vec)[ai];
2141  bd=mvl_vector_data_uint8(vec)[bi];
2142  if(ad<bd)return 1;
2143  if(ad>bd)return -1;
2144  break;
2145  }
2146  case LIBMVL_VECTOR_INT32: {
2147  int ad, bd;
2148  ad=mvl_vector_data_int32(vec)[ai];
2149  bd=mvl_vector_data_int32(vec)[bi];
2150  if(ad<bd)return 1;
2151  if(ad>bd)return -1;
2152  break;
2153  }
2154  case LIBMVL_VECTOR_FLOAT: {
2155  float ad, bd;
2156  ad=mvl_vector_data_float(vec)[ai];
2157  bd=mvl_vector_data_float(vec)[bi];
2158  if(ad<bd)return 1;
2159  if(ad>bd)return -1;
2160  break;
2161  }
2162  case LIBMVL_VECTOR_INT64: {
2163  long long ad, bd;
2164  ad=mvl_vector_data_int64(vec)[ai];
2165  bd=mvl_vector_data_int64(vec)[bi];
2166  if(ad<bd)return 1;
2167  if(ad>bd)return -1;
2168  break;
2169  }
2170  case LIBMVL_VECTOR_DOUBLE: {
2171  double ad, bd;
2172  ad=mvl_vector_data_double(vec)[ai];
2173  bd=mvl_vector_data_double(vec)[bi];
2174  if(ad<bd)return 1;
2175  if(ad>bd)return -1;
2176  break;
2177  }
2178  case LIBMVL_VECTOR_OFFSET64: {
2179  LIBMVL_OFFSET64 ad, bd;
2180  ad=mvl_vector_data_offset(vec)[ai];
2181  bd=mvl_vector_data_offset(vec)[bi];
2182  if(ad<bd)return 1;
2183  if(ad>bd)return -1;
2184  break;
2185  }
2186  case LIBMVL_PACKED_LIST64: {
2187  LIBMVL_OFFSET64 al, bl, nn;
2188  const unsigned char *ad, *bd;
2191  ad=mvl_packed_list_get_entry(vec, a->info->data[i], ai);
2192  bd=mvl_packed_list_get_entry(vec, b->info->data[i], bi);
2193  nn=al;
2194  if(bl<nn)nn=bl;
2195  for(LIBMVL_OFFSET64 j=0;j<nn;j++) {
2196  if(ad[j]<bd[j])return 1;
2197  if(ad[j]>bd[j])return -1;
2198  }
2199  if(al<bl)return 1;
2200  if(al>bl)return -1;
2201  break;
2202  }
2203  default:
2204  if(ai<bi)return 1;
2205  if(ai>bi)return -1;
2206  return(0);
2207  }
2208  }
2209 if(ai<bi)return 1;
2210 if(ai>bi)return -1;
2211 return 0;
2212 }
2213 
2214 /*
2215  * This function sorts indices into a list of vectors so that the resulting permutation is ordered.
2216  * The vector should all be the same length N, except LIBMVL_PACKED_LIST64 which should N+1 - this provides the same number of elements.
2217  * The indices are from 0 to N-1 and can repeat.
2218  *
2219  * vec_data is the pointer to mapped data range where offsets point. This is needed only for vectors of type LIBMVL_PACKED_LIST64.
2220  * You can set vec_data to NULL if LIBMVL_PACKED_LIST64 vectors are not present. Also entries vec_data[i] can be NULL if the corresponding vector is not of type
2221  * LIBMVL_PACKED_LIST64
2222  *
2223  * This function return 0 on successful sort. If no vectors are supplies (vec_count==0) the indices are unchanged and the sort is considered successful.
2224  */
2225 int mvl_sort_indices1(LIBMVL_OFFSET64 indices_count, LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int sort_function)
2226 {
2227 MVL_SORT_UNIT *units;
2228 MVL_SORT_INFO info;
2229 LIBMVL_OFFSET64 i, N;
2230 
2231 if(vec_count<1)return 0;
2232 
2233 info.data=vec_data;
2234 info.data_length=vec_data_length;
2235 info.vec=vec;
2236 info.nvec=vec_count;
2237 
2238 units=do_malloc(indices_count, sizeof(*units));
2239 
2240 N=mvl_vector_nentries(vec[0]);
2241 //fprintf(stderr, "vec_count=%d N=%d\n", vec_count, N);
2242 for(i=1;i<vec_count;i++) {
2243  if(mvl_vector_type(vec[i])==LIBMVL_PACKED_LIST64) {
2244  if(mvl_vector_length(vec[i])!=N+1)return -1;
2245  if(vec_data==NULL)return -1;
2246  if(vec_data[i]==NULL)return -1;
2247  continue;
2248  }
2249  if(mvl_vector_length(vec[i])!=N)return -1;
2250  }
2251 
2252 for(i=0;i<indices_count;i++) {
2253  units[i].info=&info;
2254  if(indices[i]>=N)return -1;
2255  units[i].index=indices[i];
2256  }
2257 
2258 switch(sort_function) {
2260  qsort(units, indices_count, sizeof(*units), (int (*)(const void *, const void *))mvl_lexicographic_cmp);
2261  break;
2263  qsort(units, indices_count, sizeof(*units), (int (*)(const void *, const void *))mvl_lexicographic_desc_cmp);
2264  break;
2265  default:
2266  break;
2267  }
2268 for(i=0;i<indices_count;i++) {
2269  indices[i]=units[i].index;
2270  }
2271 
2272 free(units);
2273 return 0;
2274 }
2275 #endif
2276 
2293 int mvl_hash_indices(LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
2294 {
2295 LIBMVL_OFFSET64 i, j, N;
2296 
2297 if(flags & LIBMVL_INIT_HASH) {
2298  for(i=0;i<indices_count;i++) {
2299  hash[i]=MVL_SEED_HASH_VALUE;
2300  }
2301  }
2302 
2303 if(vec_count<1)return 0;
2304 
2305 N=mvl_vector_length(vec[0]);
2306 //fprintf(stderr, "vec_count=%d N=%d\n", vec_count, N);
2307 if(mvl_vector_type(vec[0])==LIBMVL_PACKED_LIST64)N--;
2308 for(i=1;i<vec_count;i++) {
2309  if(mvl_vector_type(vec[i])==LIBMVL_PACKED_LIST64) {
2310  if(mvl_vector_length(vec[i])!=N+1)return -1;
2311  if(vec_data==NULL)return -2;
2312  if(vec_data[i]==NULL)return -3;
2313  continue;
2314  }
2315  if(mvl_vector_length(vec[i])!=N)return -4;
2316  }
2317 
2318 for(i=0;i<indices_count;i++) {
2319  if(indices[i]>=N)return -5;
2320  }
2321 
2322 for(j=0;j<vec_count;j++) {
2323  switch(mvl_vector_type(vec[j])) {
2324  case LIBMVL_VECTOR_CSTRING:
2325  case LIBMVL_VECTOR_UINT8:
2326  for(i=0;i<indices_count;i++) {
2327  hash[i]=mvl_accumulate_hash64(hash[i], (const unsigned char *)&(mvl_vector_data_uint8(vec[j])[indices[i]]), 1);
2328  }
2329  break;
2330  case LIBMVL_VECTOR_INT32:
2331  for(i=0;i<indices_count;i++) {
2332  hash[i]=mvl_accumulate_int32_hash64(hash[i], &(mvl_vector_data_int32(vec[j])[indices[i]]), 1);
2333  }
2334  break;
2335  case LIBMVL_VECTOR_INT64:
2336  for(i=0;i<indices_count;i++) {
2337  hash[i]=mvl_accumulate_int64_hash64(hash[i], &(mvl_vector_data_int64(vec[j])[indices[i]]), 1);
2338  }
2339  break;
2340  case LIBMVL_VECTOR_FLOAT:
2341  for(i=0;i<indices_count;i++) {
2342  hash[i]=mvl_accumulate_float_hash64(hash[i], &(mvl_vector_data_float(vec[j])[indices[i]]), 1);
2343  }
2344  break;
2345  case LIBMVL_VECTOR_DOUBLE:
2346  for(i=0;i<indices_count;i++) {
2347  hash[i]=mvl_accumulate_double_hash64(hash[i], &(mvl_vector_data_double(vec[j])[indices[i]]), 1);
2348  }
2349  break;
2350  case LIBMVL_VECTOR_OFFSET64: /* TODO: we might want to do something more clever here */
2351  for(i=0;i<indices_count;i++) {
2352  hash[i]=mvl_accumulate_hash64(hash[i], (const unsigned char *)&(mvl_vector_data_int64(vec[j])[indices[i]]), 8);
2353  }
2354  break;
2355  case LIBMVL_PACKED_LIST64: {
2356  if(vec_data==NULL)return -6;
2357  if(vec_data[j]==NULL)return -7;
2358  for(i=0;i<indices_count;i++) {
2359  if(mvl_packed_list_validate_entry(vec[j], vec_data[j], vec_data_length[j], indices[i]))return -8;
2360 
2361  hash[i]=mvl_accumulate_hash64(hash[i], mvl_packed_list_get_entry(vec[j], vec_data[j], indices[i]), mvl_packed_list_get_entry_bytelength(vec[j], indices[i]));
2362  }
2363  break;
2364  }
2365  default:
2366  return(-1);
2367  }
2368  }
2369 
2370 if(flags & LIBMVL_FINALIZE_HASH) {
2371  for(i=0;i<indices_count;i++) {
2372  hash[i]=mvl_randomize_bits64(hash[i]);
2373  }
2374  }
2375 
2376 return 0;
2377 }
2378 
2395 int mvl_hash_range(LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
2396 {
2397 LIBMVL_OFFSET64 i, j, N, indices_count;
2398 
2399 indices_count=i1-i0;
2400 
2401 if(flags & LIBMVL_INIT_HASH) {
2402  for(i=0;i<indices_count;i++) {
2403  hash[i]=MVL_SEED_HASH_VALUE;
2404  }
2405  }
2406 
2407 if(vec_count<1 || (i1<=i0))return 0;
2408 
2409 N=mvl_vector_length(vec[0]);
2410 //fprintf(stderr, "vec_count=%d N=%d\n", vec_count, N);
2411 if(mvl_vector_type(vec[0])==LIBMVL_PACKED_LIST64)N--;
2412 for(i=1;i<vec_count;i++) {
2413  if(mvl_vector_type(vec[i])==LIBMVL_PACKED_LIST64) {
2414  if(mvl_vector_length(vec[i])!=N+1)return -1;
2415  if(vec_data==NULL)return -2;
2416  if(vec_data[i]==NULL)return -3;
2417  continue;
2418  }
2419  if(mvl_vector_length(vec[i])!=N)return -4;
2420  }
2421 
2422 if(i0>=N || i1>=N)return(-5);
2423 
2424 for(j=0;j<vec_count;j++) {
2425  switch(mvl_vector_type(vec[j])) {
2426  case LIBMVL_VECTOR_CSTRING:
2427  case LIBMVL_VECTOR_UINT8:
2428  for(i=0;i<indices_count;i++) {
2429  hash[i]=mvl_accumulate_hash64(hash[i], (const unsigned char *)&(mvl_vector_data_uint8(vec[j])[i+i0]), 1);
2430  }
2431  break;
2432  case LIBMVL_VECTOR_INT32:
2433  for(i=0;i<indices_count;i++) {
2434  hash[i]=mvl_accumulate_int32_hash64(hash[i], &(mvl_vector_data_int32(vec[j])[i+i0]), 1);
2435  }
2436  break;
2437  case LIBMVL_VECTOR_INT64:
2438  for(i=0;i<indices_count;i++) {
2439  hash[i]=mvl_accumulate_int64_hash64(hash[i], &(mvl_vector_data_int64(vec[j])[i+i0]), 1);
2440  }
2441  break;
2442  case LIBMVL_VECTOR_FLOAT:
2443  for(i=0;i<indices_count;i++) {
2444  hash[i]=mvl_accumulate_float_hash64(hash[i], &(mvl_vector_data_float(vec[j])[i+i0]), 1);
2445  }
2446  break;
2447  case LIBMVL_VECTOR_DOUBLE:
2448  for(i=0;i<indices_count;i++) {
2449  hash[i]=mvl_accumulate_double_hash64(hash[i], &(mvl_vector_data_double(vec[j])[i+i0]), 1);
2450  }
2451  break;
2452  case LIBMVL_VECTOR_OFFSET64: /* TODO: we might want to do something more clever here */
2453  for(i=0;i<indices_count;i++) {
2454  hash[i]=mvl_accumulate_hash64(hash[i], (const unsigned char *)&(mvl_vector_data_int64(vec[j])[i+i0]), 8);
2455  }
2456  break;
2457  case LIBMVL_PACKED_LIST64: {
2458  if(vec_data==NULL)return -6;
2459  if(vec_data[j]==NULL)return -7;
2460  for(i=0;i<indices_count;i++) {
2461  if(mvl_packed_list_validate_entry(vec[j], vec_data[j], vec_data_length[j], i+i0))return -8;
2462 
2463  hash[i]=mvl_accumulate_hash64(hash[i], mvl_packed_list_get_entry(vec[j], vec_data[j], i+i0), mvl_packed_list_get_entry_bytelength(vec[j], i+i0));
2464  }
2465  break;
2466  }
2467  default:
2468  return(-1);
2469  }
2470  }
2471 
2472 if(flags & LIBMVL_FINALIZE_HASH) {
2473  for(i=0;i<indices_count;i++) {
2474  hash[i]=mvl_randomize_bits64(hash[i]);
2475  }
2476  }
2477 
2478 return 0;
2479 }
2480 
2487 {
2488 LIBMVL_OFFSET64 hash_map_size;
2489 
2490 if(hash_count & (1LLU<<63))return 0; /* Too big */
2491 
2492 hash_map_size=1;
2493 while(hash_map_size<hash_count) {
2494  hash_map_size=hash_map_size<<1;
2495  }
2496 return(hash_map_size);
2497 }
2498 
2506 {
2507 HASH_MAP *hm;
2508 
2509 hm=do_malloc(1, sizeof(*hm));
2510 hm->hash_count=0;
2511 hm->hash_size=max_index_count;
2512 hm->hash_map_size=mvl_compute_hash_map_size(max_index_count);
2513 
2514 hm->hash=do_malloc(hm->hash_size, sizeof(hm->hash));
2515 hm->hash_map=do_malloc(hm->hash_map_size, sizeof(*hm->hash_map));
2516 hm->first=do_malloc(hm->hash_size, sizeof(*hm->first));
2517 hm->next=do_malloc(hm->hash_size, sizeof(*hm->next));
2518 
2519 hm->vec_count=0;
2520 
2522 
2523 return(hm);
2524 }
2525 
2530 {
2531 if(hash_map->flags & MVL_FLAG_OWN_HASH)free(hash_map->hash);
2532 if(hash_map->flags & MVL_FLAG_OWN_HASH_MAP)free(hash_map->hash_map);
2533 if(hash_map->flags & MVL_FLAG_OWN_FIRST)free(hash_map->first);
2534 if(hash_map->flags & MVL_FLAG_OWN_NEXT)free(hash_map->next);
2535 if(hash_map->flags & MVL_FLAG_OWN_VEC_TYPES)free(hash_map->vec_types);
2536 
2537 hash_map->hash_size=0;
2538 hash_map->hash_map_size=0;
2539 hash_map->vec_count=0;
2540 free(hash_map);
2541 }
2542 
2547 {
2548 LIBMVL_OFFSET64 i, k, hash_mask, N_first, hash_count;
2549 LIBMVL_OFFSET64 hash_map_size;
2550 LIBMVL_OFFSET64 *hash_map, *next, *first, *hash;
2551 
2552 hash_count=hm->hash_count;
2553 hash=hm->hash;
2554 hash_map=hm->hash_map;
2555 first=hm->first;
2556 next=hm->next;
2557 
2558 hash_map_size=hm->hash_map_size;
2559 hash_mask=hash_map_size-1;
2560 
2561 for(i=0;i<hash_map_size;i++) {
2562  hash_map[i]=~0LLU;
2563  }
2564 
2565 if(hash_mask & hash_map_size) {
2566  N_first=0;
2567  for(i=0;i<hash_count;i++) {
2568  k=hash[i] % hash_map_size;
2569  if(hash_map[k]==~0LLU) {
2570  hash_map[k]=i;
2571  first[N_first]=i;
2572  next[i]=~0LLU;
2573  N_first++;
2574  continue;
2575  }
2576  next[i]=hash_map[k];
2577  hash_map[k]=i;
2578  }
2579  for(i=0;i<N_first;i++) {
2580  k=hash[first[i]] % hash_map_size;
2581  first[i]=hash_map[k];
2582  }
2583  } else {
2584  N_first=0;
2585  for(i=0;i<hash_count;i++) {
2586  k=hash[i] & hash_mask;
2587  if(hash_map[k]==~0LLU) {
2588  hash_map[k]=i;
2589  first[N_first]=i;
2590  next[i]=~0LLU;
2591  N_first++;
2592  continue;
2593  }
2594  next[i]=hash_map[k];
2595  hash_map[k]=i;
2596  }
2597  for(i=0;i<N_first;i++) {
2598  k=hash[first[i]] & hash_mask;
2599  first[i]=hash_map[k];
2600  }
2601  }
2602 hm->first_count=N_first;
2603 }
2604 
2614 {
2615 LIBMVL_OFFSET64 i, k, hash_mask, match_count;
2616 LIBMVL_OFFSET64 *hash_map, *next, *hash;
2617 LIBMVL_OFFSET64 hash_map_size;
2618 
2619 
2620 hash_map_size=hm->hash_map_size;
2621 hash=hm->hash;
2622 hash_map=hm->hash_map;
2623 next=hm->next;
2624 
2625 hash_mask=hash_map_size-1;
2626 
2627 match_count=0;
2628 if(hash_map_size & hash_mask) {
2629  for(i=0;i<key_count;i++) {
2630  k=hash_map[key_hash[i] % hash_map_size];
2631  while(k!=~0LLU) {
2632  if(hash[k]==key_hash[i])match_count++;
2633  k=next[k];
2634  }
2635  }
2636  } else {
2637  for(i=0;i<key_count;i++) {
2638  k=hash_map[key_hash[i] & hash_mask];
2639  while(k!=~0LLU) {
2640  if(hash[k]==key_hash[i])match_count++;
2641  k=next[k];
2642  }
2643  }
2644  }
2645 return(match_count);
2646 }
2647 
2648 /* Find indices of keys in set of hashes, using hash map.
2649  * Only the first matching hash is reported.
2650  * If not found the index is set to ~0 (0xfff...fff)
2651  * Output is in key_indices
2652  */
2653 void mvl_find_first_hashes(LIBMVL_OFFSET64 key_count, const LIBMVL_OFFSET64 *key_hash, LIBMVL_OFFSET64 *key_indices, HASH_MAP *hm)
2654 {
2655 LIBMVL_OFFSET64 i, k, hash_mask, hash_map_size;
2656 LIBMVL_OFFSET64 *hash_map, *next, *hash;
2657 
2658 hash=hm->hash;
2659 hash_map_size=hm->hash_map_size;
2660 hash_map=hm->hash_map;
2661 next=hm->next;
2662 
2663 hash_mask=hash_map_size-1;
2664 
2665 if(hash_map_size & hash_mask) {
2666  for(i=0;i<key_count;i++) {
2667  k=hash_map[key_hash[i] % hash_map_size];
2668  while(k!=~0LLU) {
2669  if(hash[k]==key_hash[i])break;
2670  k=next[k];
2671  }
2672  key_indices[i]=k;
2673  }
2674  } else {
2675  for(i=0;i<key_count;i++) {
2676  k=hash_map[key_hash[i] & hash_mask];
2677  while(k!=~0LLU) {
2678  if(hash[k]==key_hash[i])break;
2679  k=next[k];
2680  }
2681  key_indices[i]=k;
2682  }
2683  }
2684 }
2685 
2686 /* This function computes pairs of merge indices. The pairs are stored in key_match_indices[] and match_indices[].
2687  * All arrays should be provided by the caller. The size of match_indices arrays is computed with mvl_hash_match_count()
2688  * An auxiliary array key_last of length key_indices_count stores the stop before index (in terms of matches array).
2689  * In particular the total number of matches is given by key_last[key_indices_count-1]
2690  */
2719 int mvl_find_matches(LIBMVL_OFFSET64 key_indices_count, const LIBMVL_OFFSET64 *key_indices, LIBMVL_OFFSET64 key_vec_count, LIBMVL_VECTOR **key_vec, void **key_vec_data, LIBMVL_OFFSET64 *key_vec_data_length, LIBMVL_OFFSET64 *key_hash,
2720  LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, HASH_MAP *hm,
2721  LIBMVL_OFFSET64 *key_last, LIBMVL_OFFSET64 pairs_size, LIBMVL_OFFSET64 *key_match_indices, LIBMVL_OFFSET64 *match_indices)
2722 {
2723 LIBMVL_OFFSET64 *hash, *hash_map, *next;
2724 LIBMVL_OFFSET64 hash_map_size, i, k, hash_mask, N_matches;
2725 MVL_SORT_INFO key_si, si;
2726 MVL_SORT_UNIT key_su, su;
2727 
2728 key_si.vec=key_vec;
2729 key_si.data=key_vec_data;
2730 key_si.data_length=key_vec_data_length;
2731 key_si.nvec=key_vec_count;
2732 
2733 si.vec=vec;
2734 si.data=vec_data;
2735 si.data_length=vec_data_length;
2736 si.nvec=vec_count;
2737 
2738 key_su.info=&key_si;
2739 su.info=&si;
2740 
2741 hash_map_size=hm->hash_map_size;
2742 hash_mask=hash_map_size-1;
2743 hash_map=hm->hash_map;
2744 hash=hm->hash;
2745 next=hm->next;
2746 
2747 N_matches=0;
2748 
2749 if(hash_map_size & hash_mask) {
2750  for(i=0;i<key_indices_count;i++) {
2751  k=hash_map[key_hash[i] % hash_map_size];
2752  key_su.index=key_indices[i];
2753  while(k!=~0LLU) {
2754  su.index=indices[k];
2755  if((hash[k]==key_hash[i]) && mvl_equals(&key_su, &su) ) {
2756  if(N_matches>=pairs_size)return(-1000);
2757  key_match_indices[N_matches]=key_indices[i];
2758  match_indices[N_matches]=indices[k];
2759  N_matches++;
2760  }
2761  k=next[k];
2762  }
2763  key_last[i]=N_matches;
2764  }
2765  } else {
2766  for(i=0;i<key_indices_count;i++) {
2767  k=hash_map[key_hash[i] & hash_mask];
2768  key_su.index=key_indices[i];
2769  while(k!=~0LLU) {
2770  su.index=indices[k];
2771  if((hash[k]==key_hash[i]) && mvl_equals(&key_su, &su) ) {
2772  if(N_matches>=pairs_size)return(-1000);
2773  key_match_indices[N_matches]=key_indices[i];
2774  match_indices[N_matches]=indices[k];
2775  N_matches++;
2776  }
2777  k=next[k];
2778  }
2779  key_last[i]=N_matches;
2780  }
2781  }
2782 return(0);
2783 }
2784 
2798 void mvl_find_groups(LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, HASH_MAP *hm)
2799 {
2800 LIBMVL_OFFSET64 *hash, *tmp, *next;
2801 LIBMVL_OFFSET64 i, j, l, m, k, group_count, first_count, a;
2802 MVL_SORT_INFO si;
2803 MVL_SORT_UNIT su1, su2;
2804 
2805 si.vec=vec;
2806 si.data=vec_data;
2807 si.data_length=vec_data_length;
2808 si.nvec=vec_count;
2809 
2810 su1.info=&si;
2811 su2.info=&si;
2812 
2813 tmp=hm->hash_map;
2814 hash=hm->hash;
2815 next=hm->next;
2816 
2817 group_count=hm->first_count;
2818 first_count=hm->first_count;
2819 
2820 for(i=0;i<first_count;i++) {
2821  k=hm->first[i];
2822  j=0;
2823  while(k!=~0LLU) {
2824  tmp[j]=k;
2825  j++;
2826  k=next[k];
2827  }
2828  while(j>1) {
2829  m=j-1;
2830  l=1;
2831  su1.index=indices[tmp[0]];
2832  while(l<=m) {
2833  su2.index=indices[tmp[l]];
2834  if(hash[tmp[0]]!=hash[tmp[l]] || !mvl_equals(&su1, &su2)) {
2835  if(l<m) {
2836  a=tmp[m];
2837  tmp[m]=tmp[l];
2838  tmp[l]=a;
2839  }
2840  m--;
2841  } else l++;
2842  }
2843  next[tmp[0]]=~0LLU;
2844  for(m=1;m<l;m++)next[tmp[m]]=tmp[m-1];
2845  if(l==j) {
2846  hm->first[i]=tmp[l-1];
2847  break;
2848  } else {
2849  hm->first[group_count]=tmp[l-1];
2850  group_count++;
2851  memmove(tmp, &(tmp[l]), (j-l)*sizeof(*tmp));
2852  hm->first[i]=tmp[0];
2853  hm->next[tmp[0]]=~0LLU;
2854  j-=l;
2855  }
2856  }
2857 
2858  }
2859 hm->first_count=group_count;
2860 }
2861 
2862 
2869 {
2870 LIBMVL_OFFSET64 *p, new_size;
2871 new_size=2*el->size+nelem;
2872 
2873 p=do_malloc(new_size, sizeof(*p));
2874 if(el->count>0) memcpy(p, el->offset, el->count*sizeof(*p));
2875 if(el->size>0)free(el->offset);
2876 el->offset=p;
2877 el->size=new_size;
2878 }
2879 
2887 void mvl_find_repeats(LIBMVL_PARTITION *el, LIBMVL_OFFSET64 count, LIBMVL_VECTOR **vec, void **data, LIBMVL_OFFSET64 *data_length)
2888 {
2889 LIBMVL_OFFSET64 N;
2890 MVL_SORT_INFO info;
2891 MVL_SORT_UNIT a, b;
2892 
2893 if(count<1)return;
2894 
2895 if(el->count>=el->size)
2896  mvl_extend_partition(el, 1024);
2897 
2898 N=mvl_vector_length(vec[0]);
2899 if(mvl_vector_type(vec[0])==LIBMVL_PACKED_LIST64)N--;
2900 
2901 for(LIBMVL_OFFSET64 i=1;i<count;i++) {
2902  if(mvl_vector_type(vec[i])==LIBMVL_PACKED_LIST64) {
2903  if(mvl_vector_length(vec[i])!=N+1) {
2904  return;
2905  }
2906  } else {
2907  if(mvl_vector_length(vec[i])!=N) {
2908  return;
2909  }
2910  }
2911  }
2912 
2913 info.vec=vec;
2914 info.data=data;
2915 info.data_length=data_length;
2916 info.nvec=count;
2917 
2918 a.info=&info;
2919 a.index=0;
2920 b.info=&info;
2921 
2922 for(LIBMVL_OFFSET64 i=1; i<N;i++) {
2923  b.index=i;
2924  if(mvl_equals(&a, &b))continue;
2925 
2926  if(el->count>=el->size)mvl_extend_partition(el, 0);
2927 
2928  el->offset[el->count]=a.index;
2929  el->count++;
2930  a.index=i;
2931  }
2932 
2933 if(el->count+1>=el->size)mvl_extend_partition(el, 0);
2934 el->offset[el->count]=a.index;
2935 el->count++;
2936 el->offset[el->count]=N;
2937 el->count++;
2938 }
2939 
2944 {
2945 memset(el, 0, sizeof(*el));
2946 }
2947 
2953 {
2954 if(el->size>0) {
2955  free(el->offset);
2956  }
2957 el->offset=NULL;
2958 el->size=0;
2959 }
2960 
2965 {
2966 memset(el, 0, sizeof(*el));
2967 el->size=LIBMVL_EXTENT_INLINE_SIZE;
2968 el->start=el->start_inline;
2969 el->stop=el->stop_inline;
2970 }
2971 
2977 {
2978 if(el->size>LIBMVL_EXTENT_INLINE_SIZE) {
2979  free(el->start);
2980  free(el->stop);
2981  }
2982 el->start=NULL;
2983 el->stop=NULL;
2984 el->size=0;
2985 }
2986 
2993 {
2994 LIBMVL_OFFSET64 *p;
2995 LIBMVL_OFFSET64 new_size;
2996 
2997 new_size=2*el->size+nelem;
2998 
2999 p=do_malloc(new_size, sizeof(*p));
3000 if(el->count>0) memcpy(p, el->start, el->count*sizeof(*p));
3001 if(el->size>LIBMVL_EXTENT_INLINE_SIZE) {
3002  free(el->start);
3003  }
3004 el->start=p;
3005 
3006 p=do_malloc(new_size, sizeof(*p));
3007 if(el->count>0) memcpy(p, el->stop, el->count*sizeof(*p));
3008 if(el->size>LIBMVL_EXTENT_INLINE_SIZE) {
3009  free(el->stop);
3010  }
3011 el->stop=p;
3012 
3013 el->size=new_size;
3014 }
3015 
3016 
3021 {
3022 memset(ei, 0, sizeof(*ei));
3023 mvl_init_partitiion(&(ei->partition));
3024 }
3025 
3031 {
3032 mvl_free_partition_arrays(&(ei->partition));
3033 
3034 if(ei->hash_map.flags & MVL_FLAG_OWN_FIRST)
3035  free(ei->hash_map.first);
3036 
3037 if(ei->hash_map.flags & MVL_FLAG_OWN_HASH)
3038  free(ei->hash_map.hash);
3039 
3040 if(ei->hash_map.flags & MVL_FLAG_OWN_NEXT)
3041  free(ei->hash_map.next);
3042 
3043 if(ei->hash_map.flags & MVL_FLAG_OWN_HASH_MAP)
3044  free(ei->hash_map.hash_map);
3045 
3046 if(ei->hash_map.flags & MVL_FLAG_OWN_VEC_TYPES)
3047  free(ei->hash_map.vec_types);
3048 
3049 ei->hash_map.flags=0;
3050 ei->hash_map.hash_size=0;
3051 ei->hash_map.hash_map_size=0;
3052 ei->hash_map.vec_count=0;
3053 }
3054 
3055 
3065 {
3066 int err;
3067 ei->partition.count=0;
3068 mvl_find_repeats(&(ei->partition), count, vec, data, data_length);
3069 
3070 ei->hash_map.hash_count=ei->partition.count-1;
3071 
3072 if(ei->hash_map.hash_size<ei->hash_map.hash_count ||
3074  if(ei->hash_map.flags & MVL_FLAG_OWN_HASH)
3075  free(ei->hash_map.hash);
3076  if(ei->hash_map.flags & MVL_FLAG_OWN_FIRST)
3077  free(ei->hash_map.first);
3078  if(ei->hash_map.flags & MVL_FLAG_OWN_NEXT)
3079  free(ei->hash_map.next);
3080 
3082  ei->hash_map.hash_size=ei->hash_map.hash_count;
3083  ei->hash_map.hash=do_malloc(ei->hash_map.hash_size, sizeof(*ei->hash_map.hash));
3084  ei->hash_map.first=do_malloc(ei->hash_map.hash_size, sizeof(*ei->hash_map.first));
3085  ei->hash_map.next=do_malloc(ei->hash_map.hash_size, sizeof(*ei->hash_map.next));
3086  }
3087 if(ei->hash_map.hash_map_size<ei->hash_map.hash_count || !(ei->hash_map.flags & MVL_FLAG_OWN_HASH_MAP)) {
3088  if(ei->hash_map.flags & MVL_FLAG_OWN_HASH_MAP) {
3089  free(ei->hash_map.hash_map);
3090  }
3091  ei->hash_map.flags|=MVL_FLAG_OWN_HASH_MAP;
3092  ei->hash_map.hash_map_size=mvl_compute_hash_map_size(ei->hash_map.hash_count);
3093  ei->hash_map.hash_map=do_malloc(ei->hash_map.hash_map_size, sizeof(*ei->hash_map.hash_map));
3094  }
3095 
3096 if((err=mvl_hash_indices(ei->hash_map.hash_count, ei->partition.offset, ei->hash_map.hash, count, vec, data, data_length, LIBMVL_COMPLETE_HASH))!=0)return(err);
3097 
3098 if(ei->hash_map.flags & MVL_FLAG_OWN_VEC_TYPES)
3099  free(ei->hash_map.vec_types);
3100 
3101 ei->hash_map.flags|=MVL_FLAG_OWN_VEC_TYPES;
3102 ei->hash_map.vec_count=count;
3103 ei->hash_map.vec_types=do_malloc(count, sizeof(*ei->hash_map.vec_types));
3104 
3105 for(LIBMVL_OFFSET64 i=0;i<count;i++)ei->hash_map.vec_types[i]=mvl_vector_type(vec[i]);
3106 
3107 mvl_compute_hash_map(&(ei->hash_map));
3108 return(0);
3109 }
3110 
3115 {
3117 LIBMVL_OFFSET64 offset;
3119 
3121 
3122 mvl_add_list_entry(L, -1, "partition", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, ei->partition.count, ei->partition.offset, LIBMVL_NO_METADATA));
3123 mvl_add_list_entry(L, -1, "hash", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, ei->hash_map.hash_count, ei->hash_map.hash, LIBMVL_NO_METADATA));
3124 //mvl_add_list_entry(L, -1, "first", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, ei->hash_map.first_count, ei->hash_map.first, LIBMVL_NO_METADATA));
3125 mvl_add_list_entry(L, -1, "next", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, ei->hash_map.hash_count, ei->hash_map.next, LIBMVL_NO_METADATA));
3126 mvl_add_list_entry(L, -1, "hash_map", mvl_write_vector(ctx, LIBMVL_VECTOR_OFFSET64, ei->hash_map.hash_map_size, ei->hash_map.hash_map, LIBMVL_NO_METADATA));
3127 mvl_add_list_entry(L, -1, "vec_types", mvl_write_vector(ctx, LIBMVL_VECTOR_INT32, ei->hash_map.vec_count, ei->hash_map.vec_types, LIBMVL_NO_METADATA));
3128 offset=mvl_write_named_list2(ctx, L, "MVL_INDEX");
3130 return(offset);
3131 }
3132 
3136 {
3138 LIBMVL_VECTOR *vec;
3139 L=mvl_read_named_list(ctx, data, data_size, offset);
3140 
3142 ei->partition.count=0;
3143 ei->hash_map.hash_count=0;
3144 ei->hash_map.first_count=0;
3145 
3146 if(L==NULL) {
3147  ei->partition.count=0;
3148  ei->hash_map.hash_count=0;
3149  ei->hash_map.first_count=0;
3150  return(LIBMVL_ERR_INVALID_EXTENT_INDEX);
3151  }
3152 
3153 vec=mvl_validated_vector_from_offset(data, data_size, mvl_find_list_entry(L, -1, "partition"));
3154 if(vec==NULL) {
3155  ei->partition.count=0;
3156  ei->hash_map.hash_count=0;
3157  ei->hash_map.first_count=0;
3158  return(LIBMVL_ERR_INVALID_EXTENT_INDEX);
3159  }
3160 
3161 ei->partition.size=0;
3162 ei->partition.offset=mvl_vector_data_offset(vec);
3163 ei->partition.count=mvl_vector_length(vec);
3164 
3165 vec=mvl_validated_vector_from_offset(data, data_size, mvl_find_list_entry(L, -1, "hash"));
3166 if(vec==NULL) {
3167  ei->partition.count=0;
3168  ei->hash_map.hash_count=0;
3169  ei->hash_map.first_count=0;
3170  return(LIBMVL_ERR_INVALID_EXTENT_INDEX);
3171  }
3172 ei->hash_map.hash_size=0;
3173 ei->hash_map.hash_count=mvl_vector_length(vec);
3174 ei->hash_map.hash=mvl_vector_data_offset(vec);
3175 
3176 #if 0
3177 vec=mvl_vector_from_offset(data, mvl_find_list_entry(L, -1, "first"));
3178 if(vec==NULL) {
3179  ei->partition.count=0;
3180  ei->hash_map.hash_count=0;
3181  ei->hash_map.first_count=0;
3182  return(LIBMVL_ERR_INVALID_EXTENT_INDEX);
3183  }
3184 ei->hash_map.first=mvl_vector_data_offset(vec);
3185 ei->hash_map.first_count=mvl_vector_length(vec);
3186 #else
3187 ei->hash_map.first=NULL;
3188 ei->hash_map.first_count=0;
3189 #endif
3190 
3191 vec=mvl_validated_vector_from_offset(data, data_size, mvl_find_list_entry(L, -1, "next"));
3192 if(vec==NULL || mvl_vector_length(vec)!=ei->hash_map.hash_count) {
3193  ei->partition.count=0;
3194  ei->hash_map.hash_count=0;
3195  ei->hash_map.first_count=0;
3196  return(LIBMVL_ERR_INVALID_EXTENT_INDEX);
3197  }
3198 ei->hash_map.next=mvl_vector_data_offset(vec);
3199 
3200 vec=mvl_validated_vector_from_offset(data, data_size, mvl_find_list_entry(L, -1, "hash_map"));
3201 if(vec==NULL) {
3202  ei->partition.count=0;
3203  ei->hash_map.hash_count=0;
3204  ei->hash_map.first_count=0;
3205  return(LIBMVL_ERR_INVALID_EXTENT_INDEX);
3206  }
3207 ei->hash_map.hash_map_size=mvl_vector_length(vec);
3208 ei->hash_map.hash_map=mvl_vector_data_offset(vec);
3209 
3210 vec=mvl_validated_vector_from_offset(data, data_size, mvl_find_list_entry(L, -1, "vec_types"));
3211 if(vec==NULL) {
3212  ei->partition.count=0;
3213  ei->hash_map.hash_count=0;
3214  ei->hash_map.first_count=0;
3215  return(LIBMVL_ERR_INVALID_EXTENT_INDEX);
3216  }
3217 ei->hash_map.vec_count=mvl_vector_length(vec);
3218 ei->hash_map.vec_types=mvl_vector_data_int32(vec);
3220 
3221 return(0);
3222 }
3223 
3229 {
3230 if(mvl_vector_length(vec)<1) {
3231  stats->max=-1;
3232  stats->min=1;
3233  stats->center=0.0;
3234  stats->scale=0.0;
3235  stats->nrepeat=0;
3236  stats->average_repeat_length=0.0;
3237  return;
3238  }
3239 switch(mvl_vector_type(vec)) {
3240  case LIBMVL_VECTOR_DOUBLE: {
3241  double a0, a1, b;
3242  double *pd=mvl_vector_data_double(vec);
3243  LIBMVL_OFFSET64 nrepeat;
3244  double prev;
3245  a0=pd[0];
3246  a1=a0;
3247  prev=a0;
3248  nrepeat=0;
3249  for(LIBMVL_OFFSET64 i=1;i<mvl_vector_length(vec);i++) {
3250  b=pd[i];
3251  if(b>a1)a1=b;
3252  if(b<a0)a0=b;
3253  if(b!=prev) {
3254  nrepeat++;
3255  prev=b;
3256  }
3257  }
3258  nrepeat++;
3259  stats->nrepeat=nrepeat;
3260  stats->average_repeat_length=(1.0*mvl_vector_length(vec))/nrepeat;
3261  stats->max=a1;
3262  stats->min=a0;
3263  stats->center=(a0+a1)*0.5;
3264  if(a1>a0)
3265  stats->scale=2/(a1-a0);
3266  else
3267  stats->scale=0.0;
3268  break;
3269  }
3270  case LIBMVL_VECTOR_FLOAT: {
3271  float a0, a1, b;
3272  float *pd=mvl_vector_data_float(vec);
3273  LIBMVL_OFFSET64 nrepeat;
3274  float prev;
3275  a0=pd[0];
3276  a1=a0;
3277  prev=a0;
3278  nrepeat=0;
3279  for(LIBMVL_OFFSET64 i=1;i<mvl_vector_length(vec);i++) {
3280  b=pd[i];
3281  if(b>a1)a1=b;
3282  if(b<a0)a0=b;
3283  if(b!=prev) {
3284  nrepeat++;
3285  prev=b;
3286  }
3287  }
3288  nrepeat++;
3289  stats->nrepeat=nrepeat;
3290  stats->average_repeat_length=(1.0*mvl_vector_length(vec))/nrepeat;
3291  stats->max=a1;
3292  stats->min=a0;
3293  stats->center=(a0+a1)*0.5;
3294  if(a1>a0)
3295  stats->scale=2/(a1-a0);
3296  else
3297  stats->scale=0.0;
3298  break;
3299  }
3300  case LIBMVL_VECTOR_INT32: {
3301  int a0, a1, b;
3302  int *pd=mvl_vector_data_int32(vec);
3303  LIBMVL_OFFSET64 nrepeat;
3304  int prev;
3305  a0=pd[0];
3306  a1=a0;
3307  prev=a0;
3308  nrepeat=0;
3309  for(LIBMVL_OFFSET64 i=1;i<mvl_vector_length(vec);i++) {
3310  b=pd[i];
3311  if(b>a1)a1=b;
3312  if(b<a0)a0=b;
3313  if(b!=prev) {
3314  nrepeat++;
3315  prev=b;
3316  }
3317  }
3318  nrepeat++;
3319  stats->nrepeat=nrepeat;
3320  stats->average_repeat_length=(1.0*mvl_vector_length(vec))/nrepeat;
3321  stats->max=a1;
3322  stats->min=a0;
3323  stats->center=(a0*1.0+a1*1.0)*0.5;
3324  if(a1>a0)
3325  stats->scale=2/(a1-a0);
3326  else
3327  stats->scale=0.0;
3328  break;
3329  }
3330  case LIBMVL_VECTOR_INT64: {
3331  long long int a0, a1, b;
3332  long long int *pd=mvl_vector_data_int64(vec);
3333  LIBMVL_OFFSET64 nrepeat;
3334  long long int prev;
3335  a0=pd[0];
3336  a1=a0;
3337  prev=a0;
3338  nrepeat=0;
3339  for(LIBMVL_OFFSET64 i=1;i<mvl_vector_length(vec);i++) {
3340  b=pd[i];
3341  if(b>a1)a1=b;
3342  if(b<a0)a0=b;
3343  if(b!=prev) {
3344  nrepeat++;
3345  prev=b;
3346  }
3347  }
3348  nrepeat++;
3349  stats->nrepeat=nrepeat;
3350  stats->average_repeat_length=(1.0*mvl_vector_length(vec))/nrepeat;
3351  stats->max=a1;
3352  stats->min=a0;
3353  stats->center=(a0*1.0+a1*1.0)*0.5;
3354  if(a1>a0)
3355  stats->scale=2/(a1-a0);
3356  else
3357  stats->scale=0.0;
3358  break;
3359  }
3360  default:
3361  stats->max=-1;
3362  stats->min=1;
3363  stats->center=0.0;
3364  stats->scale=0.0;
3365  stats->nrepeat=0;
3366  stats->average_repeat_length=0.0;
3367  }
3368 }
3369 
3379 void mvl_normalize_vector(const LIBMVL_VECTOR *vec, const LIBMVL_VEC_STATS *stats, LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, double *out)
3380 {
3381 double scale, center;
3382 scale=0.5*stats->scale;
3383 center=1.5-stats->center*scale;
3384 if(i0>mvl_vector_length(vec))return;
3385 if(i1>mvl_vector_length(vec)) {
3387  if(i<i0)i=i0;
3388  for(;i<i1;i++)out[i-i0]=0.0;
3389  i1=mvl_vector_length(vec);
3390  }
3391 if(i0>=i1)return;
3392 
3393 switch(mvl_vector_type(vec)) {
3394  case LIBMVL_VECTOR_DOUBLE: {
3395  double *pd=mvl_vector_data_double(vec);
3396  for(LIBMVL_OFFSET64 i=i0;i<i1;i++) {
3397  out[i-i0]=pd[i]*scale+center;
3398  }
3399  break;
3400  }
3401  case LIBMVL_VECTOR_FLOAT: {
3402  float *pd=mvl_vector_data_float(vec);
3403  for(LIBMVL_OFFSET64 i=i0;i<i1;i++) {
3404  out[i-i0]=pd[i]*scale+center;
3405  }
3406  break;
3407  }
3408  case LIBMVL_VECTOR_INT32: {
3409  int *pd=mvl_vector_data_int32(vec);
3410  for(LIBMVL_OFFSET64 i=i0;i<i1;i++) {
3411  out[i-i0]=pd[i]*scale+center;
3412  }
3413  break;
3414  }
3415  case LIBMVL_VECTOR_INT64: {
3416  long long int *pd=mvl_vector_data_int64(vec);
3417  for(LIBMVL_OFFSET64 i=i0;i<i1;i++) {
3418  out[i-i0]=pd[i]*scale+center;
3419  }
3420  break;
3421  }
3422  default:
3423  for(LIBMVL_OFFSET64 i=i0;i<i1;i++)out[i-i0]=0.0;
3424  }
3425 }
LIBMVL_VEC_STATS::nrepeat
double nrepeat
number of stretches with identical elements
Definition: libMVL.h:1317
LIBMVL_VECTOR_HEADER
This structure describes the header of MVL vector. It is basically LIBMVL_VECTOR without the actual d...
Definition: libMVL.h:120
mvl_create_context
LIBMVL_CONTEXT * mvl_create_context(void)
Create MVL context.
Definition: libMVL.c:150
MVL_EXTENT_INDEX
#define MVL_EXTENT_INDEX
Index types.
Definition: libMVL.h:1327
LIBMVL_NO_METADATA
#define LIBMVL_NO_METADATA
Use this constant to specify that no metadata should be written.
Definition: libMVL.h:350
LIBMVL_SORT_LEXICOGRAPHIC_DESC
#define LIBMVL_SORT_LEXICOGRAPHIC_DESC
Definition: libMVL.h:855
LIBMVL_PARTITION::offset
LIBMVL_OFFSET64 * offset
First extent element.
Definition: libMVL.h:1226
mvl_start_write_vector
LIBMVL_OFFSET64 mvl_start_write_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 expected_length, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
Begin write of MVL vector. This is only needed if the vector has to be written in parts,...
Definition: libMVL.c:387
mvl_free_extent_list_arrays
void mvl_free_extent_list_arrays(LIBMVL_EXTENT_LIST *el)
free arrays of previously allocated partition. This function does not free the structure itself.
Definition: libMVL.c:2976
LIBMVL_EXTENT_LIST::stop
LIBMVL_OFFSET64 * stop
First element just past the extent end.
Definition: libMVL.h:1246
mvl_accumulate_hash64
static LIBMVL_OFFSET64 mvl_accumulate_hash64(LIBMVL_OFFSET64 x, const unsigned char *data, LIBMVL_OFFSET64 count)
Accumulate hash from a piece of data.
Definition: libMVL.h:931
LIBMVL_FINALIZE_HASH
#define LIBMVL_FINALIZE_HASH
Definition: libMVL.h:1131
mvl_vector_data_double
#define mvl_vector_data_double(data)
Definition: libMVL.h:527
LIBMVL_VEC_STATS::center
double center
a value in the "middle" of the vector
Definition: libMVL.h:1314
HASH_MAP::hash_map_size
LIBMVL_OFFSET64 hash_map_size
size of hash_map array, should be power of 2
Definition: libMVL.h:1174
mvl_randomize_bits64
static LIBMVL_OFFSET64 mvl_randomize_bits64(LIBMVL_OFFSET64 x)
Randomize bits of 64-bit numbers, typically after accumulating a hash value.
Definition: libMVL.h:888
mvl_compute_hash_map_size
LIBMVL_OFFSET64 mvl_compute_hash_map_size(LIBMVL_OFFSET64 hash_count)
Compute suggested size of hash map given the number of entries to hash. Hash map size should always b...
Definition: libMVL.c:2486
mvl_load_extent_index
int mvl_load_extent_index(LIBMVL_CONTEXT *ctx, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset, LIBMVL_EXTENT_INDEX *ei)
Load extent index from memory mapped MVL file.
Definition: libMVL.c:3135
mvl_write_named_list_as_data_frame
LIBMVL_OFFSET64 mvl_write_named_list_as_data_frame(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L, int nrows, LIBMVL_OFFSET64 rownames)
Write out named list in the style of R data frames. It is assumed that all entries of L are vectors w...
Definition: libMVL.c:1532
LIBMVL_EXTENT_LIST::start
LIBMVL_OFFSET64 * start
First extent element.
Definition: libMVL.h:1245
LIBMVL_COMPLETE_HASH
#define LIBMVL_COMPLETE_HASH
Definition: libMVL.h:1132
LIBMVL_CHECKSUM_ALGORITHM_INTERNAL1_HASH64
#define LIBMVL_CHECKSUM_ALGORITHM_INTERNAL1_HASH64
Definition: libMVL.h:132
mvl_read_named_list
LIBMVL_NAMED_LIST * mvl_read_named_list(LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset)
Read back MVL named list. This function also initialize hash table for fast access.
Definition: libMVL.c:1628
LIBMVL_NAMED_LIST
This structure describes a named list - an array of LIBMVL_OFFSET64 entries each with a character nam...
Definition: libMVL.h:236
HASH_MAP::first_count
LIBMVL_OFFSET64 first_count
Number of valid entries in first array - this is populated by mvl_find_groups()
Definition: libMVL.h:1175
mvl_create_named_list
LIBMVL_NAMED_LIST * mvl_create_named_list(int size)
Allocate and initialize structure for LIBMVL_NAMED_LIST.
Definition: libMVL.c:1295
mvl_packed_list_get_entry_bytelength
static LIBMVL_OFFSET64 mvl_packed_list_get_entry_bytelength(const LIBMVL_VECTOR *vec, LIBMVL_OFFSET64 idx)
Get length in bytes of string element idx from a packed list.
Definition: libMVL.h:794
mvl_write_cached_string
LIBMVL_OFFSET64 mvl_write_cached_string(LIBMVL_CONTEXT *ctx, long length, const char *data)
Write a single C string if it has not been written before, otherwise return offset to previously writ...
Definition: libMVL.c:719
LIBMVL_PARTITION::count
LIBMVL_OFFSET64 count
extent has count valid elements
Definition: libMVL.h:1225
mvl_element_size
static int mvl_element_size(int type)
Return the element size in bytes for a particular MVL type.
Definition: libMVL.h:75
mvl_vector_metadata_offset
#define mvl_vector_metadata_offset(data)
Return offset to metadata of given LIBMVL_VECTOR.
Definition: libMVL.h:532
mvl_packed_list_get_entry
static const unsigned char * mvl_packed_list_get_entry(const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 idx)
Get pointer to the start of string element idx from a packed list.
Definition: libMVL.h:812
mvl_write_concat_vectors
LIBMVL_OFFSET64 mvl_write_concat_vectors(LIBMVL_CONTEXT *ctx, int type, long nvec, const long *lengths, void **data, LIBMVL_OFFSET64 metadata)
Write complete MVL vector concatenating data from many vectors or arrays.
Definition: libMVL.c:654
mvl_free_partition_arrays
void mvl_free_partition_arrays(LIBMVL_PARTITION *el)
free arrays of previously allocated partition. This function does not free the structure itself.
Definition: libMVL.c:2952
mvl_recompute_named_list_hash
void mvl_recompute_named_list_hash(LIBMVL_NAMED_LIST *L)
Recompute named list hash.
Definition: libMVL.c:1332
mvl_free_extent_index_arrays
void mvl_free_extent_index_arrays(LIBMVL_EXTENT_INDEX *ei)
free arrays of previously allocated extent list. This function does not free the structure itself.
Definition: libMVL.c:3030
mvl_validated_vector_from_offset
static LIBMVL_VECTOR * mvl_validated_vector_from_offset(void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 offset)
A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR s...
Definition: libMVL.h:601
mvl_verify_checksum_vector3
int mvl_verify_checksum_vector3(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, void *start, void *stop)
Compute and verify checksums for a given area. It works just like mvl_verify_checksum_vector() but ta...
Definition: libMVL.c:1158
HASH_MAP::next
LIBMVL_OFFSET64 * next
array of next indices in each group. ~0LLU indicates end of group
Definition: libMVL.h:1179
LIBMVL_VEC_STATS
Vector statistics.
Definition: libMVL.h:1311
mvl_close
void mvl_close(LIBMVL_CONTEXT *ctx)
Write out MVL file directory and postable and close file.
Definition: libMVL.c:1735
LIBMVL_VEC_STATS::scale
double scale
normalization scale
Definition: libMVL.h:1315
LIBMVL_VECTOR_DOUBLE
#define LIBMVL_VECTOR_DOUBLE
Definition: libMVL.h:58
mvl_accumulate_double_hash64
static LIBMVL_OFFSET64 mvl_accumulate_double_hash64(LIBMVL_OFFSET64 x, const double *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 64-bit floats The floats are hashed by value, not representation,...
Definition: libMVL.h:1096
LIBMVL_POSTAMBLE
This structure is written last to close MVL file. It contains an offset to MVL directory that can be ...
Definition: libMVL.h:112
mvl_vector_length
#define mvl_vector_length(data)
Return number of elements from a pointer to LIBMVL_VECTOR.
Definition: libMVL.h:471
mvl_rewrite_vector
void mvl_rewrite_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 base_offset, LIBMVL_OFFSET64 idx, long length, const void *data)
Write more data to MVL vector that has been previously created with mvl_start_write_vector()
Definition: libMVL.c:470
mvl_packed_list_validate_entry
static int mvl_packed_list_validate_entry(const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 idx)
Get pointer to the start of string element idx from a packed list.
Definition: libMVL.h:829
mvl_write_directory
LIBMVL_OFFSET64 mvl_write_directory(LIBMVL_CONTEXT *ctx)
Write out MVL file directory with entries collected so far. If this is called multiple times only the...
Definition: libMVL.c:1248
HASH_MAP::vec_types
int * vec_types
Types of vectors used to produce hashes.
Definition: libMVL.h:1181
mvl_add_directory_entry
void mvl_add_directory_entry(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag)
Add an entry to the top level directory of MVL file.
Definition: libMVL.c:1203
MVL_WVEC
#define MVL_WVEC(ctx, type,...)
Definition: libMVL.h:412
mvl_accumulate_int64_hash64
static LIBMVL_OFFSET64 mvl_accumulate_int64_hash64(LIBMVL_OFFSET64 x, const long long int *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 64-bit integers The integers are hashed by value,...
Definition: libMVL.h:1032
mvl_strerror
const char * mvl_strerror(LIBMVL_CONTEXT *ctx)
Obtain description of error code.
Definition: libMVL.c:217
mvl_write_packed_list
LIBMVL_OFFSET64 mvl_write_packed_list(LIBMVL_CONTEXT *ctx, long count, const long *str_size, unsigned char **str, LIBMVL_OFFSET64 metadata)
Write an array of strings as a packed list data type. This is convenient for storing a lot of differe...
Definition: libMVL.c:803
MVL_SEED_HASH_VALUE
#define MVL_SEED_HASH_VALUE
Definition: libMVL.h:918
mvl_init_partitiion
void mvl_init_partitiion(LIBMVL_PARTITION *el)
Initialize freshly allocated partition structure.
Definition: libMVL.c:2943
mvl_vector_from_offset
static LIBMVL_VECTOR * mvl_vector_from_offset(void *data, LIBMVL_OFFSET64 offset)
A convenience function to convert an offset into memory mapped data into a pointer to LIBMVL_VECTOR s...
Definition: libMVL.h:588
mvl_vector_data_offset
#define mvl_vector_data_offset(data)
Definition: libMVL.h:528
HASH_MAP
This structure is used for constructing associative maps and also for describing index groupings.
Definition: libMVL.h:1170
mvl_accumulate_int32_hash64
static LIBMVL_OFFSET64 mvl_accumulate_int32_hash64(LIBMVL_OFFSET64 x, const int *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 32-bit integers The integers are hashed by value,...
Definition: libMVL.h:1001
HASH_MAP::vec_count
LIBMVL_OFFSET64 vec_count
Number of vectors used to produce hashes.
Definition: libMVL.h:1180
LIBMVL_PACKED_LIST64
#define LIBMVL_PACKED_LIST64
Definition: libMVL.h:62
LIBMVL_PARTITION::size
LIBMVL_OFFSET64 size
Space allocated for start and stop arrays.
Definition: libMVL.h:1224
mvl_compute_vec_stats
void mvl_compute_vec_stats(const LIBMVL_VECTOR *vec, LIBMVL_VEC_STATS *stats)
Compute vector statistics, such as a bounding box.
Definition: libMVL.c:3228
mvl_write_extent_index
LIBMVL_OFFSET64 mvl_write_extent_index(LIBMVL_CONTEXT *ctx, LIBMVL_EXTENT_INDEX *ei)
Write extent index to MVL file.
Definition: libMVL.c:3114
LIBMVL_VECTOR_FLOAT
#define LIBMVL_VECTOR_FLOAT
Definition: libMVL.h:57
mvl_vector_type
#define mvl_vector_type(data)
Return type of data from a pointer to LIBMVL_VECTOR.
Definition: libMVL.h:467
mvl_hash_range
int mvl_hash_range(LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
This function is used to compute 64 bit hash of vector values array hash[] is passed in and contains ...
Definition: libMVL.c:2395
MVL_FLAG_OWN_FIRST
#define MVL_FLAG_OWN_FIRST
Definition: libMVL.h:1161
mvl_vector_data_int64
#define mvl_vector_data_int64(data)
Definition: libMVL.h:525
mvl_vector_data_uint8
#define mvl_vector_data_uint8(data)
Definition: libMVL.h:523
mvl_add_directory_entry_n
void mvl_add_directory_entry_n(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 offset, const char *tag, LIBMVL_OFFSET64 tag_size)
Add entry to the top level directory of MVL file.
Definition: libMVL.c:1227
mvl_accumulate_float_hash64
static LIBMVL_OFFSET64 mvl_accumulate_float_hash64(LIBMVL_OFFSET64 x, const float *data, LIBMVL_OFFSET64 count)
Accumulate hash from an array of 32-bit floats The floats are hashed by value, not representation,...
Definition: libMVL.h:1064
mvl_create_R_attributes_list
LIBMVL_NAMED_LIST * mvl_create_R_attributes_list(LIBMVL_CONTEXT *ctx, const char *R_class)
Create R-style attribute list for class given by R_class, which could be, for example,...
Definition: libMVL.c:1450
mvl_indexed_copy_vector
LIBMVL_OFFSET64 mvl_indexed_copy_vector(LIBMVL_CONTEXT *ctx, LIBMVL_OFFSET64 index_count, const LIBMVL_OFFSET64 *indices, const LIBMVL_VECTOR *vec, const void *data, LIBMVL_OFFSET64 data_length, LIBMVL_OFFSET64 metadata, LIBMVL_OFFSET64 max_buffer)
Write MVL vector that contains data at specific indices. The indices can repeat, and can themselves b...
Definition: libMVL.c:491
LIBMVL_VEC_STATS::min
double min
minimum value of vector entries
Definition: libMVL.h:1313
LIBMVL_VECTOR
LIBMVL_VECTOR is the basic unit of information storage.
Definition: libMVL.h:184
mvl_extend_partition
void mvl_extend_partition(LIBMVL_PARTITION *el, LIBMVL_OFFSET64 nelem)
Increase storage of previously allocated partition.
Definition: libMVL.c:2868
mvl_get_character_class_offset
LIBMVL_OFFSET64 mvl_get_character_class_offset(LIBMVL_CONTEXT *ctx)
Get offset to metadata describing R-style character class - an array of strings. This is convenient f...
Definition: libMVL.c:1187
LIBMVL_SORT_LEXICOGRAPHIC
#define LIBMVL_SORT_LEXICOGRAPHIC
Definition: libMVL.h:854
HASH_MAP::hash_size
LIBMVL_OFFSET64 hash_size
size of hash, first and next arrays
Definition: libMVL.h:1173
LIBMVL_EXTENT_LIST::size
LIBMVL_OFFSET64 size
Space allocated for start and stop arrays.
Definition: libMVL.h:1243
HASH_MAP::flags
LIBMVL_OFFSET64 flags
flags describing HASH_MAP state
Definition: libMVL.h:1171
LIBMVL_VEC_STATS::max
double max
maximum value of vector entries
Definition: libMVL.h:1312
mvl_hash_indices
int mvl_hash_indices(LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 *hash, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, int flags)
This function is used to compute 64 bit hash of vector values array hash[] is passed in and contains ...
Definition: libMVL.c:2293
mvl_find_groups
void mvl_find_groups(LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, HASH_MAP *hm)
This function transforms HASH_MAP into a list of groups. Similar to GROUP BY clause in SQL.
Definition: libMVL.c:2798
LIBMVL_CHECKSUM_VECTOR_HEADER
This structure describes the header of MVL checksum vector.
Definition: libMVL.h:144
mvl_init_extent_index
void mvl_init_extent_index(LIBMVL_EXTENT_INDEX *ei)
Initialize freshly allocated extent list structure.
Definition: libMVL.c:3020
mvl_load_image
void mvl_load_image(LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 length)
Initilize MVL context to operate with memory mapped area data.
Definition: libMVL.c:1787
mvl_free_hash_map
void mvl_free_hash_map(HASH_MAP *hash_map)
Free allocated HASH_MAP.
Definition: libMVL.c:2529
LIBMVL_VECTOR_UINT8
#define LIBMVL_VECTOR_UINT8
Definition: libMVL.h:54
MVL_FLAG_OWN_NEXT
#define MVL_FLAG_OWN_NEXT
Definition: libMVL.h:1162
mvl_vector_nentries
static LIBMVL_OFFSET64 mvl_vector_nentries(void *vec)
Return number of entries in the vector. Currently this is the same as mvl_vector_length() for all typ...
Definition: libMVL.h:476
mvl_write_string
LIBMVL_OFFSET64 mvl_write_string(LIBMVL_CONTEXT *ctx, long length, const char *data, LIBMVL_OFFSET64 metadata)
Write a single C string. In particular, this is handy for providing metadata tags.
Definition: libMVL.c:707
mvl_allocate_hash_map
HASH_MAP * mvl_allocate_hash_map(LIBMVL_OFFSET64 max_index_count)
Create HASH_MAP structure.
Definition: libMVL.c:2505
LIBMVL_VECTOR_INT64
#define LIBMVL_VECTOR_INT64
Definition: libMVL.h:56
LIBMVL_EXTENT_INDEX
An index into a table-like set of vectors with equal number of elements.
Definition: libMVL.h:1260
LIBMVL_INIT_HASH
#define LIBMVL_INIT_HASH
Definition: libMVL.h:1130
mvl_find_directory_entry
LIBMVL_OFFSET64 mvl_find_directory_entry(LIBMVL_CONTEXT *ctx, const char *tag)
Find entry in MVL file directory.
Definition: libMVL.c:1772
HASH_MAP::hash_count
LIBMVL_OFFSET64 hash_count
Number of valid entries in hash, hash_count < hash_size and hash_count < hash_map_size.
Definition: libMVL.h:1172
mvl_free_context
void mvl_free_context(LIBMVL_CONTEXT *ctx)
Release memory associated with MVL context.
Definition: libMVL.c:190
LIBMVL_VECTOR_INT32
#define LIBMVL_VECTOR_INT32
Definition: libMVL.h:55
LIBMVL_PARTITION
List of offsets partitioning the vector. First element is always 0, last element is vector size.
Definition: libMVL.h:1223
mvl_free_named_list
void mvl_free_named_list(LIBMVL_NAMED_LIST *L)
Free structure for LIBMVL_NAMED_LIST.
Definition: libMVL.c:1317
LIBMVL_FULL_CHECKSUMS_DIRECTORY_KEY
#define LIBMVL_FULL_CHECKSUMS_DIRECTORY_KEY
Definition: libMVL.h:139
LIBMVL_NULL_OFFSET
#define LIBMVL_NULL_OFFSET
Null offsets into memory mapped data are always invalid because that is where preamble is This is usu...
Definition: libMVL.h:354
HASH_MAP::first
LIBMVL_OFFSET64 * first
array of indices in each group
Definition: libMVL.h:1178
mvl_normalize_vector
void mvl_normalize_vector(const LIBMVL_VECTOR *vec, const LIBMVL_VEC_STATS *stats, LIBMVL_OFFSET64 i0, LIBMVL_OFFSET64 i1, double *out)
normalize vector
Definition: libMVL.c:3379
HASH_MAP::hash
LIBMVL_OFFSET64 * hash
Input hashes, used by mvl_compute_hash_map()
Definition: libMVL.h:1176
mvl_hash_match_count
LIBMVL_OFFSET64 mvl_hash_match_count(LIBMVL_OFFSET64 key_count, const LIBMVL_OFFSET64 *key_hash, HASH_MAP *hm)
Find count of matches between hashes of two sets.
Definition: libMVL.c:2613
LIBMVL_PREAMBLE
This structure is written at the beginning of MVL file. It contains the signature identifying MVL for...
Definition: libMVL.h:102
mvl_extend_extent_list
void mvl_extend_extent_list(LIBMVL_EXTENT_LIST *el, LIBMVL_OFFSET64 nelem)
Increase storage of previously allocated extent list.
Definition: libMVL.c:2992
mvl_compute_hash_map
void mvl_compute_hash_map(HASH_MAP *hm)
Compute hash map. This assumes that hm->hash array has been populated with hm->hash_count hashes comp...
Definition: libMVL.c:2546
mvl_write_named_list2
LIBMVL_OFFSET64 mvl_write_named_list2(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L, char *cl)
Write out named list. In R, this would be read back as list with class attribute set to "cl".
Definition: libMVL.c:1509
mvl_verify_checksum_vector2
int mvl_verify_checksum_vector2(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 vector_offset)
Compute and verify checksums for the entire area occupied by given LIBMVL_VECTOR. Metadata is not che...
Definition: libMVL.c:1095
LIBMVL_VECTOR_CSTRING
#define LIBMVL_VECTOR_CSTRING
Definition: libMVL.h:60
mvl_vector_data_int32
#define mvl_vector_data_int32(data)
Definition: libMVL.h:524
MVL_FLAG_OWN_HASH
#define MVL_FLAG_OWN_HASH
Flags describing HASH_MAP state.
Definition: libMVL.h:1159
mvl_add_list_entry
long mvl_add_list_entry(LIBMVL_NAMED_LIST *L, long tag_length, const char *tag, LIBMVL_OFFSET64 offset)
Add entry to LIBMVL_NAMED_LIST. The entry is always appended to the end.
Definition: libMVL.c:1369
LIBMVL_CONTEXT
This structure describes MVL context - a collection of system data associated with a single MVL file.
Definition: libMVL.h:255
mvl_open
void mvl_open(LIBMVL_CONTEXT *ctx, FILE *f)
Prepare context for writing to file f.
Definition: libMVL.c:1726
mvl_init_extent_list
void mvl_init_extent_list(LIBMVL_EXTENT_LIST *el)
Initialize freshly allocated partition structure.
Definition: libMVL.c:2964
mvl_write_vector
LIBMVL_OFFSET64 mvl_write_vector(LIBMVL_CONTEXT *ctx, int type, LIBMVL_OFFSET64 length, const void *data, LIBMVL_OFFSET64 metadata)
Write complete MVL vector.
Definition: libMVL.c:337
mvl_write_hash64_checksum_vector
LIBMVL_OFFSET64 mvl_write_hash64_checksum_vector(LIBMVL_CONTEXT *ctx, void *data, LIBMVL_OFFSET64 checksum_area_start, LIBMVL_OFFSET64 checksum_area_stop, LIBMVL_OFFSET64 checksum_block_size)
Compute and write checksums for a given area.
Definition: libMVL.c:851
mvl_verify_full_checksum_vector
int mvl_verify_full_checksum_vector(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size)
Compute and verify checksums for the entire area covered by checksum vector.
Definition: libMVL.c:1052
mvl_find_matches
int mvl_find_matches(LIBMVL_OFFSET64 key_indices_count, const LIBMVL_OFFSET64 *key_indices, LIBMVL_OFFSET64 key_vec_count, LIBMVL_VECTOR **key_vec, void **key_vec_data, LIBMVL_OFFSET64 *key_vec_data_length, LIBMVL_OFFSET64 *key_hash, LIBMVL_OFFSET64 indices_count, const LIBMVL_OFFSET64 *indices, LIBMVL_OFFSET64 vec_count, LIBMVL_VECTOR **vec, void **vec_data, LIBMVL_OFFSET64 *vec_data_length, HASH_MAP *hm, LIBMVL_OFFSET64 *key_last, LIBMVL_OFFSET64 pairs_size, LIBMVL_OFFSET64 *key_match_indices, LIBMVL_OFFSET64 *match_indices)
Compute pairs of merge indices. This is similar to JOIN operation in SQL.
Definition: libMVL.c:2719
libMVL.h
core libMVL functions and structures
mvl_find_repeats
void mvl_find_repeats(LIBMVL_PARTITION *el, LIBMVL_OFFSET64 count, LIBMVL_VECTOR **vec, void **data, LIBMVL_OFFSET64 *data_length)
Compute list of extents describing stretches of data with identical values.
Definition: libMVL.c:2887
mvl_write_named_list
LIBMVL_OFFSET64 mvl_write_named_list(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L)
Write out named list. In R, this would be read back as list.
Definition: libMVL.c:1487
MVL_FLAG_OWN_HASH_MAP
#define MVL_FLAG_OWN_HASH_MAP
Definition: libMVL.h:1160
LIBMVL_EXTENT_LIST::count
LIBMVL_OFFSET64 count
extent has count valid elements
Definition: libMVL.h:1244
LIBMVL_VEC_STATS::average_repeat_length
double average_repeat_length
average length of stretch with identical elements
Definition: libMVL.h:1316
mvl_validate_vector
static int mvl_validate_vector(LIBMVL_OFFSET64 offset, const void *data, LIBMVL_OFFSET64 data_size)
This function returns 0 if the offset into data points to a valid vector, or a negative error code ot...
Definition: libMVL.h:541
mvl_compute_extent_index
int mvl_compute_extent_index(LIBMVL_EXTENT_INDEX *ei, LIBMVL_OFFSET64 count, LIBMVL_VECTOR **vec, void **data, LIBMVL_OFFSET64 *data_length)
Compute an extent index.
Definition: libMVL.c:3064
mvl_verify_checksum_vector
int mvl_verify_checksum_vector(LIBMVL_CONTEXT *ctx, const LIBMVL_VECTOR *checksum_vector, void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 start, LIBMVL_OFFSET64 stop)
Compute and verify checksums for a given area.
Definition: libMVL.c:951
LIBMVL_OFFSET64
unsigned long long LIBMVL_OFFSET64
MVL unsigned 64-bit type used for describing offsets into loaded data.
Definition: libMVL.h:98
mvl_read_attributes_list
LIBMVL_NAMED_LIST * mvl_read_attributes_list(LIBMVL_CONTEXT *ctx, const void *data, LIBMVL_OFFSET64 data_size, LIBMVL_OFFSET64 metadata_offset)
Read back MVL attributes list, typically used to described metadata. This function also initialize ha...
Definition: libMVL.c:1559
mvl_vector_data_float
#define mvl_vector_data_float(data)
Definition: libMVL.h:526
mvl_write_attributes_list
LIBMVL_OFFSET64 mvl_write_attributes_list(LIBMVL_CONTEXT *ctx, LIBMVL_NAMED_LIST *L)
Write out R-style attribute list.
Definition: libMVL.c:1464
LIBMVL_VECTOR_OFFSET64
#define LIBMVL_VECTOR_OFFSET64
Definition: libMVL.h:59
LIBMVL_EXTENT_LIST
List of extents - ranges of consequentive indices. Similar to partition, but they do not have to foll...
Definition: libMVL.h:1242
mvl_find_list_entry
LIBMVL_OFFSET64 mvl_find_list_entry(LIBMVL_NAMED_LIST *L, long tag_length, const char *tag)
Find existing entry inside LIBMVL_NAMED_LIST. If several identically named entries exist this functio...
Definition: libMVL.c:1416
HASH_MAP::hash_map
LIBMVL_OFFSET64 * hash_map
This is an associative table mapping hash & (hash_map_size-1) into indices in the "first" array.
Definition: libMVL.h:1177