1 /*****************************************************************************
2 * RRDtool 1.4.3 Copyright by Tobi Oetiker, 1997-2010
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
19 #ifdef HAVE_BROKEN_MS_ASYNC
20 #include <sys/types.h>
27 #define _LK_UNLCK 0 /* Unlock */
28 #define _LK_LOCK 1 /* Lock */
29 #define _LK_NBLCK 2 /* Non-blocking lock */
30 #define _LK_RLCK 3 /* Lock for read only */
31 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
34 #define LK_UNLCK _LK_UNLCK
35 #define LK_LOCK _LK_LOCK
36 #define LK_NBLCK _LK_NBLCK
37 #define LK_RLCK _LK_RLCK
38 #define LK_NBRLCK _LK_NBRLCK
41 /* DEBUG 2 prints information obtained via mincore(2) */
43 /* do not calculate exact madvise hints but assume 1 page for headers and
44 * set DONTNEED for the rest, which is assumed to be data */
45 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
46 * your syscalls are very slow */
49 /* the cast to void* is there to avoid this warning seen on ia64 with certain
50 versions of gcc: 'cast increases required alignment of target type'
52 #define __rrd_read(dst, dst_t, cnt) { \
53 size_t wanted = sizeof(dst_t)*(cnt); \
54 if (offset + wanted > rrd_file->file_len) { \
55 rrd_set_error("reached EOF while loading header " #dst); \
56 goto out_nullify_head; \
58 (dst) = (dst_t*)(void*) (data + offset); \
62 #define __rrd_read(dst, dst_t, cnt) { \
63 size_t wanted = sizeof(dst_t)*(cnt); \
65 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
66 rrd_set_error(#dst " malloc"); \
67 goto out_nullify_head; \
69 got = read (rrd_simple_file->fd, dst, wanted); \
70 if (got != wanted) { \
71 rrd_set_error("short read while reading header " #dst); \
72 goto out_nullify_head; \
78 /* get the address of the start of this page */
79 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
81 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
85 /* Open a database file, return its header and an open filehandle,
86 * positioned to the first cdp in the first rra.
87 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
88 * before returning an error. Do not call rrd_close upon failure of rrd_open.
89 * If creating a new file, the parameter rrd must be initialised with
90 * details of the file content.
91 * If opening an existing file, then rrd must be initialised by
92 * rrd_init(rrd) prior to invoking rrd_open
96 const char *const file_name,
105 ssize_t _page_size = sysconf(_SC_PAGESIZE);
106 char *data = MAP_FAILED;
110 rrd_file_t *rrd_file = NULL;
111 rrd_simple_file_t *rrd_simple_file = NULL;
112 size_t newfile_size = 0;
113 size_t header_len, value_cnt, data_len;
115 /* Are we creating a new file? */
116 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
118 header_len = rrd_get_header_size(rrd);
121 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
122 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
124 data_len = sizeof(rrd_value_t) * value_cnt;
126 newfile_size = header_len + data_len;
129 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
130 if (rrd_file == NULL) {
131 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
134 memset(rrd_file, 0, sizeof(rrd_file_t));
136 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
137 if(rrd_file->pvt == NULL) {
138 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
141 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
142 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
145 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
146 (RRD_READONLY | RRD_READWRITE)) {
147 /* Both READONLY and READWRITE were given, which is invalid. */
148 rrd_set_error("in read/write request mask");
154 rrd_simple_file->mm_prot = PROT_READ;
155 rrd_simple_file->mm_flags = 0;
158 if (rdwr & RRD_READONLY) {
162 rrd_simple_file->mm_flags = MAP_PRIVATE;
164 # ifdef MAP_NORESERVE
165 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
169 if (rdwr & RRD_READWRITE) {
172 rrd_simple_file->mm_flags = MAP_SHARED;
173 rrd_simple_file->mm_prot |= PROT_WRITE;
176 if (rdwr & RRD_CREAT) {
177 flags |= (O_CREAT | O_TRUNC);
179 if (rdwr & RRD_EXCL) {
183 if (rdwr & RRD_READAHEAD) {
185 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
187 #if defined MAP_NONBLOCK
188 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
191 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
195 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
196 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
201 #ifdef HAVE_BROKEN_MS_ASYNC
202 if (rdwr & RRD_READWRITE) {
203 /* on some unices, the file's mtime does not get updated
204 on msync MS_ASYNC; in order to help them,
205 we update the timestamp at this point.
206 The thing happens pretty 'close' to the open
207 call so the chances of a race should be minimal.
209 Maybe ask your vendor to fix your OS ... */
210 utime(file_name,NULL);
215 /* Better try to avoid seeks as much as possible. stat may be heavy but
216 * many concurrent seeks are even worse. */
217 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
218 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
221 if (newfile_size == 0) {
222 rrd_file->file_len = statb.st_size;
224 rrd_file->file_len = newfile_size;
225 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
226 if ( write(rrd_simple_file->fd, "\0", 1) == -1){ /* poke */
227 rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
230 lseek(rrd_simple_file->fd, 0, SEEK_SET);
232 #ifdef HAVE_POSIX_FADVISE
233 /* In general we need no read-ahead when dealing with rrd_files.
234 When we stop reading, it is highly unlikely that we start up again.
235 In this manner we actually save time and diskaccess (and buffer cache).
236 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
237 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
241 if (rdwr & RRD_READWRITE)
243 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
244 rrd_set_error("failed to disable the stream buffer\n");
251 /* force allocating the file on the underlying filesystem to prevent any
252 * future bus error when the filesystem is full and attempting to write
253 * through the file mapping. Filling the file using memset on the file
254 * mapping can also lead to bus errors, so we use the old fashioned
257 if (rdwr & RRD_CREAT) {
261 memset(buf, DNAN, sizeof buf);
262 lseek(rrd_simple_file->fd, offset, SEEK_SET);
264 for (i = 0; i < (newfile_size - 1) / sizeof buf; ++i)
266 if (write(rrd_simple_file->fd, buf, sizeof buf) == -1)
268 rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
273 if (write(rrd_simple_file->fd, buf,
274 (newfile_size - 1) % sizeof buf) == -1)
276 rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
280 lseek(rrd_simple_file->fd, 0, SEEK_SET);
283 data = mmap(0, rrd_file->file_len,
284 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
285 rrd_simple_file->fd, offset);
287 /* lets see if the first read worked */
288 if (data == MAP_FAILED) {
289 rrd_set_error("mmaping file '%s': %s", file_name,
290 rrd_strerror(errno));
293 rrd_simple_file->file_start = data;
294 if (rdwr & RRD_CREAT) {
298 if (rdwr & RRD_CREAT)
301 if (rdwr & RRD_COPY) {
302 /* We will read everything in a moment (copying) */
303 madvise(data, rrd_file->file_len, MADV_WILLNEED );
304 madvise(data, rrd_file->file_len, MADV_SEQUENTIAL );
306 /* We do not need to read anything in for the moment */
307 madvise(data, rrd_file->file_len, MADV_RANDOM);
308 /* the stat_head will be needed soonish, so hint accordingly */
309 madvise(data, sizeof(stat_head_t), MADV_WILLNEED);
310 madvise(data, sizeof(stat_head_t), MADV_RANDOM);
314 __rrd_read(rrd->stat_head, stat_head_t,
317 /* let's do some tests to check that we are on track ... */
318 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
319 rrd_set_error("'%s' is not an RRD file", file_name);
320 goto out_nullify_head;
323 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
324 rrd_set_error("This RRD was created on another architecture");
325 goto out_nullify_head;
328 version = atoi(rrd->stat_head->version);
330 if (version > atoi(RRD_VERSION)) {
331 rrd_set_error("can't handle RRD file version %s",
332 rrd->stat_head->version);
333 goto out_nullify_head;
335 #if defined USE_MADVISE
336 /* the ds_def will be needed soonish, so hint accordingly */
337 madvise(data + PAGE_START(offset),
338 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
340 __rrd_read(rrd->ds_def, ds_def_t,
341 rrd->stat_head->ds_cnt);
343 #if defined USE_MADVISE
344 /* the rra_def will be needed soonish, so hint accordingly */
345 madvise(data + PAGE_START(offset),
346 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
348 __rrd_read(rrd->rra_def, rra_def_t,
349 rrd->stat_head->rra_cnt);
351 /* handle different format for the live_head */
353 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
354 if (rrd->live_head == NULL) {
355 rrd_set_error("live_head_t malloc");
358 #if defined USE_MADVISE
359 /* the live_head will be needed soonish, so hint accordingly */
360 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
362 __rrd_read(rrd->legacy_last_up, time_t,
365 rrd->live_head->last_up = *rrd->legacy_last_up;
366 rrd->live_head->last_up_usec = 0;
368 #if defined USE_MADVISE
369 /* the live_head will be needed soonish, so hint accordingly */
370 madvise(data + PAGE_START(offset),
371 sizeof(live_head_t), MADV_WILLNEED);
373 __rrd_read(rrd->live_head, live_head_t,
376 __rrd_read(rrd->pdp_prep, pdp_prep_t,
377 rrd->stat_head->ds_cnt);
378 __rrd_read(rrd->cdp_prep, cdp_prep_t,
379 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
380 __rrd_read(rrd->rra_ptr, rra_ptr_t,
381 rrd->stat_head->rra_cnt);
383 rrd_file->header_len = offset;
384 rrd_file->pos = offset;
387 unsigned long row_cnt = 0;
389 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
390 row_cnt += rrd->rra_def[ui].row_cnt;
392 size_t correct_len = rrd_file->header_len +
393 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
395 if (correct_len > rrd_file->file_len)
397 rrd_set_error("'%s' is too small (should be %ld bytes)",
398 file_name, (long long) correct_len);
399 goto out_nullify_head;
406 rrd->stat_head = NULL;
409 if (data != MAP_FAILED)
410 munmap(data, rrd_file->file_len);
413 close(rrd_simple_file->fd);
421 #if defined DEBUG && DEBUG > 1
422 /* Print list of in-core pages of the current rrd_file. */
425 rrd_file_t *rrd_file,
428 rrd_simple_file_t *rrd_simple_file;
429 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
431 /* pretty print blocks in core */
434 ssize_t _page_size = sysconf(_SC_PAGESIZE);
436 off = rrd_file->file_len +
437 ((rrd_file->file_len + _page_size - 1) / _page_size);
441 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
443 unsigned is_in = 0, was_in = 0;
445 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
446 is_in = vec[off] & 1; /* if lsb set then is core resident */
449 if (was_in != is_in) {
450 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
451 was_in ? "" : "not ", vec + prev, off - prev);
457 "%s: %sin core: %p len %ld\n", mark,
458 was_in ? "" : "not ", vec + prev, off - prev);
460 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
463 fprintf(stderr, "sorry mincore only works with mmap");
466 #endif /* defined DEBUG && DEBUG > 1 */
469 * get exclusive lock to whole file.
470 * lock gets removed when we close the file
472 * returns 0 on success
475 rrd_file_t *rrd_file)
478 rrd_simple_file_t *rrd_simple_file;
479 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
482 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
485 if (_fstat(rrd_simple_file->fd, &st) == 0) {
486 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
493 lock.l_type = F_WRLCK; /* exclusive write lock */
494 lock.l_len = 0; /* whole file */
495 lock.l_start = 0; /* start of file */
496 lock.l_whence = SEEK_SET; /* end of file */
498 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
506 /* drop cache except for the header and the active pages */
508 rrd_file_t *rrd_file,
511 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
512 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
513 size_t dontneed_start;
517 ssize_t _page_size = sysconf(_SC_PAGESIZE);
519 if (rrd_file == NULL) {
520 #if defined DEBUG && DEBUG
521 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
526 #if defined DEBUG && DEBUG > 1
527 mincore_print(rrd_file, "before");
530 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
531 rra_start = rrd_file->header_len;
532 dontneed_start = PAGE_START(rra_start) + _page_size;
533 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
536 + rrd->rra_ptr[i].cur_row
537 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
538 if (active_block > dontneed_start) {
540 madvise(rrd_simple_file->file_start + dontneed_start,
541 active_block - dontneed_start - 1, MADV_DONTNEED);
543 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
544 #ifdef HAVE_POSIX_FADVISE
545 posix_fadvise(rrd_simple_file->fd, dontneed_start,
546 active_block - dontneed_start - 1,
547 POSIX_FADV_DONTNEED);
550 dontneed_start = active_block;
551 /* do not release 'hot' block if update for this RRA will occur
552 * within 10 minutes */
553 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
554 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
555 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
556 dontneed_start += _page_size;
559 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
563 if (dontneed_start < rrd_file->file_len) {
565 madvise(rrd_simple_file->file_start + dontneed_start,
566 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
568 #ifdef HAVE_POSIX_FADVISE
569 posix_fadvise(rrd_simple_file->fd, dontneed_start,
570 rrd_file->file_len - dontneed_start,
571 POSIX_FADV_DONTNEED);
575 #if defined DEBUG && DEBUG > 1
576 mincore_print(rrd_file, "after");
578 #endif /* without madvise and posix_fadvise it does not make much sense todo anything */
586 rrd_file_t *rrd_file)
588 rrd_simple_file_t *rrd_simple_file;
589 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
593 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
595 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
596 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
598 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
600 ret = close(rrd_simple_file->fd);
602 rrd_set_error("closing file: %s", rrd_strerror(errno));
610 /* Set position of rrd_file. */
613 rrd_file_t *rrd_file,
618 rrd_simple_file_t *rrd_simple_file;
619 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
622 if (whence == SEEK_SET)
624 else if (whence == SEEK_CUR)
625 rrd_file->pos += off;
626 else if (whence == SEEK_END)
627 rrd_file->pos = rrd_file->file_len + off;
629 ret = lseek(rrd_simple_file->fd, off, whence);
631 rrd_set_error("lseek: %s", rrd_strerror(errno));
634 /* mimic fseek, which returns 0 upon success */
635 return ret < 0; /*XXX: or just ret to mimic lseek */
639 /* Get current position in rrd_file. */
642 rrd_file_t *rrd_file)
644 return rrd_file->pos;
648 /* Read count bytes into buffer buf, starting at rrd_file->pos.
649 * Returns the number of bytes read or <0 on error. */
652 rrd_file_t *rrd_file,
656 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
661 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
664 return -1; /* EINVAL */
665 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
666 if (_surplus > 0) { /* short read */
671 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
673 rrd_file->pos += _cnt; /* mimmic read() semantics */
678 ret = read(rrd_simple_file->fd, buf, count);
680 rrd_file->pos += ret; /* mimmic read() semantics */
686 /* Write count bytes from buffer buf to the current position
687 * rrd_file->pos of rrd_simple_file->fd.
688 * Returns the number of bytes written or <0 on error. */
691 rrd_file_t *rrd_file,
695 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
697 size_t old_size = rrd_file->file_len;
701 return -1; /* EINVAL */
703 if((rrd_file->pos + count) > old_size)
705 rrd_set_error("attempting to write beyond end of file (%ld + %ld > %ld)",rrd_file->pos, count, old_size);
708 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
709 rrd_file->pos += count;
710 return count; /* mimmic write() semantics */
712 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
715 rrd_file->pos += _sz;
721 /* this is a leftover from the old days, it serves no purpose
722 and is therefore turned into a no-op */
724 rrd_file_t UNUSED(*rrd_file))
728 /* Initialize RRD header. */
733 rrd->stat_head = NULL;
736 rrd->live_head = NULL;
737 rrd->legacy_last_up = NULL;
739 rrd->pdp_prep = NULL;
740 rrd->cdp_prep = NULL;
741 rrd->rrd_value = NULL;
745 /* free RRD header data. */
751 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
752 free(rrd->live_head);
759 free(rrd->live_head);
760 free(rrd->stat_head);
766 free(rrd->rrd_value);
771 /* routine used by external libraries to free memory allocated by
781 * rra_update informs us about the RRAs being updated
782 * The low level storage API may use this information for
783 * aligning RRAs within stripes, or other performance enhancements
786 rrd_file_t UNUSED(*rrd_file),
788 unsigned long UNUSED(rra_row),
789 time_t UNUSED(rra_time))
794 * This function is called when creating a new RRD
795 * The storage implementation can use this opportunity to select
796 * a sensible starting row within the file.
797 * The default implementation is random, to ensure that all RRAs
798 * don't change to a new disk block at the same time
800 unsigned long rrd_select_initial_row(
801 rrd_file_t UNUSED(*rrd_file),
806 return rrd_random() % rra->row_cnt;