1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
19 # define random() rand()
20 # define srandom(x) srand(x)
23 #define _LK_UNLCK 0 /* Unlock */
24 #define _LK_LOCK 1 /* Lock */
25 #define _LK_NBLCK 2 /* Non-blocking lock */
26 #define _LK_RLCK 3 /* Lock for read only */
27 #define _LK_NBRLCK 4 /* Non-blocking lock for read only */
30 #define LK_UNLCK _LK_UNLCK
31 #define LK_LOCK _LK_LOCK
32 #define LK_NBLCK _LK_NBLCK
33 #define LK_RLCK _LK_RLCK
34 #define LK_NBRLCK _LK_NBRLCK
37 /* DEBUG 2 prints information obtained via mincore(2) */
39 /* do not calculate exact madvise hints but assume 1 page for headers and
40 * set DONTNEED for the rest, which is assumed to be data */
41 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
42 * your syscalls are very slow */
45 /* the cast to void* is there to avoid this warning seen on ia64 with certain
46 versions of gcc: 'cast increases required alignment of target type'
48 #define __rrd_read(dst, dst_t, cnt) { \
49 size_t wanted = sizeof(dst_t)*(cnt); \
50 if (offset + wanted > rrd_file->file_len) { \
51 rrd_set_error("reached EOF while loading header " #dst); \
52 goto out_nullify_head; \
54 (dst) = (dst_t*)(void*) (data + offset); \
58 #define __rrd_read(dst, dst_t, cnt) { \
59 size_t wanted = sizeof(dst_t)*(cnt); \
61 if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
62 rrd_set_error(#dst " malloc"); \
63 goto out_nullify_head; \
65 got = read (rrd_simple_file->fd, dst, wanted); \
66 if (got != wanted) { \
67 rrd_set_error("short read while reading header " #dst); \
68 goto out_nullify_head; \
74 /* get the address of the start of this page */
75 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
77 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
81 long int rra_random_row(
85 /* Open a database file, return its header and an open filehandle,
86 * positioned to the first cdp in the first rra.
87 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
88 * before returning an error. Do not call rrd_close upon failure of rrd_open.
89 * If creating a new file, the parameter rrd must be initialised with
90 * details of the file content.
91 * If opening an existing file, then rrd must be initialised by
92 * rrd_init(rrd) prior to invoking rrd_open
96 const char *const file_name,
105 ssize_t _page_size = sysconf(_SC_PAGESIZE);
106 char *data = MAP_FAILED;
110 rrd_file_t *rrd_file = NULL;
111 rrd_simple_file_t *rrd_simple_file = NULL;
112 size_t newfile_size = 0;
113 size_t header_len, value_cnt, data_len;
115 /* Are we creating a new file? */
116 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
119 sizeof(stat_head_t) + \
120 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
121 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
123 sizeof(live_head_t) + \
124 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
125 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
126 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
129 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
130 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
132 data_len = sizeof(rrd_value_t) * value_cnt;
134 newfile_size = header_len + data_len;
137 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
138 if (rrd_file == NULL) {
139 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
142 memset(rrd_file, 0, sizeof(rrd_file_t));
144 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
145 if(rrd_file->pvt == NULL) {
146 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
149 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
150 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
153 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
154 (RRD_READONLY | RRD_READWRITE)) {
155 /* Both READONLY and READWRITE were given, which is invalid. */
156 rrd_set_error("in read/write request mask");
162 rrd_simple_file->mm_prot = PROT_READ;
163 rrd_simple_file->mm_flags = 0;
166 if (rdwr & RRD_READONLY) {
169 rrd_simple_file->mm_flags = MAP_PRIVATE;
170 # ifdef MAP_NORESERVE
171 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
175 if (rdwr & RRD_READWRITE) {
178 if (rdwr & RRD_CREAT) {
179 flags |= (O_CREAT | O_TRUNC);
182 if (rdwr & RRD_READAHEAD) {
184 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
186 #if defined MAP_NONBLOCK
187 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
190 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
194 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
195 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
200 #ifdef HAVE_BROKEN_MS_ASYNC
201 if (rdwr & RRD_READWRITE) {
202 /* on some unices, the file's mtime does not get updated
203 on msync MS_ASYNC; in order to help them,
204 we update the timestamp at this point.
205 The thing happens pretty 'close' to the open
206 call so the chances of a race should be minimal.
208 Maybe ask your vendor to fix your OS ... */
209 utime(file_name,NULL);
214 /* Better try to avoid seeks as much as possible. stat may be heavy but
215 * many concurrent seeks are even worse. */
216 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
217 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
220 if (newfile_size == 0) {
221 rrd_file->file_len = statb.st_size;
223 rrd_file->file_len = newfile_size;
224 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
225 write(rrd_simple_file->fd, "\0", 1); /* poke */
226 lseek(rrd_simple_file->fd, 0, SEEK_SET);
228 #ifdef HAVE_POSIX_FADVISE
229 /* In general we need no read-ahead when dealing with rrd_files.
230 When we stop reading, it is highly unlikely that we start up again.
231 In this manner we actually save time and diskaccess (and buffer cache).
232 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
233 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
237 if (rdwr & RRD_READWRITE)
239 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
240 rrd_set_error("failed to disable the stream buffer\n");
247 data = mmap(0, rrd_file->file_len,
248 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
249 rrd_simple_file->fd, offset);
251 /* let's see if the first read worked */
252 if (data == MAP_FAILED) {
253 rrd_set_error("mmaping file '%s': %s", file_name,
254 rrd_strerror(errno));
257 rrd_simple_file->file_start = data;
258 if (rdwr & RRD_CREAT) {
259 memset(data, DNAN, newfile_size - 1);
263 if (rdwr & RRD_CREAT)
266 if (rdwr & RRD_COPY) {
267 /* We will read everything in a moment (copying) */
268 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
270 /* We do not need to read anything in for the moment */
271 madvise(data, rrd_file->file_len, MADV_RANDOM);
272 /* the stat_head will be needed soonish, so hint accordingly */
273 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
277 __rrd_read(rrd->stat_head, stat_head_t,
280 /* let's do some tests to see if we are on track ... */
281 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
282 rrd_set_error("'%s' is not an RRD file", file_name);
283 goto out_nullify_head;
286 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
287 rrd_set_error("This RRD was created on another architecture");
288 goto out_nullify_head;
291 version = atoi(rrd->stat_head->version);
293 if (version > atoi(RRD_VERSION)) {
294 rrd_set_error("can't handle RRD file version %s",
295 rrd->stat_head->version);
296 goto out_nullify_head;
298 #if defined USE_MADVISE
299 /* the ds_def will be needed soonish, so hint accordingly */
300 madvise(data + PAGE_START(offset),
301 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
303 __rrd_read(rrd->ds_def, ds_def_t,
304 rrd->stat_head->ds_cnt);
306 #if defined USE_MADVISE
307 /* the rra_def will be needed soonish, so hint accordingly */
308 madvise(data + PAGE_START(offset),
309 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
311 __rrd_read(rrd->rra_def, rra_def_t,
312 rrd->stat_head->rra_cnt);
314 /* handle different format for the live_head */
316 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
317 if (rrd->live_head == NULL) {
318 rrd_set_error("live_head_t malloc");
321 #if defined USE_MADVISE
322 /* the live_head will be needed soonish, so hint accordingly */
323 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
325 __rrd_read(rrd->legacy_last_up, time_t,
328 rrd->live_head->last_up = *rrd->legacy_last_up;
329 rrd->live_head->last_up_usec = 0;
331 #if defined USE_MADVISE
332 /* the live_head will be needed soonish, so hint accordingly */
333 madvise(data + PAGE_START(offset),
334 sizeof(live_head_t), MADV_WILLNEED);
336 __rrd_read(rrd->live_head, live_head_t,
339 __rrd_read(rrd->pdp_prep, pdp_prep_t,
340 rrd->stat_head->ds_cnt);
341 __rrd_read(rrd->cdp_prep, cdp_prep_t,
342 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
343 __rrd_read(rrd->rra_ptr, rra_ptr_t,
344 rrd->stat_head->rra_cnt);
346 rrd_file->header_len = offset;
347 rrd_file->pos = offset;
350 unsigned long row_cnt = 0;
352 for (ui=0; ui<rrd->stat_head->rra_cnt; ui++)
353 row_cnt += rrd->rra_def[ui].row_cnt;
355 size_t correct_len = rrd_file->header_len +
356 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
358 if (correct_len > rrd_file->file_len)
360 rrd_set_error("'%s' is too small (should be %ld bytes)",
361 file_name, (long long) correct_len);
362 goto out_nullify_head;
369 rrd->stat_head = NULL;
372 if (data != MAP_FAILED)
373 munmap(data, rrd_file->file_len);
376 close(rrd_simple_file->fd);
384 #if defined DEBUG && DEBUG > 1
385 /* Print list of in-core pages of the current rrd_file. */
388 rrd_file_t *rrd_file,
391 rrd_simple_file_t *rrd_simple_file;
392 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
394 /* pretty print blocks in core */
397 ssize_t _page_size = sysconf(_SC_PAGESIZE);
399 off = rrd_file->file_len +
400 ((rrd_file->file_len + _page_size - 1) / _page_size);
404 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
406 unsigned is_in = 0, was_in = 0;
408 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
409 is_in = vec[off] & 1; /* if lsb set then is core resident */
412 if (was_in != is_in) {
413 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
414 was_in ? "" : "not ", vec + prev, off - prev);
420 "%s: %sin core: %p len %ld\n", mark,
421 was_in ? "" : "not ", vec + prev, off - prev);
423 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
426 fprintf(stderr, "sorry mincore only works with mmap");
429 #endif /* defined DEBUG && DEBUG > 1 */
432 * get exclusive lock to whole file.
433 * lock gets removed when we close the file
435 * returns 0 on success
438 rrd_file_t *rrd_file)
441 rrd_simple_file_t *rrd_simple_file;
442 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
445 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
448 if (_fstat(rrd_simple_file->fd, &st) == 0) {
449 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
456 lock.l_type = F_WRLCK; /* exclusive write lock */
457 lock.l_len = 0; /* whole file */
458 lock.l_start = 0; /* start of file */
459 lock.l_whence = SEEK_SET; /* l_start is relative to the start of the file */
461 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
469 /* drop cache except for the header and the active pages */
471 rrd_file_t *rrd_file,
474 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
475 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
476 size_t dontneed_start;
480 ssize_t _page_size = sysconf(_SC_PAGESIZE);
482 if (rrd_file == NULL) {
483 #if defined DEBUG && DEBUG
484 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
489 #if defined DEBUG && DEBUG > 1
490 mincore_print(rrd_file, "before");
493 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
494 rra_start = rrd_file->header_len;
495 dontneed_start = PAGE_START(rra_start) + _page_size;
496 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
499 + rrd->rra_ptr[i].cur_row
500 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
501 if (active_block > dontneed_start) {
503 madvise(rrd_simple_file->file_start + dontneed_start,
504 active_block - dontneed_start - 1, MADV_DONTNEED);
506 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
507 #ifdef HAVE_POSIX_FADVISE
508 posix_fadvise(rrd_simple_file->fd, dontneed_start,
509 active_block - dontneed_start - 1,
510 POSIX_FADV_DONTNEED);
513 dontneed_start = active_block;
514 /* do not release 'hot' block if update for this RRA will occur
515 * within 10 minutes */
516 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
517 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
518 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
519 dontneed_start += _page_size;
522 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
526 if (dontneed_start < rrd_file->file_len) {
528 madvise(rrd_simple_file->file_start + dontneed_start,
529 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
531 #ifdef HAVE_POSIX_FADVISE
532 posix_fadvise(rrd_simple_file->fd, dontneed_start,
533 rrd_file->file_len - dontneed_start,
534 POSIX_FADV_DONTNEED);
538 #if defined DEBUG && DEBUG > 1
539 mincore_print(rrd_file, "after");
541 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
549 rrd_file_t *rrd_file)
551 rrd_simple_file_t *rrd_simple_file;
552 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
556 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
558 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
559 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
561 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
563 ret = close(rrd_simple_file->fd);
565 rrd_set_error("closing file: %s", rrd_strerror(errno));
573 /* Set position of rrd_file. */
576 rrd_file_t *rrd_file,
581 rrd_simple_file_t *rrd_simple_file;
582 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
585 if (whence == SEEK_SET)
587 else if (whence == SEEK_CUR)
588 rrd_file->pos += off;
589 else if (whence == SEEK_END)
590 rrd_file->pos = rrd_file->file_len + off;
592 ret = lseek(rrd_simple_file->fd, off, whence);
594 rrd_set_error("lseek: %s", rrd_strerror(errno));
597 /* mimic fseek, which returns 0 upon success */
598 return ret < 0; /*XXX: or just ret to mimic lseek */
602 /* Get current position in rrd_file. */
605 rrd_file_t *rrd_file)
607 return rrd_file->pos;
611 /* Read count bytes into buffer buf, starting at rrd_file->pos.
612 * Returns the number of bytes read or <0 on error. */
615 rrd_file_t *rrd_file,
619 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
624 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
627 return -1; /* EINVAL */
628 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
629 if (_surplus > 0) { /* short read */
634 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
636 rrd_file->pos += _cnt; /* mimic read() semantics */
641 ret = read(rrd_simple_file->fd, buf, count);
643 rrd_file->pos += ret; /* mimic read() semantics */
649 /* Write count bytes from buffer buf to the current position
650 * rrd_file->pos of rrd_simple_file->fd.
651 * Returns the number of bytes written or <0 on error. */
654 rrd_file_t *rrd_file,
658 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
660 size_t old_size = rrd_file->file_len;
664 return -1; /* EINVAL */
666 if((rrd_file->pos + count) > old_size)
668 rrd_set_error("attempting to write beyond end of file");
671 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
672 rrd_file->pos += count;
673 return count; /* mimic write() semantics */
675 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
678 rrd_file->pos += _sz;
684 /* flush all data pending to be written to FD. */
687 rrd_file_t *rrd_file)
690 rrd_simple_file_t *rrd_simple_file;
691 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
692 if (fdatasync(rrd_simple_file->fd) != 0) {
693 rrd_set_error("flushing fd %d: %s", rrd_simple_file->fd,
694 rrd_strerror(errno));
700 /* Initialize RRD header. */
705 rrd->stat_head = NULL;
708 rrd->live_head = NULL;
709 rrd->legacy_last_up = NULL;
711 rrd->pdp_prep = NULL;
712 rrd->cdp_prep = NULL;
713 rrd->rrd_value = NULL;
717 /* free RRD header data. */
723 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
724 free(rrd->live_head);
731 free(rrd->live_head);
732 free(rrd->stat_head);
738 free(rrd->rrd_value);
743 /* routine used by external libraries to free memory allocated by
753 * rra_update informs us about the RRAs being updated
754 * The low level storage API may use this information for
755 * aligning RRAs within stripes, or other performance enhancements
758 rrd_file_t *rrd_file __attribute__((unused)),
759 int rra_idx __attribute__((unused)),
760 unsigned long rra_row __attribute__((unused)),
761 time_t rra_time __attribute__((unused)))
766 * This function is called when creating a new RRD
767 * The storage implementation can use this opportunity to select
768 * a sensible starting row within the file.
769 * The default implementation is random, to ensure that all RRAs
770 * don't change to a new disk block at the same time
772 unsigned long rrd_select_initial_row(
773 rrd_file_t *rrd_file __attribute__((unused)),
774 int rra_idx __attribute__((unused)),
778 return rra_random_row(rra);
781 static int rand_init = 0;
783 long int rra_random_row(
787 srandom((unsigned int) time(NULL) + (unsigned int) getpid());
791 return random() % rra->row_cnt;