1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
/* Header-loading helper, pointer variant: computes the byte size of cnt
 * elements of dst_t, fails with "reached EOF ..." (jumping to
 * out_nullify_head) when offset + size would pass rrd_file->file_len, and
 * otherwise points dst at data + offset.  It relies on offset, data,
 * rrd_file and the out_nullify_head label existing at the expansion site.
 * NOTE(review): the closing lines of the macro are not visible here;
 * presumably they advance offset - confirm against the full file. */
24 #define __rrd_read(dst, dst_t, cnt) { \
25 size_t wanted = sizeof(dst_t)*(cnt); \
26 if (offset + wanted > rrd_file->file_len) { \
27 rrd_set_error("reached EOF while loading header " #dst); \
28 goto out_nullify_head; \
30 (dst) = (dst_t*)(void*) (data + offset); \
/* Header-loading helper, read() variant: allocates room for cnt elements of
 * dst_t with malloc, fills it with read() from rrd_simple_file->fd, and jumps
 * to out_nullify_head when the allocation fails or when read() returns a
 * count different from the requested size.  It relies on rrd_simple_file and
 * the out_nullify_head label existing at the expansion site.
 * NOTE(review): the declaration of got is not visible here; if it is a
 * signed type, got != wanted compares signed against size_t - confirm. */
34 #define __rrd_read(dst, dst_t, cnt) { \
35 size_t wanted = sizeof(dst_t)*(cnt); \
37 if ((dst = malloc(wanted)) == NULL) { \
38 rrd_set_error(#dst " malloc"); \
39 goto out_nullify_head; \
41 got = read (rrd_simple_file->fd, dst, wanted); \
42 if (got != wanted) { \
43 rrd_set_error("short read while reading header " #dst); \
44 goto out_nullify_head; \
50 /* get the address of the start of this page.
 * The mask is built from a variable named _page_size that must be in scope
 * wherever the macro is expanded; clearing the low bits only yields a page
 * boundary when that value is a power of two (assumed, not checked here). */
51 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
53 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
57 long int rra_random_row(
61 /* Open a database file, return its header and an open filehandle,
62 * positioned to the first cdp in the first rra.
63 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
64 * before returning an error. Do not call rrd_close upon failure of rrd_open.
65 * If creating a new file, the parameter rrd must be initialised with
66 * details of the file content.
67 * If opening an existing file, then rrd must be initialised by
68 * rrd_init(rrd) prior to invoking rrd_open
72 const char *const file_name,
/* Visible remainder of the open routine: it sizes a new file or stats an
 * existing one, maps the file with mmap, walks the header sections in
 * on-disk order through __rrd_read, and finally checks that the file is
 * long enough for the rows the header declares.  Braces, labels and several
 * preprocessor lines of the original are not visible in this excerpt. */
81 ssize_t _page_size = sysconf(_SC_PAGESIZE);
82 char *data = MAP_FAILED;
86 rrd_file_t *rrd_file = NULL;
87 rrd_simple_file_t *rrd_simple_file = NULL;
/* newfile_size stays 0 unless a new file is being sized below; the code
 * further down tests it to choose between fstat() and the computed length */
88 off_t newfile_size = 0;
89 off_t header_len, value_cnt, data_len;
91 /* Are we creating a new file? */
92 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
/* header length: the summands follow the same section order in which the
 * header is loaded further down (stat_head, ds_def, rra_def, live_head,
 * pdp_prep, cdp_prep, rra_ptr) */
95 sizeof(stat_head_t) + \
96 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
97 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
99 sizeof(live_head_t) + \
100 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
101 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
102 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
/* data area: ds_cnt values for every row of every RRA */
105 for (i = 0; i < rrd->stat_head->rra_cnt; i++)
106 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[i].row_cnt;
108 data_len = sizeof(rrd_value_t) * value_cnt;
110 newfile_size = header_len + data_len;
/* both descriptor structs are zero-filled right after allocation */
113 rrd_file = malloc(sizeof(rrd_file_t));
114 if (rrd_file == NULL) {
115 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
118 memset(rrd_file, 0, sizeof(rrd_file_t));
120 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
121 if(rrd_file->pvt == NULL) {
122 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
125 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
126 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
129 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
130 (RRD_READONLY | RRD_READWRITE)) {
131 /* Both READONLY and READWRITE were given, which is invalid. */
132 rrd_set_error("in read/write request mask");
/* mapping defaults: read-only protection; the mode bits tested below pick
 * the mapping flags and add write protection for read/write opens */
138 rrd_simple_file->mm_prot = PROT_READ;
139 rrd_simple_file->mm_flags = 0;
142 if (rdwr & RRD_READONLY) {
145 rrd_simple_file->mm_flags = MAP_PRIVATE;
146 # ifdef MAP_NORESERVE
147 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
151 if (rdwr & RRD_READWRITE) {
154 rrd_simple_file->mm_flags = MAP_SHARED;
155 rrd_simple_file->mm_prot |= PROT_WRITE;
158 if (rdwr & RRD_CREAT) {
159 flags |= (O_CREAT | O_TRUNC);
162 if (rdwr & RRD_READAHEAD) {
164 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
166 #if defined MAP_NONBLOCK
167 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
170 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
174 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
175 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
179 /* Better try to avoid seeks as much as possible. stat may be heavy but
180 * many concurrent seeks are even worse. */
181 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
182 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
185 if (newfile_size == 0) {
186 rrd_file->file_len = statb.st_size;
188 rrd_file->file_len = newfile_size;
/* grow the new file to its full length by writing one byte at the last
 * offset, then rewind to the start.
 * NOTE(review): the results of lseek() and write() are not checked here. */
189 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
190 write(rrd_simple_file->fd, "\0", 1); /* poke */
191 lseek(rrd_simple_file->fd, 0, SEEK_SET);
193 #ifdef HAVE_POSIX_FADVISE
194 /* In general we need no read-ahead when dealing with rrd_files.
195 When we stop reading, it is highly unlikely that we start up again.
196 In this manner we actually save time and diskaccess (and buffer cache).
197 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
198 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
/* NOTE(review): setvbuf() below receives rrd_simple_file->fd, which the rest
 * of this code passes to open()/read()/close() as a descriptor; setvbuf()
 * takes a FILE pointer.  The preprocessor condition guarding these lines is
 * not visible here - confirm this branch is still compiled anywhere. */
202 if (rdwr & RRD_READWRITE)
204 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
205 rrd_set_error("failed to disable the stream buffer\n");
212 data = mmap(0, rrd_file->file_len,
213 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
214 rrd_simple_file->fd, offset);
216 /* lets see if the first read worked */
217 if (data == MAP_FAILED) {
218 rrd_set_error("mmaping file '%s': %s", file_name,
219 rrd_strerror(errno));
222 rrd_simple_file->file_start = data;
223 if (rdwr & RRD_CREAT) {
/* NOTE(review): memset() converts its fill argument to unsigned char and
 * repeats that byte; DNAN is defined elsewhere - if it is a floating-point
 * NaN this does not store NaN values in the mapping.  The length also stops
 * one byte short of newfile_size.  Confirm both are intended. */
224 memset(data, DNAN, newfile_size - 1);
228 if (rdwr & RRD_CREAT)
/* NOTE(review): the madvise() calls below OR two advice constants together;
 * the madvise documentation describes advice as a single value rather than
 * a bit mask, so confirm the combination behaves as intended on the target
 * platforms. */
231 if (rdwr & RRD_COPY) {
232 /* We will read everything in a moment (copying) */
233 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
235 /* We do not need to read anything in for the moment */
236 madvise(data, rrd_file->file_len, MADV_RANDOM);
237 /* the stat_head will be needed soonish, so hint accordingly */
238 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
/* from here on the header sections are loaded one after another; every
 * failure jumps to out_nullify_head */
242 __rrd_read(rrd->stat_head, stat_head_t,
245 /* lets do some test if we are on track ... */
246 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
247 rrd_set_error("'%s' is not an RRD file", file_name);
248 goto out_nullify_head;
/* a differing float_cookie is reported as a file written on another
 * architecture */
251 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
252 rrd_set_error("This RRD was created on another architecture");
253 goto out_nullify_head;
/* versions are compared as integers parsed with atoi(); files newer than
 * RRD_VERSION are rejected */
256 version = atoi(rrd->stat_head->version);
258 if (version > atoi(RRD_VERSION)) {
259 rrd_set_error("can't handle RRD file version %s",
260 rrd->stat_head->version);
261 goto out_nullify_head;
263 #if defined USE_MADVISE
264 /* the ds_def will be needed soonish, so hint accordingly */
265 madvise(data + PAGE_START(offset),
266 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
268 __rrd_read(rrd->ds_def, ds_def_t,
269 rrd->stat_head->ds_cnt);
271 #if defined USE_MADVISE
272 /* the rra_def will be needed soonish, so hint accordingly */
273 madvise(data + PAGE_START(offset),
274 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
276 __rrd_read(rrd->rra_def, rra_def_t,
277 rrd->stat_head->rra_cnt);
279 /* handle different format for the live_head */
/* first variant visible below: live_head is heap-allocated and filled from a
 * bare time_t read into legacy_last_up, with the microsecond part set to 0;
 * the condition selecting this variant is not visible in this excerpt */
281 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
282 if (rrd->live_head == NULL) {
283 rrd_set_error("live_head_t malloc");
286 #if defined USE_MADVISE
287 /* the live_head will be needed soonish, so hint accordingly */
288 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
290 __rrd_read(rrd->legacy_last_up, time_t,
293 rrd->live_head->last_up = *rrd->legacy_last_up;
294 rrd->live_head->last_up_usec = 0;
/* second variant: live_head is loaded directly as a live_head_t */
296 #if defined USE_MADVISE
297 /* the live_head will be needed soonish, so hint accordingly */
298 madvise(data + PAGE_START(offset),
299 sizeof(live_head_t), MADV_WILLNEED);
301 __rrd_read(rrd->live_head, live_head_t,
304 __rrd_read(rrd->pdp_prep, pdp_prep_t,
305 rrd->stat_head->ds_cnt);
306 __rrd_read(rrd->cdp_prep, cdp_prep_t,
307 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
308 __rrd_read(rrd->rra_ptr, rra_ptr_t,
309 rrd->stat_head->rra_cnt);
/* offset now sits just past the header; it is recorded both as the header
 * length and as the current position */
311 rrd_file->header_len = offset;
312 rrd_file->pos = offset;
/* sanity check: the file must hold header_len plus one rrd_value_t per data
 * source for every row of every RRA */
315 unsigned long row_cnt = 0;
318 for (i=0; i<rrd->stat_head->rra_cnt; i++)
319 row_cnt += rrd->rra_def[i].row_cnt;
321 off_t correct_len = rrd_file->header_len +
322 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
324 if (correct_len > rrd_file->file_len)
/* NOTE(review): the format uses %ld while the argument is cast to long long;
 * the conversion specifier and the cast disagree. */
326 rrd_set_error("'%s' is too small (should be %ld bytes)",
327 file_name, (long long) correct_len);
328 goto out_nullify_head;
/* cleanup lines: the header pointer is cleared (presumably at the
 * out_nullify_head target; the label itself is not visible here), an
 * established mapping is removed and the descriptor is closed */
335 rrd->stat_head = NULL;
338 if (data != MAP_FAILED)
339 munmap(data, rrd_file->file_len);
341 close(rrd_simple_file->fd);
349 #if defined DEBUG && DEBUG > 1
350 /* Print list of in-core pages of the current rrd_file.
 * Debug-only helper (compiled when DEBUG > 1): asks mincore() about the
 * mapping that starts at file_start and writes runs of resident and
 * non-resident entries to stderr, each line prefixed with mark. */
353 rrd_file_t *rrd_file,
356 rrd_simple_file_t *rrd_simple_file;
357 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
359 /* pretty print blocks in core */
362 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* file length plus the number of pages needed to hold it (rounded up);
 * the use of this value - presumably the size of vec - is on lines that are
 * not visible here */
364 off = rrd_file->file_len +
365 ((rrd_file->file_len + _page_size - 1) / _page_size);
369 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
371 unsigned is_in = 0, was_in = 0;
/* NOTE(review): the loop bound is file_len (a byte count) and vec is indexed
 * by the same counter, while mincore() reports one byte per page - confirm
 * the intended unit. */
373 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
374 is_in = vec[off] & 1; /* if lsb set then is core resident */
/* a change of residency ends the current run, which is then printed.
 * NOTE(review): the %p argument is an address inside vec, not inside the
 * mapped file - confirm that is what should be shown. */
377 if (was_in != is_in) {
378 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
379 was_in ? "" : "not ", vec + prev, off - prev);
385 "%s: %sin core: %p len %ld\n", mark,
386 was_in ? "" : "not ", vec + prev, off - prev);
388 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
391 fprintf(stderr, "sorry mincore only works with mmap");
394 #endif /* defined DEBUG && DEBUG > 1 */
397 * get exclusive lock to whole file.
398 * lock gets removed when we close the file
400 * returns 0 on success
403 rrd_file_t *rrd_file)
/* Takes a lock on the descriptor stored in rrd_file->pvt and leaves the
 * result of the locking call in rcstat.  Two platform variants are visible:
 * _locking() over st_size bytes on native Windows, and an fcntl() write
 * lock elsewhere. */
406 rrd_simple_file_t *rrd_simple_file;
407 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
410 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
/* the Windows call locks a byte range, so the current file size is fetched
 * first; _LK_NBLCK is the non-blocking lock mode */
413 if (_fstat(rrd_simple_file->fd, &st) == 0) {
414 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
421 lock.l_type = F_WRLCK; /* exclusive write lock */
422 lock.l_len = 0; /* whole file */
423 lock.l_start = 0; /* start of file */
424 lock.l_whence = SEEK_SET; /* l_start is relative to the start of the file */
/* F_SETLK does not wait: fcntl() fails right away when a conflicting lock is
 * held by another process */
426 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
434 /* drop cache except for the header and the active pages.
 * Walks the RRAs in file order and issues MADV_DONTNEED and/or
 * POSIX_FADV_DONTNEED for the gaps between the page after the header, each
 * RRA's current-row position, and the end of the file.  The body is only
 * compiled when USE_MADVISE or HAVE_POSIX_FADVISE is defined. */
436 rrd_file_t *rrd_file,
/* NOTE(review): rrd_file->pvt is read in this initializer before the
 * rrd_file == NULL check a few lines below can take effect. */
439 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
440 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
441 off_t dontneed_start;
445 ssize_t _page_size = sysconf(_SC_PAGESIZE);
447 if (rrd_file == NULL) {
448 #if defined DEBUG && DEBUG
449 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
454 #if defined DEBUG && DEBUG > 1
455 mincore_print(rrd_file, "before");
458 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
/* the first droppable byte is one page past the page containing the end of
 * the header, so the header pages themselves are kept */
459 rra_start = rrd_file->header_len;
460 dontneed_start = PAGE_START(rra_start) + _page_size;
461 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
464 + rrd->rra_ptr[i].cur_row
465 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
/* drop everything between the previous boundary and this RRA's active block */
466 if (active_block > dontneed_start) {
468 madvise(rrd_simple_file->file_start + dontneed_start,
469 active_block - dontneed_start - 1, MADV_DONTNEED);
471 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
472 #ifdef HAVE_POSIX_FADVISE
473 posix_fadvise(rrd_simple_file->fd, dontneed_start,
474 active_block - dontneed_start - 1,
475 POSIX_FADV_DONTNEED);
478 dontneed_start = active_block;
479 /* do not release 'hot' block if update for this RRA will occur
480 * within 10 minutes.  The test below computes the RRA interval
 * (pdp_step * pdp_cnt) minus last_up modulo that interval, i.e. the
 * seconds left until the next interval boundary, and compares it with
 * 10 * 60; when it is smaller the boundary is moved one page further so
 * that page is skipped. */
481 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
482 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
483 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
484 dontneed_start += _page_size;
487 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
/* finally drop whatever lies between the last boundary and the end of file */
491 if (dontneed_start < rrd_file->file_len) {
493 madvise(rrd_simple_file->file_start + dontneed_start,
494 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
496 #ifdef HAVE_POSIX_FADVISE
497 posix_fadvise(rrd_simple_file->fd, dontneed_start,
498 rrd_file->file_len - dontneed_start,
499 POSIX_FADV_DONTNEED);
503 #if defined DEBUG && DEBUG > 1
504 mincore_print(rrd_file, "after");
506 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
514 rrd_file_t *rrd_file)
/* Tears down an open rrd_file in three steps: msync() of the mapping,
 * munmap() of the mapping, close() of the descriptor.  Each step records its
 * own message through rrd_set_error(); the conditions guarding those
 * messages are on lines not visible here.  ret is reassigned by every step,
 * so after the last visible line it holds the result of close(). */
516 rrd_simple_file_t *rrd_simple_file;
517 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
/* MS_ASYNC requests an asynchronous write-back of the mapped range */
521 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
523 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
524 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
526 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
528 ret = close(rrd_simple_file->fd);
530 rrd_set_error("closing file: %s", rrd_strerror(errno));
538 /* Set position of rrd_file.
 * Two implementations are visible: one only updates the pos field kept in
 * rrd_file according to whence, the other calls lseek() on the descriptor.
 * The preprocessor lines selecting between them are not visible here. */
541 rrd_file_t *rrd_file,
546 rrd_simple_file_t *rrd_simple_file;
547 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
550 if (whence == SEEK_SET)
552 else if (whence == SEEK_CUR)
553 rrd_file->pos += off;
554 else if (whence == SEEK_END)
555 rrd_file->pos = rrd_file->file_len + off;
557 ret = lseek(rrd_simple_file->fd, off, whence);
559 rrd_set_error("lseek: %s", rrd_strerror(errno));
562 /* mimic fseek, which returns 0 upon success */
/* the comparison yields 1 when ret is negative and 0 otherwise */
563 return ret < 0; /*XXX: or just ret to mimic lseek */
567 /* Get current position in rrd_file.
 * Returns the pos field stored in rrd_file; the descriptor itself is not
 * queried. */
570 rrd_file_t *rrd_file)
572 return rrd_file->pos;
576 /* Read count bytes into buffer buf, starting at rrd_file->pos.
577 * Returns the number of bytes read or <0 on error.
 * Two implementations are visible: one copies from the mapping at
 * file_start + pos, the other calls read() on the descriptor.  Both advance
 * rrd_file->pos.  Several guards and return statements are on lines not
 * visible here. */
580 rrd_file_t *rrd_file,
584 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
589 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
592 return -1; /* EINVAL */
/* _surplus is how far pos + _cnt would reach past the end of the file */
593 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
594 if (_surplus > 0) { /* short read */
/* memcpy() returns its destination, so buf is unchanged by the assignment */
599 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
601 rrd_file->pos += _cnt; /* mimic read() semantics */
606 ret = read(rrd_simple_file->fd, buf, count);
608 rrd_file->pos += ret; /* mimic read() semantics */
614 /* Write count bytes from buffer buf to the current position
615 * rrd_file->pos of rrd_simple_file->fd.
616 * Returns the number of bytes written or <0 on error.
 * Two implementations are visible: one copies into the mapping and refuses
 * to go past the length recorded at open time, the other calls write() on
 * the descriptor.  Both advance rrd_file->pos. */
619 rrd_file_t *rrd_file,
623 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
/* NOTE(review): old_size is an int, while file_len is filled from
 * statb.st_size or an off_t value in the open code of this file; the
 * narrowing could truncate for very large files - confirm. */
625 int old_size = rrd_file->file_len;
629 return -1; /* EINVAL */
/* the mapping has a fixed length, so a write may not extend the file */
631 if((rrd_file->pos + count) > old_size)
633 rrd_set_error("attempting to write beyond end of file");
636 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
637 rrd_file->pos += count;
638 return count; /* mimic write() semantics */
640 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
643 rrd_file->pos += _sz;
649 /* flush all data pending to be written to FD.
 * Calls fdatasync() on the descriptor kept in rrd_file->pvt; a non-zero
 * result is reported through rrd_set_error() together with the descriptor
 * number and the errno text. */
652 rrd_file_t *rrd_file)
654 rrd_simple_file_t *rrd_simple_file;
655 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
656 if (fdatasync(rrd_simple_file->fd) != 0) {
657 rrd_set_error("flushing fd %d: %s", rrd_simple_file->fd,
658 rrd_strerror(errno));
663 /* Initialize RRD header.
 * Sets the header pointers of rrd to NULL.  Only six assignments are
 * visible in this excerpt; further members may be reset on lines that are
 * not shown. */
668 rrd->stat_head = NULL;
671 rrd->live_head = NULL;
672 rrd->legacy_last_up = NULL;
674 rrd->pdp_prep = NULL;
675 rrd->cdp_prep = NULL;
676 rrd->rrd_value = NULL;
680 /* free RRD header data.
 * NOTE(review): free(rrd->live_head) appears twice on the visible lines; the
 * lines between them are not shown, so presumably the two calls sit in
 * alternative branches - confirm live_head cannot be released twice. */
686 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
687 free(rrd->live_head);
694 free(rrd->live_head);
695 free(rrd->stat_head);
701 free(rrd->rrd_value);
706 /* routine used by external libraries to free memory allocated by
716 * rra_update informs us about the RRAs being updated
717 * The low level storage API may use this information for
718 * aligning RRAs within stripes, or other performance enhancements
721 rrd_file_t *rrd_file,
723 unsigned long rra_row,
729 * This function is called when creating a new RRD
730 * The storage implementation can use this opportunity to select
731 * a sensible starting row within the file.
732 * The default implementation is random, to ensure that all RRAs
733 * don't change to a new disk block at the same time
/* Chooses the starting row by delegating to rra_random_row(); rrd_file is
 * not used on the lines visible here.
 * NOTE(review): rra_random_row() is declared long int while this function
 * returns unsigned long, so the result is converted on return. */
735 unsigned long rrd_select_initial_row(
736 rrd_file_t *rrd_file,
741 return rra_random_row(rra);
/* seeding flag for the generator used below; the lines that test and set it
 * are not visible here (presumably the seed is applied only once) */
744 static int rand_init = 0;
/* Returns random() reduced modulo rra->row_cnt. */
746 long int rra_random_row(
750 srandom((unsigned int) time(NULL) + (unsigned int) getpid()); /* seed mixes wall-clock time with the process id */
/* NOTE(review): the modulo divides by zero when rra->row_cnt is 0 - confirm
 * callers never pass such an RRA. */
754 return random() % rra->row_cnt;