1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) { \
25 size_t wanted = sizeof(dst_t)*(cnt); \
26 if (offset + wanted > rrd_file->file_len) { \
27 rrd_set_error("reached EOF while loading header " #dst); \
28 goto out_nullify_head; \
30 (dst) = (dst_t*)(void*) (data + offset); \
34 #define __rrd_read(dst, dst_t, cnt) { \
35 size_t wanted = sizeof(dst_t)*(cnt); \
37 if ((dst = malloc(wanted)) == NULL) { \
38 rrd_set_error(#dst " malloc"); \
39 goto out_nullify_head; \
41 got = read (rrd_simple_file->fd, dst, wanted); \
42 if (got != wanted) { \
43 rrd_set_error("short read while reading header " #dst); \
44 goto out_nullify_head; \
50 /* get the address of the start of this page */
51 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
53 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
57 /* Open a database file, return its header and an open filehandle,
58 * positioned to the first cdp in the first rra.
59 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
60 * before returning an error. Do not call rrd_close upon failure of rrd_open.
61 * If creating a new file, the parameter rrd must be initialised with
62 * details of the file content.
63 * If opening an existing file, then rrd must be initialised by
64 * rrd_init(rrd) prior to invoking rrd_open
68 const char *const file_name,
77 ssize_t _page_size = sysconf(_SC_PAGESIZE);
78 char *data = MAP_FAILED;
82 rrd_file_t *rrd_file = NULL;
83 rrd_simple_file_t *rrd_simple_file = NULL;
84 off_t newfile_size = 0;
85 off_t header_len, value_cnt, data_len;
87 /* Are we creating a new file? */
88 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
91 sizeof(stat_head_t) + \
92 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
93 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
95 sizeof(live_head_t) + \
96 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
97 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
98 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
101 for (i = 0; i < rrd->stat_head->rra_cnt; i++)
102 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[i].row_cnt;
104 data_len = sizeof(rrd_value_t) * value_cnt;
106 newfile_size = header_len + data_len;
109 rrd_file = malloc(sizeof(rrd_file_t));
110 if (rrd_file == NULL) {
111 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
114 memset(rrd_file, 0, sizeof(rrd_file_t));
116 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
117 if(rrd_file->pvt == NULL) {
118 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
121 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
122 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
125 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
126 (RRD_READONLY | RRD_READWRITE)) {
127 /* Both READONLY and READWRITE were given, which is invalid. */
128 rrd_set_error("in read/write request mask");
134 rrd_simple_file->mm_prot = PROT_READ;
135 rrd_simple_file->mm_flags = 0;
138 if (rdwr & RRD_READONLY) {
141 rrd_simple_file->mm_flags = MAP_PRIVATE;
142 # ifdef MAP_NORESERVE
143 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
147 if (rdwr & RRD_READWRITE) {
150 rrd_simple_file->mm_flags = MAP_SHARED;
151 rrd_simple_file->mm_prot |= PROT_WRITE;
154 if (rdwr & RRD_CREAT) {
155 flags |= (O_CREAT | O_TRUNC);
158 if (rdwr & RRD_READAHEAD) {
160 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
162 #if defined MAP_NONBLOCK
163 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
166 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
170 if ((rrd_simple_file->fd = open(file_name, flags, 0666)) < 0) {
171 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
175 /* Better try to avoid seeks as much as possible. stat may be heavy but
176 * many concurrent seeks are even worse. */
177 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
178 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
181 if (newfile_size == 0) {
182 rrd_file->file_len = statb.st_size;
184 rrd_file->file_len = newfile_size;
185 lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET);
186 write(rrd_simple_file->fd, "\0", 1); /* poke */
187 lseek(rrd_simple_file->fd, 0, SEEK_SET);
189 #ifdef HAVE_POSIX_FADVISE
190 /* In general we need no read-ahead when dealing with rrd_files.
191 When we stop reading, it is highly unlikely that we start up again.
192 In this manner we actually save time and disk access (and buffer cache).
193 Thanks to Dave Plonka for the idea of using POSIX_FADV_RANDOM here. */
194 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
198 if (rdwr & RRD_READWRITE)
200 if (setvbuf((rrd_simple_file->fd),NULL,_IONBF,2)) {
201 rrd_set_error("failed to disable the stream buffer\n");
208 data = mmap(0, rrd_file->file_len,
209 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
210 rrd_simple_file->fd, offset);
212 /* let's see if the mmap worked */
213 if (data == MAP_FAILED) {
214 rrd_set_error("mmaping file '%s': %s", file_name,
215 rrd_strerror(errno));
218 rrd_simple_file->file_start = data;
219 if (rdwr & RRD_CREAT) {
220 memset(data, DNAN, newfile_size - 1);
224 if (rdwr & RRD_CREAT)
227 if (rdwr & RRD_COPY) {
228 /* We will read everything in a moment (copying) */
229 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
231 /* We do not need to read anything in for the moment */
232 madvise(data, rrd_file->file_len, MADV_RANDOM);
233 /* the stat_head will be needed soonish, so hint accordingly */
234 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
238 __rrd_read(rrd->stat_head, stat_head_t,
241 /* let's run some sanity checks to see if we are on track ... */
242 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
243 rrd_set_error("'%s' is not an RRD file", file_name);
244 goto out_nullify_head;
247 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
248 rrd_set_error("This RRD was created on another architecture");
249 goto out_nullify_head;
252 version = atoi(rrd->stat_head->version);
254 if (version > atoi(RRD_VERSION)) {
255 rrd_set_error("can't handle RRD file version %s",
256 rrd->stat_head->version);
257 goto out_nullify_head;
259 #if defined USE_MADVISE
260 /* the ds_def will be needed soonish, so hint accordingly */
261 madvise(data + PAGE_START(offset),
262 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
264 __rrd_read(rrd->ds_def, ds_def_t,
265 rrd->stat_head->ds_cnt);
267 #if defined USE_MADVISE
268 /* the rra_def will be needed soonish, so hint accordingly */
269 madvise(data + PAGE_START(offset),
270 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
272 __rrd_read(rrd->rra_def, rra_def_t,
273 rrd->stat_head->rra_cnt);
275 /* handle different format for the live_head */
277 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
278 if (rrd->live_head == NULL) {
279 rrd_set_error("live_head_t malloc");
282 #if defined USE_MADVISE
283 /* the live_head will be needed soonish, so hint accordingly */
284 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
286 __rrd_read(rrd->legacy_last_up, time_t,
289 rrd->live_head->last_up = *rrd->legacy_last_up;
290 rrd->live_head->last_up_usec = 0;
292 #if defined USE_MADVISE
293 /* the live_head will be needed soonish, so hint accordingly */
294 madvise(data + PAGE_START(offset),
295 sizeof(live_head_t), MADV_WILLNEED);
297 __rrd_read(rrd->live_head, live_head_t,
300 __rrd_read(rrd->pdp_prep, pdp_prep_t,
301 rrd->stat_head->ds_cnt);
302 __rrd_read(rrd->cdp_prep, cdp_prep_t,
303 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
304 __rrd_read(rrd->rra_ptr, rra_ptr_t,
305 rrd->stat_head->rra_cnt);
307 rrd_file->header_len = offset;
308 rrd_file->pos = offset;
311 unsigned long row_cnt = 0;
314 for (i=0; i<rrd->stat_head->rra_cnt; i++)
315 row_cnt += rrd->rra_def[i].row_cnt;
317 off_t correct_len = rrd_file->header_len +
318 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
320 if (correct_len > rrd_file->file_len)
322 rrd_set_error("'%s' is too small (should be %ld bytes)",
323 file_name, (long long) correct_len);
324 goto out_nullify_head;
331 rrd->stat_head = NULL;
334 if (data != MAP_FAILED)
335 munmap(data, rrd_file->file_len);
337 close(rrd_simple_file->fd);
345 #if defined DEBUG && DEBUG > 1
346 /* Print list of in-core pages of the current rrd_file. */
349 rrd_file_t *rrd_file,
352 rrd_simple_file_t *rrd_simple_file;
353 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
355 /* pretty print blocks in core */
358 ssize_t _page_size = sysconf(_SC_PAGESIZE);
360 off = rrd_file->file_len +
361 ((rrd_file->file_len + _page_size - 1) / _page_size);
365 if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) == 0) {
367 unsigned is_in = 0, was_in = 0;
369 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
370 is_in = vec[off] & 1; /* if lsb set then is core resident */
373 if (was_in != is_in) {
374 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
375 was_in ? "" : "not ", vec + prev, off - prev);
381 "%s: %sin core: %p len %ld\n", mark,
382 was_in ? "" : "not ", vec + prev, off - prev);
384 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
387 fprintf(stderr, "sorry mincore only works with mmap");
390 #endif /* defined DEBUG && DEBUG > 1 */
393 * get exclusive lock to whole file.
394 * lock gets removed when we close the file
396 * returns 0 on success
399 rrd_file_t *rrd_file)
402 rrd_simple_file_t *rrd_simple_file;
403 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
406 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
409 if (_fstat(rrd_simple_file->fd, &st) == 0) {
410 rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size);
417 lock.l_type = F_WRLCK; /* exclusive write lock */
418 lock.l_len = 0; /* whole file */
419 lock.l_start = 0; /* start of file */
420 lock.l_whence = SEEK_SET; /* l_start is relative to the start of the file */
422 rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock);
430 /* drop cache except for the header and the active pages */
432 rrd_file_t *rrd_file,
435 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
436 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
437 off_t dontneed_start;
441 ssize_t _page_size = sysconf(_SC_PAGESIZE);
443 if (rrd_file == NULL) {
444 #if defined DEBUG && DEBUG
445 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
450 #if defined DEBUG && DEBUG > 1
451 mincore_print(rrd_file, "before");
454 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
455 rra_start = rrd_file->header_len;
456 dontneed_start = PAGE_START(rra_start) + _page_size;
457 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
460 + rrd->rra_ptr[i].cur_row
461 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
462 if (active_block > dontneed_start) {
464 madvise(rrd_simple_file->file_start + dontneed_start,
465 active_block - dontneed_start - 1, MADV_DONTNEED);
467 /* on Linux, at least, only fadvise DONTNEED seems to purge pages from cache */
468 #ifdef HAVE_POSIX_FADVISE
469 posix_fadvise(rrd_simple_file->fd, dontneed_start,
470 active_block - dontneed_start - 1,
471 POSIX_FADV_DONTNEED);
474 dontneed_start = active_block;
475 /* do not release 'hot' block if update for this RRA will occur
476 * within 10 minutes */
477 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
478 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
479 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
480 dontneed_start += _page_size;
483 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
487 if (dontneed_start < rrd_file->file_len) {
489 madvise(rrd_simple_file->file_start + dontneed_start,
490 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
492 #ifdef HAVE_POSIX_FADVISE
493 posix_fadvise(rrd_simple_file->fd, dontneed_start,
494 rrd_file->file_len - dontneed_start,
495 POSIX_FADV_DONTNEED);
499 #if defined DEBUG && DEBUG > 1
500 mincore_print(rrd_file, "after");
502 #endif /* without madvise and posix_fadvise it does not make much sense to do anything */
510 rrd_file_t *rrd_file)
512 rrd_simple_file_t *rrd_simple_file;
513 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
517 ret = msync(rrd_simple_file->file_start, rrd_file->file_len, MS_ASYNC);
519 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
520 ret = munmap(rrd_simple_file->file_start, rrd_file->file_len);
522 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
524 ret = close(rrd_simple_file->fd);
526 rrd_set_error("closing file: %s", rrd_strerror(errno));
534 /* Set position of rrd_file. */
537 rrd_file_t *rrd_file,
542 rrd_simple_file_t *rrd_simple_file;
543 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
546 if (whence == SEEK_SET)
548 else if (whence == SEEK_CUR)
549 rrd_file->pos += off;
550 else if (whence == SEEK_END)
551 rrd_file->pos = rrd_file->file_len + off;
553 ret = lseek(rrd_simple_file->fd, off, whence);
555 rrd_set_error("lseek: %s", rrd_strerror(errno));
558 /* mimic fseek, which returns 0 upon success */
559 return ret < 0; /*XXX: or just ret to mimic lseek */
563 /* Get current position in rrd_file. */
566 rrd_file_t *rrd_file)
568 return rrd_file->pos;
572 /* Read count bytes into buffer buf, starting at rrd_file->pos.
573 * Returns the number of bytes read or <0 on error. */
576 rrd_file_t *rrd_file,
580 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
585 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
588 return -1; /* EINVAL */
589 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
590 if (_surplus > 0) { /* short read */
595 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
597 rrd_file->pos += _cnt; /* mimic read() semantics */
602 ret = read(rrd_simple_file->fd, buf, count);
604 rrd_file->pos += ret; /* mimic read() semantics */
610 /* Write count bytes from buffer buf to the current position
611 * rrd_file->pos of rrd_simple_file->fd.
612 * Returns the number of bytes written or <0 on error. */
615 rrd_file_t *rrd_file,
619 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
621 int old_size = rrd_file->file_len;
625 return -1; /* EINVAL */
627 if((rrd_file->pos + count) > old_size)
629 rrd_set_error("attempting to write beyond end of file");
632 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
633 rrd_file->pos += count;
634 return count; /* mimic write() semantics */
636 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
639 rrd_file->pos += _sz;
645 /* flush all data pending to be written to FD. */
648 rrd_file_t *rrd_file)
650 rrd_simple_file_t *rrd_simple_file;
651 rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt;
652 if (fdatasync(rrd_simple_file->fd) != 0) {
653 rrd_set_error("flushing fd %d: %s", rrd_simple_file->fd,
654 rrd_strerror(errno));
659 /* Initialize RRD header. */
664 rrd->stat_head = NULL;
667 rrd->live_head = NULL;
668 rrd->legacy_last_up = NULL;
670 rrd->pdp_prep = NULL;
671 rrd->cdp_prep = NULL;
672 rrd->rrd_value = NULL;
676 /* free RRD header data. */
682 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
683 free(rrd->live_head);
690 free(rrd->live_head);
691 free(rrd->stat_head);
697 free(rrd->rrd_value);
702 /* routine used by external libraries to free memory allocated by