1 /*****************************************************************************
2 * RRDtool 1.2.99907080300 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) \
25 (dst) = (dst_t*)(void*) (data + offset); \
26 offset += sizeof(dst_t) * (cnt)
28 #define __rrd_read(dst, dst_t, cnt) \
29 if ((dst = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
30 rrd_set_error(#dst " malloc"); \
31 goto out_nullify_head; \
33 offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
36 /* get the address of the start of this page */
38 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
42 /* Open a database file, return its header and an open filehandle,
43 * positioned to the first cdp in the first rra.
44 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
45 * before returning an error. Do not call rrd_close upon failure of rrd_open.
49 const char *const file_name,
54 mode_t mode = S_IRUSR;
58 ssize_t _page_size = sysconf(_SC_PAGESIZE);
59 int mm_prot = PROT_READ, mm_flags = 0;
64 rrd_file_t *rrd_file = NULL;
65 off_t newfile_size = 0;
67 if (rdwr & RRD_CREAT) {
68 /* yes bad inline signaling alert, we are using the
69 floatcookie to pass the size in ... only used in resize */
70 newfile_size = (off_t) rrd->stat_head->float_cookie;
74 rrd_file = malloc(sizeof(rrd_file_t));
75 if (rrd_file == NULL) {
76 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
79 memset(rrd_file, 0, sizeof(rrd_file_t));
82 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
83 (RRD_READONLY | RRD_READWRITE)) {
84 /* Both READONLY and READWRITE were given, which is invalid. */
85 rrd_set_error("in read/write request mask");
89 if (rdwr & RRD_READONLY) {
92 mm_flags = MAP_PRIVATE;
94 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
98 if (rdwr & RRD_READWRITE) {
102 mm_flags = MAP_SHARED;
103 mm_prot |= PROT_WRITE;
106 if (rdwr & RRD_CREAT) {
107 flags |= (O_CREAT | O_TRUNC);
110 if (rdwr & RRD_READAHEAD) {
112 mm_flags |= MAP_POPULATE; /* populate ptes and data */
114 #if defined MAP_NONBLOCK
115 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
119 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
120 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
124 /* Better try to avoid seeks as much as possible. stat may be heavy but
125 * many concurrent seeks are even worse. */
126 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
127 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
130 if (newfile_size == 0) {
131 rrd_file->file_len = statb.st_size;
133 rrd_file->file_len = newfile_size;
134 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
135 write(rrd_file->fd, "\0", 1); /* poke */
136 lseek(rrd_file->fd, 0, SEEK_SET);
138 #ifdef HAVE_POSIX_FADVISE
139 /* In general we need no read-ahead when dealing with rrd_files.
140 When we stop reading, it is highly unlikely that we start up again.
141 In this manner we actually save time and diskaccess (and buffer cache).
142 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
143 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
147 if (rdwr & RRD_READWRITE)
149 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
150 rrd_set_error("failed to disable the stream buffer\n");
156 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
157 rrd_file->fd, offset);
159 /* lets see if the first read worked */
160 if (data == MAP_FAILED) {
161 rrd_set_error("mmaping file '%s': %s", file_name,
162 rrd_strerror(errno));
165 rrd_file->file_start = data;
166 if (rdwr & RRD_CREAT) {
167 memset(data, DNAN, newfile_size - 1);
171 if (rdwr & RRD_CREAT)
174 if (rdwr & RRD_COPY) {
175 /* We will read everything in a moment (copying) */
176 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
178 /* We do not need to read anything in for the moment */
179 madvise(data, rrd_file->file_len, MADV_RANDOM);
180 /* the stat_head will be needed soonish, so hint accordingly */
181 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
185 __rrd_read(rrd->stat_head, stat_head_t, 1);
187 /* lets do some test if we are on track ... */
188 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
189 rrd_set_error("'%s' is not an RRD file", file_name);
190 goto out_nullify_head;
193 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
194 rrd_set_error("This RRD was created on another architecture");
195 goto out_nullify_head;
198 version = atoi(rrd->stat_head->version);
200 if (version > atoi(RRD_VERSION)) {
201 rrd_set_error("can't handle RRD file version %s",
202 rrd->stat_head->version);
203 goto out_nullify_head;
205 #if defined USE_MADVISE
206 /* the ds_def will be needed soonish, so hint accordingly */
207 madvise(data + PAGE_START(offset),
208 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
210 __rrd_read(rrd->ds_def, ds_def_t,
211 rrd->stat_head->ds_cnt);
213 #if defined USE_MADVISE
214 /* the rra_def will be needed soonish, so hint accordingly */
215 madvise(data + PAGE_START(offset),
216 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
218 __rrd_read(rrd->rra_def, rra_def_t,
219 rrd->stat_head->rra_cnt);
221 /* handle different format for the live_head */
223 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
224 if (rrd->live_head == NULL) {
225 rrd_set_error("live_head_t malloc");
229 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
230 offset += sizeof(long);
232 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
234 rrd->live_head->last_up_usec = 0;
236 #if defined USE_MADVISE
237 /* the live_head will be needed soonish, so hint accordingly */
238 madvise(data + PAGE_START(offset),
239 sizeof(live_head_t), MADV_WILLNEED);
241 __rrd_read(rrd->live_head, live_head_t,
244 //XXX: This doesn't look like it needs madvise
245 __rrd_read(rrd->pdp_prep, pdp_prep_t,
246 rrd->stat_head->ds_cnt);
248 //XXX: This could benefit from madvise()ing
249 __rrd_read(rrd->cdp_prep, cdp_prep_t,
250 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
252 //XXX: This could benefit from madvise()ing
253 __rrd_read(rrd->rra_ptr, rra_ptr_t,
254 rrd->stat_head->rra_cnt);
256 rrd_file->header_len = offset;
257 rrd_file->pos = offset;
261 rrd->stat_head = NULL;
270 /* Print list of in-core pages of the current rrd_file.
 * Residency is queried with mincore(2) over the range starting at
 * rrd_file->file_start with length rrd_file->file_len. Each time the
 * residency state changes, a line tagged with `mark` is written to stderr
 * giving the state, an address and the length of the run just ended. */
273 rrd_file_t *rrd_file,
277 /* pretty print blocks in core */
280 ssize_t _page_size = sysconf(_SC_PAGESIZE);
282 off = rrd_file->file_len +
283 ((rrd_file->file_len + _page_size - 1) / _page_size);
287 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
289 unsigned is_in = 0, was_in = 0;
291 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
292 is_in = vec[off] & 1; /* if lsb set then is core resident */
/* state changed: report the run [prev, off) that just ended */
295 if (was_in != is_in) {
296 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
297 was_in ? "" : "not ", vec + prev, off - prev);
303 "%s: %sin core: %p len %ld\n", mark,
304 was_in ? "" : "not ", vec + prev, off - prev);
/* mincore() itself failed: report errno text */
306 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
309 fprintf(stderr, "sorry mincore only works with mmap");
314 /* Drop cache except for the header and the active pages.
 * Walks the RRAs and hints, via madvise(MADV_DONTNEED) and (when
 * HAVE_POSIX_FADVISE is defined) posix_fadvise(POSIX_FADV_DONTNEED), that
 * the ranges between the active blocks are no longer needed. Return values
 * of the hint calls are not checked in the visible code. */
316 rrd_file_t *rrd_file,
319 unsigned long dontneed_start;
320 unsigned long rra_start;
321 unsigned long active_block;
323 ssize_t _page_size = sysconf(_SC_PAGESIZE);
325 #if defined DEBUG && DEBUG > 1
326 mincore_print(rrd_file, "before");
329 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
330 rra_start = rrd_file->header_len;
/* first candidate range starts one page past the page holding the end of
 * the header */
331 dontneed_start = PAGE_START(rra_start) + _page_size;
332 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
335 + rrd->rra_ptr[i].cur_row
336 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
/* release everything between the previous keep-point and this RRA's
 * active block */
337 if (active_block > dontneed_start) {
339 madvise(rrd_file->file_start + dontneed_start,
340 active_block - dontneed_start - 1, MADV_DONTNEED);
342 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
343 #ifdef HAVE_POSIX_FADVISE
344 posix_fadvise(rrd_file->fd, dontneed_start,
345 active_block - dontneed_start - 1,
346 POSIX_FADV_DONTNEED);
349 dontneed_start = active_block;
350 /* do not release 'hot' block if update for this RRA will occur
351 * within 10 minutes */
352 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
353 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
354 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
355 dontneed_start += _page_size;
358 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
/* finally release the tail from the last keep-point to the end of file */
362 madvise(rrd_file->file_start + dontneed_start,
363 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
365 #ifdef HAVE_POSIX_FADVISE
366 posix_fadvise(rrd_file->fd, dontneed_start,
367 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
369 #if defined DEBUG && DEBUG > 1
370 mincore_print(rrd_file, "after");
375 rrd_file_t *rrd_file)
/* Tear-down sequence visible here: msync the mapping, munmap it, then
 * close the descriptor. Each step stores its result in ret and has an
 * rrd_set_error() call with the errno text; the conditions guarding those
 * calls are not visible in this excerpt. */
/* MS_ASYNC: write-back is scheduled, the call does not wait for it */
380 ret = msync(rrd_file->file_start, rrd_file->file_len,MS_ASYNC);
382 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
383 ret = munmap(rrd_file->file_start, rrd_file->file_len);
385 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
387 ret = close(rrd_file->fd);
389 rrd_set_error("closing file: %s", rrd_strerror(errno));
396 /* Set position of rrd_file.
 * `whence` is compared against SEEK_SET, SEEK_CUR and SEEK_END. For
 * SEEK_CUR `off` is added to rrd_file->pos; for SEEK_END the position
 * becomes rrd_file->file_len + off. The visible lseek() call operates on
 * rrd_file->fd instead.
 * Returns (ret < 0): 0 on success, 1 on failure. */
399 rrd_file_t *rrd_file,
406 if (whence == SEEK_SET)
408 else if (whence == SEEK_CUR)
409 rrd_file->pos += off;
410 else if (whence == SEEK_END)
411 rrd_file->pos = rrd_file->file_len + off;
413 ret = lseek(rrd_file->fd, off, whence);
415 rrd_set_error("lseek: %s", rrd_strerror(errno));
418 /* mimic fseek, which returns 0 upon success */
419 return ret < 0; //XXX: or just ret to mimic lseek
423 /* Get current position in rrd_file.
 * Returns the value stored in rrd_file->pos; no system call is made. */
425 inline off_t rrd_tell(
426 rrd_file_t *rrd_file)
428 return rrd_file->pos;
432 /* Read count bytes into buffer buf, starting at rrd_file->pos.
433 * Returns the number of bytes read or <0 on error.
 * Two transfer paths are visible: a memcpy() from
 * rrd_file->file_start + rrd_file->pos, and a read() on rrd_file->fd.
 * rrd_file->pos is advanced by _cnt or ret respectively. */
435 inline ssize_t rrd_read(
436 rrd_file_t *rrd_file,
444 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
447 return -1; /* EINVAL */
/* number of requested bytes that lie beyond the end of the file */
448 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
449 if (_surplus > 0) { /* short read: request runs past file_len */
454 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
456 rrd_file->pos += _cnt; /* mimic read() semantics */
461 ret = read(rrd_file->fd, buf, count);
463 rrd_file->pos += ret; /* mimic read() semantics */
469 /* Write count bytes from buffer buf to the current position
470 * rrd_file->pos of rrd_file->fd.
471 * Returns the number of bytes written or <0 on error.
 * Two transfer paths are visible: a memcpy() into
 * rrd_file->file_start + rrd_file->pos, and a write() on rrd_file->fd.
 * rrd_file->pos is advanced by count or _sz respectively. */
473 inline ssize_t rrd_write(
474 rrd_file_t *rrd_file,
482 return -1; /* EINVAL */
483 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
484 rrd_file->pos += count;
485 return count; /* mimic write() semantics */
487 ssize_t _sz = write(rrd_file->fd, buf, count);
490 rrd_file->pos += _sz;
496 /* Flush all data pending to be written to rrd_file->fd using
 * fdatasync(2). Returns nothing; if fdatasync() reports failure, the fd
 * number and the errno text are stored via rrd_set_error(). */
498 inline void rrd_flush(
499 rrd_file_t *rrd_file)
501 if (fdatasync(rrd_file->fd) != 0) {
502 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
503 rrd_strerror(errno));
508 /* Initialize RRD header: set the header pointers of rrd to NULL. */
513 rrd->stat_head = NULL;
516 rrd->live_head = NULL;
518 rrd->pdp_prep = NULL;
519 rrd->cdp_prep = NULL;
520 rrd->rrd_value = NULL;
524 /* Free RRD header data: passes the header members of rrd to free().
 * The visible calls release live_head, stat_head and rrd_value. */
527 inline void rrd_free(
535 free(rrd->live_head);
536 free(rrd->stat_head);
542 free(rrd->rrd_value);
547 /* routine used by external libraries to free memory allocated by
557 /* XXX: FIXME: missing documentation. */
558 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
560 int /*_rrd_*/ readfile(
561 const char *file_name,
565 long writecnt = 0, totalcnt = MEMBLK;
570 if ((strcmp("-", file_name) == 0)) {
573 if ((input = fopen(file_name, "rb")) == NULL) {
574 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
582 } while (c != '\n' && !feof(input));
584 if (strcmp("-", file_name)) {
585 fseek(input, 0, SEEK_END);
586 /* have extra space for detecting EOF without realloc */
587 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
588 if (totalcnt < MEMBLK)
589 totalcnt = MEMBLK; /* sanitize */
590 fseek(input, offset * sizeof(char), SEEK_SET);
592 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
593 perror("Allocate Buffer:");
598 fread((*buffer) + writecnt, 1,
599 (totalcnt - writecnt) * sizeof(char), input);
600 if (writecnt >= totalcnt) {
603 rrd_realloc((*buffer),
604 (totalcnt + 4) * sizeof(char))) == NULL) {
605 perror("Realloc Buffer:");
609 } while (!feof(input));
610 (*buffer)[writecnt] = '\0';
611 if (strcmp("-", file_name) != 0) {