1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
18 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
19 * your syscalls are very slow.
 * When this macro is defined, the _madvise() wrapper further down compares
 * each request against the last recorded hint (_madv_vec) before calling
 * madvise(). */
20 #define CHECK_MADVISE_OVERLAPS 1
23 /* the cast to void* is there to avoid this warning seen on ia64 with certain
24 versions of gcc: 'cast increases required alignment of target type'
// Pointer flavour of __rrd_read: no copy is made. (dst) is pointed at
// data + offset, cast through a void pointer to a dst_t pointer, and
// offset is then advanced by sizeof(dst_t) times (cnt).
// Relies on variables named data and offset being in scope at the
// expansion site; they are not macro parameters.
// The expansion is two statements, so it must not be used as the sole
// body of an unbraced if, else or loop.
26 #define __rrd_read(dst, dst_t, cnt) \
27 (dst) = (dst_t*)(void*) (data + offset); \
28 offset += sizeof(dst_t) * (cnt)
30 #define __rrd_read(dst, dst_t, cnt) \
31 if ((dst = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
32 rrd_set_error(#dst " malloc"); \
33 goto out_nullify_head; \
35 offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
38 /* Get the address of the start of the page containing addr: the low bits
 * of addr are cleared by masking with ~(_page_size-1). This only rounds
 * down to a page boundary when _page_size is a power of two. The macro
 * reads a variable named _page_size from the scope where it is expanded;
 * it is not a parameter. */
40 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
44 /* vector of last madvise hint */
45 typedef struct _madvise_vec_t {
49 _madvise_vec_t _madv_vec = { NULL, 0 };
52 #if defined CHECK_MADVISE_OVERLAPS
/* _madvise with overlap checking: the hint is forwarded to madvise() only
 * when the start address AND the length both differ from the last hint
 * recorded in _madv_vec; the new start and length are stored first.
 * NOTE(review): because the test uses &&, a request that repeats only the
 * previous start or only the previous length is dropped without calling
 * madvise() -- for example two consecutive hints on the same start
 * address with different lengths. Confirm whether "differs in start OR
 * length" was intended.
 * NOTE(review): the expansion begins with a bare if statement, so placing
 * it directly before an else at a call site would capture that else. */
53 #define _madvise(_start, _off, _hint) \
54 if ((_start) != _madv_vec.start && (ssize_t)(_off) != _madv_vec.length) { \
55 _madv_vec.start = (_start) ; _madv_vec.length = (_off); \
56 madvise((_start), (_off), (_hint)); \
/* Plain _madvise: passes start, length and hint straight through to
 * madvise() with no bookkeeping. The expansion is a single expression.
 * Presumably this is the alternative branch of the CHECK_MADVISE_OVERLAPS
 * conditional; the #else line is not visible in this excerpt -- confirm. */
59 #define _madvise(_start, _off, _hint) \
60 madvise((_start), (_off), (_hint))
63 /* Open a database file, return its header and an open filehandle,
64 * positioned to the first cdp in the first rra.
65 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
66 * before returning an error. Do not call rrd_close upon failure of rrd_open.
70 const char *const file_name,
75 mode_t mode = S_IRUSR;
79 ssize_t _page_size = sysconf(_SC_PAGESIZE);
80 int mm_prot = PROT_READ, mm_flags = 0;
85 rrd_file_t *rrd_file = NULL;
86 off_t newfile_size = 0;
88 if (rdwr & RRD_CREAT) {
89 /* yes bad inline signaling alert, we are using the
90 floatcookie to pass the size in ... only used in resize */
91 newfile_size = (off_t) rrd->stat_head->float_cookie;
95 rrd_file = malloc(sizeof(rrd_file_t));
96 if (rrd_file == NULL) {
97 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
100 memset(rrd_file, 0, sizeof(rrd_file_t));
103 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
104 (RRD_READONLY | RRD_READWRITE)) {
105 /* Both READONLY and READWRITE were given, which is invalid. */
106 rrd_set_error("in read/write request mask");
110 if (rdwr & RRD_READONLY) {
113 mm_flags = MAP_PRIVATE;
114 # ifdef MAP_NORESERVE
115 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
119 if (rdwr & RRD_READWRITE) {
123 mm_flags = MAP_SHARED;
124 mm_prot |= PROT_WRITE;
127 if (rdwr & RRD_CREAT) {
128 flags |= (O_CREAT | O_TRUNC);
131 if (rdwr & RRD_READAHEAD) {
133 mm_flags |= MAP_POPULATE; /* populate ptes and data */
135 #if defined MAP_NONBLOCK
136 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
147 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
148 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
152 /* Better try to avoid seeks as much as possible. stat may be heavy but
153 * many concurrent seeks are even worse. */
154 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
155 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
158 if (newfile_size == 0) {
159 rrd_file->file_len = statb.st_size;
161 rrd_file->file_len = newfile_size;
162 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
163 write(rrd_file->fd, "\0", 1); /* poke */
164 lseek(rrd_file->fd, 0, SEEK_SET);
166 #ifdef HAVE_POSIX_FADVISE
167 /* In general we need no read-ahead when dealing with rrd_files.
168 When we stop reading, it is highly unlikely that we start up again.
169 In this manner we actually save time and disk access (and buffer cache).
170 Thanks to Dave Plonka for the idea of using POSIX_FADV_RANDOM here. */
171 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
172 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
173 rrd_strerror(errno));
179 if (rdwr & RRD_READWRITE)
181 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
182 rrd_set_error("failed to disable the stream buffer\n");
188 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
189 rrd_file->fd, offset);
191 /* lets see if the first read worked */
192 if (data == MAP_FAILED) {
193 rrd_set_error("mmaping file '%s': %s", file_name,
194 rrd_strerror(errno));
197 rrd_file->file_start = data;
198 if (rdwr & RRD_CREAT) {
199 memset(data, DNAN, newfile_size - 1);
203 if (rdwr & RRD_CREAT)
206 if (rdwr & RRD_COPY) {
207 /* We will read everything in a moment (copying) */
208 _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
211 /* We do not need to read anything in for the moment */
212 _madvise(data, rrd_file->file_len, MADV_RANDOM);
213 /* the stat_head will be needed soonish, so hint accordingly */
214 _madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
217 /* alternatively: keep 1 page worth of data, likely headers,
218 * don't need the rest. */
219 _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
220 _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
221 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
226 __rrd_read(rrd->stat_head, stat_head_t,
229 /* lets do some test if we are on track ... */
230 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
231 rrd_set_error("'%s' is not an RRD file", file_name);
232 goto out_nullify_head;
235 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
236 rrd_set_error("This RRD was created on another architecture");
237 goto out_nullify_head;
240 version = atoi(rrd->stat_head->version);
242 if (version > atoi(RRD_VERSION)) {
243 rrd_set_error("can't handle RRD file version %s",
244 rrd->stat_head->version);
245 goto out_nullify_head;
247 #if defined USE_MADVISE && !defined ONE_PAGE
248 /* the ds_def will be needed soonish, so hint accordingly */
249 _madvise(data + PAGE_START(offset),
250 sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
253 __rrd_read(rrd->ds_def, ds_def_t,
254 rrd->stat_head->ds_cnt);
256 #if defined USE_MADVISE && !defined ONE_PAGE
257 /* the rra_def will be needed soonish, so hint accordingly */
258 _madvise(data + PAGE_START(offset),
259 sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
262 __rrd_read(rrd->rra_def, rra_def_t,
263 rrd->stat_head->rra_cnt);
265 /* handle different format for the live_head */
267 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
268 if (rrd->live_head == NULL) {
269 rrd_set_error("live_head_t malloc");
273 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
274 offset += sizeof(long);
276 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
278 rrd->live_head->last_up_usec = 0;
280 #if defined USE_MADVISE && !defined ONE_PAGE
281 /* the live_head will be needed soonish, so hint accordingly */
282 _madvise(data + PAGE_START(offset),
283 sizeof(live_head_t), MADV_WILLNEED);
285 __rrd_read(rrd->live_head, live_head_t,
288 //XXX: This doesn't look like it needs madvise
289 __rrd_read(rrd->pdp_prep, pdp_prep_t,
290 rrd->stat_head->ds_cnt);
292 //XXX: This could benefit from madvise()ing
293 __rrd_read(rrd->cdp_prep, cdp_prep_t,
294 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
296 //XXX: This could benefit from madvise()ing
297 __rrd_read(rrd->rra_ptr, rra_ptr_t,
298 rrd->stat_head->rra_cnt);
300 rrd_file->header_len = offset;
301 rrd_file->pos = offset;
305 rrd->stat_head = NULL;
314 /* Debug helper: pretty-print which parts of the mapped rrd_file are
 * resident in core, as reported by mincore(2). The residency lines are
 * written to stderr and prefixed with the caller-supplied mark string.
 * NOTE(review): this comment used to read "Close a reference to an
 * rrd_file.", which does not describe this function. */
316 void mincore_print(rrd_file_t *rrd_file,char * mark){
318 /* pretty print blocks in core */
321 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* off is first used as a size: file_len plus the number of pages needed
 * to cover file_len. Presumably this is the allocation size for vec --
 * the allocation itself is not visible in this excerpt. */
323 off = rrd_file->file_len +
324 ((rrd_file->file_len + _page_size - 1) / _page_size);
328 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
330 unsigned is_in = 0, was_in = 0;
/* NOTE(review): mincore(2) fills one vec byte per page, but this loop
 * indexes vec once per byte of file_len, and the address printed below
 * is vec + prev (a position inside vec) rather than a position inside
 * the mapping. Confirm whether off was meant to count pages. */
332 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
333 is_in = vec[off] & 1; /* if lsb set then is core resident */
336 if (was_in != is_in) {
337 fprintf(stderr, "%s: %sin core: %p len %ld\n",mark,
338 was_in ? "" : "not ", vec + prev, off - prev);
344 "%s: %sin core: %p len %ld\n", mark,
345 was_in ? "" : "not ", vec + prev, off - prev);
347 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
350 fprintf(stderr, "sorry mincore only works with mmap");
355 /* drop cache except for the header and the active pages */
358 rrd_file_t *rrd_file,
360 unsigned long dontneed_start;
361 unsigned long rra_start;
362 unsigned long active_block;
364 ssize_t _page_size = sysconf(_SC_PAGESIZE);
366 #if defined DEBUG && DEBUG > 1
367 mincore_print(rrd_file,"before");
370 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
371 rra_start = rrd_file->header_len;
372 dontneed_start = PAGE_START(rra_start)+_page_size;
373 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
376 + rrd->rra_ptr[i].cur_row
377 * rrd->stat_head->ds_cnt
378 * sizeof(rrd_value_t));
379 if (active_block > dontneed_start){
381 _madvise(rrd_file->file_start + dontneed_start,
382 active_block-dontneed_start-1,
385 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
386 #ifdef HAVE_POSIX_FADVISE
387 posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED);
390 dontneed_start = active_block + _page_size;
391 rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t);
394 _madvise(rrd_file->file_start + dontneed_start,
395 rrd_file->file_len - dontneed_start,
398 #ifdef HAVE_POSIX_FADVISE
399 posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED);
401 #if defined DEBUG && DEBUG > 1
402 mincore_print(rrd_file,"after");
407 rrd_file_t *rrd_file)
411 ret = munmap(rrd_file->file_start, rrd_file->file_len);
413 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
415 ret = close(rrd_file->fd);
417 rrd_set_error("closing file: %s", rrd_strerror(errno));
424 /* Set position of rrd_file. */
427 rrd_file_t *rrd_file,
434 if (whence == SEEK_SET)
436 else if (whence == SEEK_CUR)
437 rrd_file->pos += off;
438 else if (whence == SEEK_END)
439 rrd_file->pos = rrd_file->file_len + off;
441 ret = lseek(rrd_file->fd, off, whence);
443 rrd_set_error("lseek: %s", rrd_strerror(errno));
446 //XXX: mimic fseek, which returns 0 upon success
447 return ret == -1; //XXX: or just ret to mimic lseek
451 /* Get current position in rrd_file.
 * Returns the pos field stored in the rrd_file_t; no system call is made
 * in the visible code. */
453 inline off_t rrd_tell(
454 rrd_file_t *rrd_file)
456 return rrd_file->pos;
460 /* read count bytes into buffer buf, starting at rrd_file->pos.
461 * Returns the number of bytes read or <0 on error.
 * Two code paths are visible: one copies out of the mapping at
 * file_start + pos with memcpy(), the other calls read(2) on fd. Both
 * advance rrd_file->pos afterwards. Which path is compiled is decided
 * by preprocessor lines that are not visible in this excerpt
 * (presumably HAVE_MMAP -- confirm). */
463 inline ssize_t rrd_read(
464 rrd_file_t *rrd_file,
/* _surplus > 0 means pos + _cnt would run past file_len. */
470 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
472 if (_surplus > 0) { /* short read */
477 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
479 rrd_file->pos += _cnt; /* mimic read() semantics */
484 ret = read(rrd_file->fd, buf, count);
486 rrd_file->pos += ret; /* mimic read() semantics */
492 /* write count bytes from buffer buf to the current position
493 * rrd_file->pos of rrd_file->fd.
494 * Returns the number of bytes written.
 * Two code paths are visible: a memcpy() into the mapping at
 * file_start + pos, and a write(2) on fd. Both advance rrd_file->pos.
 * Which one is compiled depends on preprocessor lines that are not
 * visible in this excerpt.
 * NOTE(review): no check of pos + count against file_len is visible
 * before the memcpy(), unlike the surplus check in rrd_read -- confirm
 * callers never write past the end of the mapping. */
496 inline ssize_t rrd_write(
497 rrd_file_t *rrd_file,
502 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
503 rrd_file->pos += count;
504 return count; /* mimic write() semantics */
506 ssize_t _sz = write(rrd_file->fd, buf, count);
509 rrd_file->pos += _sz;
515 /* flush all data pending to be written to FD.
 * The visible code calls fdatasync() on rrd_file->fd. The function
 * returns void, so a failure is reported only through rrd_set_error(). */
517 inline void rrd_flush(
518 rrd_file_t *rrd_file)
520 if (fdatasync(rrd_file->fd) != 0) {
521 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
522 rrd_strerror(errno));
527 /* Initialize RRD header. */
532 rrd->stat_head = NULL;
535 rrd->live_head = NULL;
537 rrd->pdp_prep = NULL;
538 rrd->cdp_prep = NULL;
539 rrd->rrd_value = NULL;
543 /* free RRD header data.
 * The visible calls release live_head, stat_head and rrd_value with
 * free(). Lines between them are not visible in this excerpt, so other
 * members or conditional compilation may be handled there too. */
546 inline void rrd_free(
554 free(rrd->live_head);
555 free(rrd->stat_head);
561 free(rrd->rrd_value);
566 /* routine used by external libraries to free memory allocated by
576 /* XXX: FIXME: missing documentation. */
577 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
579 int /*_rrd_*/ readfile(
580 const char *file_name,
584 long writecnt = 0, totalcnt = MEMBLK;
589 if ((strcmp("-", file_name) == 0)) {
592 if ((input = fopen(file_name, "rb")) == NULL) {
593 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
601 } while (c != '\n' && !feof(input));
603 if (strcmp("-", file_name)) {
604 fseek(input, 0, SEEK_END);
605 /* have extra space for detecting EOF without realloc */
606 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
607 if (totalcnt < MEMBLK)
608 totalcnt = MEMBLK; /* sanitize */
609 fseek(input, offset * sizeof(char), SEEK_SET);
611 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
612 perror("Allocate Buffer:");
617 fread((*buffer) + writecnt, 1,
618 (totalcnt - writecnt) * sizeof(char), input);
619 if (writecnt >= totalcnt) {
622 rrd_realloc((*buffer),
623 (totalcnt + 4) * sizeof(char))) == NULL) {
624 perror("Realloc Buffer:");
628 } while (!feof(input));
629 (*buffer)[writecnt] = '\0';
630 if (strcmp("-", file_name) != 0) {