1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
8 * Revision 1.10 2004/05/26 22:11:12 oetiker
9 * reduce compiler warnings. Many small fixes. -- Mike Slifcak <slif@bellsouth.net>
11 * Revision 1.9 2003/04/29 21:56:49 oetiker
12 * readline in rrd_open.c reads the file in 8 KB blocks, and calls realloc for
13 * each block. realloc is very slow in Mac OS X for huge blocks, e.g. when
14 * restoring databases from huge xml files. This patch finds the size of the
15 * file, and starts out with malloc'ing the full size.
16 * -- Peter Speck <speck@ruc.dk>
18 * Revision 1.8 2003/04/11 19:43:44 oetiker
19 * New special value COUNT which allows calculations based on the position of a
20 * value within a data set. Bug fix in rrd_rpncalc.c. PREV returned erroneous
21 * value for the second value. Bug fix in rrd_restore.c. Bug causing seek error
22 * when accessing an RRD restored from an xml that holds an RRD version <3.
23 * -- Ruben Justo <ruben@ainek.com>
25 * Revision 1.7 2003/03/31 21:22:12 oetiker
26 * enables RRDtool updates with microsecond or in case of windows millisecond
27 * precision. This is needed to reduce time measurement error when archive step
28 * is small. (<30s) -- Sasha Mikheev <sasha@avalon-net.co.il>
30 * Revision 1.6 2003/02/13 07:05:27 oetiker
31 * Find attached the patch I promised to send to you. Please note that there
32 * are three new source files (src/rrd_is_thread_safe.h, src/rrd_thread_safe.c
33 * and src/rrd_not_thread_safe.c) and the introduction of librrd_th. This
34 * library is identical to librrd, but it contains support code for per-thread
35 * global variables currently used for error information only. This is similar
36 * to how errno per-thread variables are implemented. librrd_th must be linked
37 * alongside of libpthread
39 * There is also a new file "THREADS", holding some documentation.
41 * -- Peter Stamfest <peter@stamfest.at>
43 * Revision 1.5 2002/06/20 00:21:03 jake
44 * More Win32 build changes; thanks to Kerry Calvert.
46 * Revision 1.4 2002/02/01 20:34:49 oetiker
47 * fixed version number and date/time
49 * Revision 1.3 2001/03/04 13:01:55 oetiker
50 * Aberrant Behavior Detection support. A brief overview added to rrdtool.pod.
51 * Major updates to rrd_update.c, rrd_create.c. Minor update to other core files.
52 * This is backwards compatible! But new files using the Aberrant stuff are not readable
53 * by old rrdtool versions. See http://cricket.sourceforge.net/aberrant/rrd_hw.htm
54 * -- Jake Brutlag <jakeb@corp.webtv.net>
56 * Revision 1.2 2001/03/04 10:29:20 oetiker
57 * fixed filedescriptor leak
58 * -- Mike Franusich <mike@franusich.com>
60 * Revision 1.1.1.1 2001/02/25 22:25:05 oetiker
63 *****************************************************************************/
69 /* DEBUG 2 prints information obtained via mincore(2) */
71 /* do not calculate exact madvise hints but assume 1 page for headers and
72 * set DONTNEED for the rest, which is assumed to be data */
74 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
75 * your syscalls are very slow */
76 #define CHECK_MADVISE_OVERLAPS 1
79 /* the cast to void* is there to avoid this warning seen on ia64 with certain
80 versions of gcc: 'cast increases required alignment of target type'
82 #define __rrd_read(dst, dst_t, cnt) \
83 (dst) = (dst_t*)(void*) (data + offset); \
84 offset += sizeof(dst_t) * (cnt)
86 #define __rrd_read(dst, dst_t, cnt) \
87 if ((dst = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
88 rrd_set_error(#dst " malloc"); \
89 goto out_nullify_head; \
91 offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
94 /* next page-aligned (i.e. page-align up) */
96 #define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
/* Previous page-aligned address/offset (i.e. page-align down): clears the
 * low bits of addr so the result is the largest multiple of _page_size that
 * is <= addr.  An already aligned addr is returned unchanged.
 * Requires a variable named _page_size to be in scope at the point of use
 * and assumes it is a power of two (the mask trick is only valid then). */
#ifndef PAGE_ALIGN_DOWN
#define PAGE_ALIGN_DOWN(addr) ((addr)&(~(_page_size-1)))
#endif
104 /* vector of last madvise hint */
105 typedef struct _madvise_vec_t {
109 _madvise_vec_t _madv_vec = { NULL, 0 };
/* _madvise(_start, _off, _hint): madvise() wrapper.
 *
 * With CHECK_MADVISE_OVERLAPS defined, the call is skipped only when the
 * region is exactly the one hinted last, i.e. BOTH the start address and the
 * length equal the values remembered in _madv_vec.  Any other region is
 * recorded in _madv_vec and passed on to madvise().  Only the region is
 * remembered, not the hint value, so repeating the same region with a
 * different _hint is skipped as well.
 *
 * The arguments are evaluated more than once; pass side-effect-free
 * expressions.  The body is wrapped in do { } while (0) so the macro acts as
 * a single statement (safe as the body of an unbraced if/else).
 *
 * Without CHECK_MADVISE_OVERLAPS the macro is a plain madvise() call. */
#if defined CHECK_MADVISE_OVERLAPS
#define _madvise(_start, _off, _hint) \
    do { \
        if ((_start) != _madv_vec.start || \
            (ssize_t)(_off) != _madv_vec.length) { \
            _madv_vec.start = (_start); \
            _madv_vec.length = (_off); \
            madvise((_start), (_off), (_hint)); \
        } \
    } while (0)
#else
#define _madvise(_start, _off, _hint) \
    madvise((_start), (_off), (_hint))
#endif
123 /* Open a database file, return its header and an open filehandle,
124 * positioned to the first cdp in the first rra.
125 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
126 * before returning an error. Do not call rrd_close upon failure of rrd_open.
129 rrd_file_t *rrd_open(
130 const char *const file_name,
135 mode_t mode = S_IRUSR;
139 ssize_t _page_size = sysconf(_SC_PAGESIZE);
140 int mm_prot = PROT_READ, mm_flags = 0;
145 rrd_file_t *rrd_file = NULL;
146 off_t newfile_size = 0;
148 if (rdwr & RRD_CREAT)
149 newfile_size = (off_t) rrd->stat_head;
151 rrd_file = malloc(sizeof(rrd_file_t));
152 if (rrd_file == NULL) {
153 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
156 memset(rrd_file, 0, sizeof(rrd_file_t));
159 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
160 (RRD_READONLY | RRD_READWRITE)) {
161 /* Both READONLY and READWRITE were given, which is invalid. */
162 rrd_set_error("in read/write request mask");
166 if (rdwr & RRD_READONLY) {
169 mm_flags = MAP_PRIVATE;
170 # ifdef MAP_NORESERVE
171 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
175 if (rdwr & RRD_READWRITE) {
179 mm_flags = MAP_SHARED;
180 mm_prot |= PROT_WRITE;
183 if (rdwr & RRD_CREAT) {
184 flags |= (O_CREAT | O_TRUNC);
187 if (rdwr & RRD_READAHEAD) {
189 mm_flags |= MAP_POPULATE; /* populate ptes and data */
191 #if defined MAP_NONBLOCK
192 // if (!(rdwr & RRD_COPY))
193 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
204 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
205 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
209 /* Better try to avoid seeks as much as possible. stat may be heavy but
210 * many concurrent seeks are even worse. */
211 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
212 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
215 if (newfile_size == 0) {
216 rrd_file->file_len = statb.st_size;
218 rrd_file->file_len = newfile_size;
219 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
220 write(rrd_file->fd, "\0", 1); /* poke */
221 lseek(rrd_file->fd, 0, SEEK_SET);
223 #ifdef HAVE_POSIX_FADVISE
224 /* In general we need no read-ahead when dealing with rrd_files.
225 When we stop reading, it is highly unlikely that we start up again.
226 In this manner we actually save time and diskaccess (and buffer cache).
227 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
228 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
229 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
230 rrd_strerror(errno));
236 if (rdwr & RRD_READWRITE)
238 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
239 rrd_set_error("failed to disable the stream buffer\n");
245 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
246 rrd_file->fd, offset);
248 /* let's see if the mmap worked */
249 if (data == MAP_FAILED) {
250 rrd_set_error("mmaping file '%s': %s", file_name,
251 rrd_strerror(errno));
254 rrd_file->file_start = data;
255 if (rdwr & RRD_CREAT) {
256 memset(data, DNAN, newfile_size - 1);
261 if (rdwr & RRD_COPY) {
262 /* We will read everything in a moment (copying) */
263 _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
266 /* We do not need to read anything in for the moment */
267 _madvise(data, rrd_file->file_len, MADV_DONTNEED);
268 /* the stat_head will be needed soonish, so hint accordingly */
269 _madvise(data + PAGE_ALIGN_DOWN(offset),
270 PAGE_ALIGN(sizeof(stat_head_t)),
271 MADV_WILLNEED | MADV_RANDOM);
274 /* alternatively: keep 1 page worth of data, likely headers,
275 * don't need the rest. */
276 _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
277 _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
278 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
283 __rrd_read(rrd->stat_head, stat_head_t,
286 /* lets do some test if we are on track ... */
287 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
288 rrd_set_error("'%s' is not an RRD file", file_name);
289 goto out_nullify_head;
292 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
293 rrd_set_error("This RRD was created on another architecture");
294 goto out_nullify_head;
297 version = atoi(rrd->stat_head->version);
299 if (version > atoi(RRD_VERSION)) {
300 rrd_set_error("can't handle RRD file version %s",
301 rrd->stat_head->version);
302 goto out_nullify_head;
304 #if defined USE_MADVISE && !defined ONE_PAGE
305 /* the ds_def will be needed soonish, so hint accordingly */
306 _madvise(data + PAGE_ALIGN_DOWN(offset),
307 PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
310 __rrd_read(rrd->ds_def, ds_def_t,
311 rrd->stat_head->ds_cnt);
313 #if defined USE_MADVISE && !defined ONE_PAGE
314 /* the rra_def will be needed soonish, so hint accordingly */
315 _madvise(data + PAGE_ALIGN_DOWN(offset),
316 PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
319 __rrd_read(rrd->rra_def, rra_def_t,
320 rrd->stat_head->rra_cnt);
322 /* handle different format for the live_head */
324 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
325 if (rrd->live_head == NULL) {
326 rrd_set_error("live_head_t malloc");
330 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
331 offset += sizeof(long);
333 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
335 rrd->live_head->last_up_usec = 0;
337 #if defined USE_MADVISE && !defined ONE_PAGE
338 /* the live_head will be needed soonish, so hint accordingly */
339 _madvise(data + PAGE_ALIGN_DOWN(offset),
340 PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
342 __rrd_read(rrd->live_head, live_head_t,
345 //XXX: This doesn't look like it needs madvise
346 __rrd_read(rrd->pdp_prep, pdp_prep_t,
347 rrd->stat_head->ds_cnt);
349 //XXX: This could benefit from madvise()ing
350 __rrd_read(rrd->cdp_prep, cdp_prep_t,
351 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
353 //XXX: This could benefit from madvise()ing
354 __rrd_read(rrd->rra_ptr, rra_ptr_t,
355 rrd->stat_head->rra_cnt);
357 rrd_file->header_len = offset;
358 rrd_file->pos = offset;
364 rrd->stat_head = NULL;
373 /* Close a reference to an rrd_file. */
376 rrd_file_t *rrd_file)
380 #if defined HAVE_MMAP || defined DEBUG
381 ssize_t _page_size = sysconf(_SC_PAGESIZE);
383 #if defined DEBUG && DEBUG > 1
384 /* pretty print blocks in core */
388 off = rrd_file->file_len +
389 ((rrd_file->file_len + _page_size - 1) / _page_size);
393 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
395 unsigned is_in = 0, was_in = 0;
397 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
398 is_in = vec[off] & 1; /* if lsb set then is core resident */
401 if (was_in != is_in) {
402 fprintf(stderr, "%sin core: %p len %ld\n",
403 was_in ? "" : "not ", vec + prev, off - prev);
409 "%sin core: %p len %ld\n",
410 was_in ? "" : "not ", vec + prev, off - prev);
412 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
418 /* Keep headers around, round up to next page boundary. */
420 PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
421 if (rrd_file->file_len > ret)
422 _madvise(rrd_file->file_start + ret,
423 rrd_file->file_len - ret, MADV_DONTNEED);
425 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
426 _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
427 rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
432 ret = munmap(rrd_file->file_start, rrd_file->file_len);
434 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
436 ret = close(rrd_file->fd);
438 rrd_set_error("closing file: %s", rrd_strerror(errno));
445 /* Set position of rrd_file. */
448 rrd_file_t *rrd_file,
455 if (whence == SEEK_SET)
457 else if (whence == SEEK_CUR)
458 rrd_file->pos += off;
459 else if (whence == SEEK_END)
460 rrd_file->pos = rrd_file->file_len + off;
462 ret = lseek(rrd_file->fd, off, whence);
464 rrd_set_error("lseek: %s", rrd_strerror(errno));
467 //XXX: mimic fseek, which returns 0 upon success
468 return ret == -1; //XXX: or just ret to mimic lseek
472 /* Get current position in rrd_file. */
474 inline off_t rrd_tell(
475 rrd_file_t *rrd_file)
477 return rrd_file->pos;
481 /* Read count bytes into buffer buf, starting at rrd_file->pos.
482 * Returns the number of bytes read or <0 on error. */
484 inline ssize_t rrd_read(
485 rrd_file_t *rrd_file,
491 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
492 if (_surplus > 0) { /* request extends past file_len: short read */
497 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
499 rrd_file->pos += _cnt; /* advance pos by the bytes copied, mimicking read() semantics */
504 ret = read(rrd_file->fd, buf, count);
506 rrd_file->pos += ret; /* advance pos by the bytes read, mimicking read() semantics */
512 /* Write count bytes from buffer buf to the current position
513 * rrd_file->pos of rrd_file->fd.
514 * Returns the number of bytes written. */
516 inline ssize_t rrd_write(
517 rrd_file_t *rrd_file,
522 memcpy(rrd_file->file_start + rrd_file->pos, buf, count); /* no bounds check against file_len is visible here */
523 rrd_file->pos += count;
524 return count; /* mimic write() semantics */
526 ssize_t _sz = write(rrd_file->fd, buf, count);
529 rrd_file->pos += _sz;
535 /* flush all data pending to be written to FD. */
537 inline void rrd_flush(
538 rrd_file_t *rrd_file)
540 if (fdatasync(rrd_file->fd) != 0) {
541 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
542 rrd_strerror(errno));
547 /* Initialize RRD header. */
552 rrd->stat_head = NULL;
555 rrd->live_head = NULL;
557 rrd->pdp_prep = NULL;
558 rrd->cdp_prep = NULL;
559 rrd->rrd_value = NULL;
563 /* free RRD header data. */
566 inline void rrd_free(
574 if (atoi(rrd->stat_head->version) < 3)
575 free(rrd->live_head);
576 free(rrd->stat_head);
582 free(rrd->rrd_value);
587 /* routine used by external libraries to free memory allocated by
597 /* XXX: FIXME: missing documentation. */
598 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
600 int /*_rrd_*/ readfile(
601 const char *file_name,
605 long writecnt = 0, totalcnt = MEMBLK;
610 if ((strcmp("-", file_name) == 0)) {
613 if ((input = fopen(file_name, "rb")) == NULL) {
614 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
622 } while (c != '\n' && !feof(input));
624 if (strcmp("-", file_name)) {
625 fseek(input, 0, SEEK_END);
626 /* have extra space for detecting EOF without realloc */
627 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
628 if (totalcnt < MEMBLK)
629 totalcnt = MEMBLK; /* sanitize */
630 fseek(input, offset * sizeof(char), SEEK_SET);
632 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
633 perror("Allocate Buffer:");
638 fread((*buffer) + writecnt, 1,
639 (totalcnt - writecnt) * sizeof(char), input);
640 if (writecnt >= totalcnt) {
643 rrd_realloc((*buffer),
644 (totalcnt + 4) * sizeof(char))) == NULL) {
645 perror("Realloc Buffer:");
649 } while (!feof(input));
650 (*buffer)[writecnt] = '\0';
651 if (strcmp("-", file_name) != 0) {