1 /*****************************************************************************
2 * RRDtool 1.2.99907080300 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) \
25 (dst) = (dst_t*)(void*) (data + offset); \
26 offset += sizeof(dst_t) * (cnt)
28 #define __rrd_read(dst, dst_t, cnt) \
29 if ((dst = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
30 rrd_set_error(#dst " malloc"); \
31 goto out_nullify_head; \
33 offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
36 /* get the address of the start of the page containing addr */
/* The expansion refers to a variable named _page_size, so one must be in
 * scope wherever the macro is used. The mask arithmetic is only a correct
 * round-down when that value is a power of two. */
38 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
42 /* Open a database file, return its header and an open filehandle,
43 * positioned to the first cdp in the first rra.
44 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
45 * before returning an error. Do not call rrd_close upon failure of rrd_open.
49 const char *const file_name,
54 mode_t mode = S_IRUSR;
58 ssize_t _page_size = sysconf(_SC_PAGESIZE);
59 int mm_prot = PROT_READ, mm_flags = 0;
64 rrd_file_t *rrd_file = NULL;
65 off_t newfile_size = 0;
67 if (rdwr & RRD_CREAT) {
68 /* yes bad inline signaling alert, we are using the
69 floatcookie to pass the size in ... only used in resize */
70 newfile_size = (off_t) rrd->stat_head->float_cookie;
/* Allocate the per-file bookkeeping structure and zero it. */
74 rrd_file = malloc(sizeof(rrd_file_t));
75 if (rrd_file == NULL) {
76 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
79 memset(rrd_file, 0, sizeof(rrd_file_t));
/* Translate the rdwr request bits into open flags and mmap settings. */
82 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
83 (RRD_READONLY | RRD_READWRITE)) {
84 /* Both READONLY and READWRITE were given, which is invalid. */
85 rrd_set_error("in read/write request mask");
89 if (rdwr & RRD_READONLY) {
92 mm_flags = MAP_PRIVATE;
94 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
98 if (rdwr & RRD_READWRITE) {
102 mm_flags = MAP_SHARED;
103 mm_prot |= PROT_WRITE;
106 if (rdwr & RRD_CREAT) {
107 flags |= (O_CREAT | O_TRUNC);
110 if (rdwr & RRD_READAHEAD) {
112 mm_flags |= MAP_POPULATE; /* populate ptes and data */
114 #if defined MAP_NONBLOCK
115 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
119 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
120 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
124 /* Better try to avoid seeks as much as possible. stat may be heavy but
125 * many concurrent seeks are even worse. */
126 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
127 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
130 if (newfile_size == 0) {
131 rrd_file->file_len = statb.st_size;
/* A requested size is applied by seeking to its last byte and writing
 * one zero byte, then rewinding to the start of the file.
 * NOTE(review): the results of lseek and write are not checked in the
 * lines shown here. */
133 rrd_file->file_len = newfile_size;
134 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
135 write(rrd_file->fd, "\0", 1); /* poke */
136 lseek(rrd_file->fd, 0, SEEK_SET);
138 #ifdef HAVE_POSIX_FADVISE
139 /* In general we need no read-ahead when dealing with rrd_files.
140 When we stop reading, it is highly unlikely that we start up again.
141 In this manner we actually save time and diskaccess (and buffer cache).
142 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
143 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
147 if (rdwr & RRD_READWRITE)
149 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
150 rrd_set_error("failed to disable the stream buffer\n");
/* Map file_len bytes of the file with the protection and flags chosen above. */
156 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
157 rrd_file->fd, offset);
159 /* check whether the mapping succeeded */
160 if (data == MAP_FAILED) {
161 rrd_set_error("mmaping file '%s': %s", file_name,
162 rrd_strerror(errno));
165 rrd_file->file_start = data;
166 if (rdwr & RRD_CREAT) {
/* NOTE(review): memset stores a single byte value; DNAN is defined
 * elsewhere and looks like a floating-point NaN constant, so this may not
 * fill the area with NaN values - TODO confirm. */
167 memset(data, DNAN, newfile_size - 1);
171 if (rdwr & RRD_CREAT)
174 if (rdwr & RRD_COPY) {
175 /* We will read everything in a moment (copying) */
/* NOTE(review): the advice argument is OR-ed from two constants here and
 * again for the stat_head hint below; madvise(2) describes advice as a
 * single value, so the combination may not mean both - TODO confirm. */
176 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
178 /* We do not need to read anything in for the moment */
179 madvise(data, rrd_file->file_len, MADV_RANDOM);
180 /* the stat_head will be needed soonish, so hint accordingly */
181 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
/* Pull in the header sections in file order via __rrd_read, which advances
 * offset: stat_head, ds_def, rra_def, live_head, pdp_prep, cdp_prep, rra_ptr. */
185 __rrd_read(rrd->stat_head, stat_head_t,
188 /* lets do some test if we are on track ... */
189 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
190 rrd_set_error("'%s' is not an RRD file", file_name);
191 goto out_nullify_head;
194 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
195 rrd_set_error("This RRD was created on another architecture");
196 goto out_nullify_head;
/* Reject files whose version string parses to a number above RRD_VERSION. */
199 version = atoi(rrd->stat_head->version);
201 if (version > atoi(RRD_VERSION)) {
202 rrd_set_error("can't handle RRD file version %s",
203 rrd->stat_head->version);
204 goto out_nullify_head;
206 #if defined USE_MADVISE
207 /* the ds_def will be needed soonish, so hint accordingly */
208 madvise(data + PAGE_START(offset),
209 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
211 __rrd_read(rrd->ds_def, ds_def_t,
212 rrd->stat_head->ds_cnt);
214 #if defined USE_MADVISE
215 /* the rra_def will be needed soonish, so hint accordingly */
216 madvise(data + PAGE_START(offset),
217 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
219 __rrd_read(rrd->rra_def, rra_def_t,
220 rrd->stat_head->rra_cnt);
222 /* handle different format for the live_head */
/* In the lines below live_head is allocated separately, only last_up
 * (sizeof(long) bytes) is taken from the file, and last_up_usec is set to 0. */
224 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
225 if (rrd->live_head == NULL) {
226 rrd_set_error("live_head_t malloc");
230 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
231 offset += sizeof(long);
233 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
235 rrd->live_head->last_up_usec = 0;
237 #if defined USE_MADVISE
238 /* the live_head will be needed soonish, so hint accordingly */
239 madvise(data + PAGE_START(offset),
240 sizeof(live_head_t), MADV_WILLNEED);
242 __rrd_read(rrd->live_head, live_head_t,
245 //XXX: This doesn't look like it needs madvise
246 __rrd_read(rrd->pdp_prep, pdp_prep_t,
247 rrd->stat_head->ds_cnt);
249 //XXX: This could benefit from madvise()ing
250 __rrd_read(rrd->cdp_prep, cdp_prep_t,
251 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
253 //XXX: This could benefit from madvise()ing
254 __rrd_read(rrd->rra_ptr, rra_ptr_t,
255 rrd->stat_head->rra_cnt);
/* offset now marks the end of the header; it is stored as both the header
 * length and the current logical position. */
257 rrd_file->header_len = offset;
258 rrd_file->pos = offset;
262 rrd->stat_head = NULL;
271 #if defined DEBUG && DEBUG > 1
272 /* Print list of in-core pages of the current rrd_file. */
/* Output goes to stderr; each report line is prefixed with mark and gives
 * the residency state, a pointer value and a run length.
 * NOTE(review): mincore(2) documents one vec entry per page, while the loop
 * below indexes vec once per byte offset up to file_len, and the pointer
 * printed is vec + prev rather than an address inside the mapping - TODO
 * confirm this is intended. */
275 rrd_file_t *rrd_file,
279 /* pretty print blocks in core */
282 ssize_t _page_size = sysconf(_SC_PAGESIZE);
/* file_len plus the page count rounded up; presumably the size of the vec
 * buffer - TODO confirm against the allocation. */
284 off = rrd_file->file_len +
285 ((rrd_file->file_len + _page_size - 1) / _page_size);
289 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
291 unsigned is_in = 0, was_in = 0;
293 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
294 is_in = vec[off] & 1; /* if lsb set then is core resident */
/* A change of residency state ends the current run, which is then reported. */
297 if (was_in != is_in) {
298 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
299 was_in ? "" : "not ", vec + prev, off - prev);
305 "%s: %sin core: %p len %ld\n", mark,
306 was_in ? "" : "not ", vec + prev, off - prev);
308 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
311 fprintf(stderr, "sorry mincore only works with mmap");
314 #endif /* defined DEBUG && DEBUG > 1 */
317 /* drop cache except for the header and the active pages */
/* Walks the RRAs in order and issues MADV_DONTNEED (plus
 * POSIX_FADV_DONTNEED when HAVE_POSIX_FADVISE is defined) for the range
 * between dontneed_start and each active_block, then for the remainder of
 * the file up to file_len. The return values of madvise and posix_fadvise
 * are not used in the lines shown here. */
319 rrd_file_t *rrd_file,
322 unsigned long dontneed_start;
323 unsigned long rra_start;
324 unsigned long active_block;
326 ssize_t _page_size = sysconf(_SC_PAGESIZE);
328 #if defined DEBUG && DEBUG > 1
329 mincore_print(rrd_file, "before");
332 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
/* The first candidate for dropping is the page after the one that holds
 * the end of the header. */
333 rra_start = rrd_file->header_len;
334 dontneed_start = PAGE_START(rra_start) + _page_size;
335 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
338 + rrd->rra_ptr[i].cur_row
339 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
340 if (active_block > dontneed_start) {
342 madvise(rrd_file->file_start + dontneed_start,
343 active_block - dontneed_start - 1, MADV_DONTNEED);
345 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
346 #ifdef HAVE_POSIX_FADVISE
347 posix_fadvise(rrd_file->fd, dontneed_start,
348 active_block - dontneed_start - 1,
349 POSIX_FADV_DONTNEED);
352 dontneed_start = active_block;
353 /* do not release 'hot' block if update for this RRA will occur
354 * within 10 minutes */
/* The test compares the distance from last_up to the next multiple of
 * pdp_step * pdp_cnt against 10 * 60; when it is smaller, one more page is
 * skipped. */
355 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
356 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
357 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
358 dontneed_start += _page_size;
361 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
/* Finally release everything from dontneed_start to the end of the file. */
365 madvise(rrd_file->file_start + dontneed_start,
366 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
368 #ifdef HAVE_POSIX_FADVISE
369 posix_fadvise(rrd_file->fd, dontneed_start,
370 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
372 #if defined DEBUG && DEBUG > 1
373 mincore_print(rrd_file, "after");
378 rrd_file_t *rrd_file)
/* Tear-down order in the lines below: schedule write-back of the mapping
 * with msync(MS_ASYNC), unmap it with munmap, then close the descriptor.
 * Each step stores its result in ret and is followed by an rrd_set_error
 * call that formats the strerror text for errno. */
383 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
385 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
386 ret = munmap(rrd_file->file_start, rrd_file->file_len);
388 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
390 ret = close(rrd_file->fd);
392 rrd_set_error("closing file: %s", rrd_strerror(errno));
399 /* Set position of rrd_file. */
/* The visible branches update the cached rrd_file->pos: SEEK_CUR adds off
 * to it and SEEK_END sets it to file_len + off. A separate path forwards
 * the request to lseek on rrd_file->fd and has an rrd_set_error call with
 * the strerror text for errno. */
402 rrd_file_t *rrd_file,
409 if (whence == SEEK_SET)
411 else if (whence == SEEK_CUR)
412 rrd_file->pos += off;
413 else if (whence == SEEK_END)
414 rrd_file->pos = rrd_file->file_len + off;
416 ret = lseek(rrd_file->fd, off, whence);
418 rrd_set_error("lseek: %s", rrd_strerror(errno));
421 /* mimic fseek, which returns 0 upon success */
/* ret < 0 evaluates to 1 for a negative ret and to 0 otherwise. */
422 return ret < 0; //XXX: or just ret to mimic lseek
426 /* Get current position in rrd_file. */
/* Returns the cached rrd_file->pos field; no system call is made in the
 * line shown here. */
428 inline off_t rrd_tell(
429 rrd_file_t *rrd_file)
431 return rrd_file->pos;
435 /* Read count bytes into buffer buf, starting at rrd_file->pos.
436 * Returns the number of bytes read or <0 on error. */
438 inline ssize_t rrd_read(
439 rrd_file_t *rrd_file,
/* Two data paths are visible below: a memcpy out of the mapping at
 * file_start + pos, and a plain read on rrd_file->fd. Both advance
 * rrd_file->pos by the amount transferred. */
447 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
450 return -1; /* EINVAL */
/* _surplus is how far pos + _cnt would run past file_len. */
451 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
452 if (_surplus > 0) { /* short read */
457 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
459 rrd_file->pos += _cnt; /* mimic read() semantics */
464 ret = read(rrd_file->fd, buf, count);
466 rrd_file->pos += ret; /* mimic read() semantics */
472 /* Write count bytes from buffer buf to the current position
473 * rrd_file->pos of rrd_file->fd.
474 * Returns the number of bytes written or <0 on error. */
476 inline ssize_t rrd_write(
477 rrd_file_t *rrd_file,
/* Two data paths are visible below: a memcpy into the mapping at
 * file_start + pos, and a plain write on rrd_file->fd. Both advance
 * rrd_file->pos.
 * NOTE(review): no comparison of pos + count against file_len is visible
 * before the memcpy - TODO confirm callers keep the copy inside the
 * mapping. */
485 return -1; /* EINVAL */
486 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
487 rrd_file->pos += count;
488 return count; /* mimic write() semantics */
490 ssize_t _sz = write(rrd_file->fd, buf, count);
493 rrd_file->pos += _sz;
499 /* flush all data pending to be written to FD. */
/* Calls fdatasync on rrd_file->fd. A non-zero result is reported through
 * rrd_set_error with the descriptor number and the strerror text for errno;
 * the function itself returns nothing. */
501 inline void rrd_flush(
502 rrd_file_t *rrd_file)
504 if (fdatasync(rrd_file->fd) != 0) {
505 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
506 rrd_strerror(errno));
511 /* Initialize RRD header. */
/* Sets the header pointers shown below to NULL. Since free(NULL) is a
 * no-op, fields left at NULL are safe to hand to free later. */
516 rrd->stat_head = NULL;
519 rrd->live_head = NULL;
521 rrd->pdp_prep = NULL;
522 rrd->cdp_prep = NULL;
523 rrd->rrd_value = NULL;
527 /* free RRD header data. */
/* Passes header pointers of rrd to free(). The pointers are not reset to
 * NULL in the lines shown here, so presumably the structure must be
 * re-initialised before reuse - TODO confirm. */
530 inline void rrd_free(
538 free(rrd->live_head);
539 free(rrd->stat_head);
545 free(rrd->rrd_value);
550 /* routine used by external libraries to free memory allocated by
560 /* XXX: FIXME: missing documentation. */
561 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
563 int /*_rrd_*/ readfile(
564 const char *file_name,
568 long writecnt = 0, totalcnt = MEMBLK;
573 if ((strcmp("-", file_name) == 0)) {
576 if ((input = fopen(file_name, "rb")) == NULL) {
577 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
585 } while (c != '\n' && !feof(input));
587 if (strcmp("-", file_name)) {
588 fseek(input, 0, SEEK_END);
589 /* have extra space for detecting EOF without realloc */
590 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
591 if (totalcnt < MEMBLK)
592 totalcnt = MEMBLK; /* sanitize */
593 fseek(input, offset * sizeof(char), SEEK_SET);
595 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
596 perror("Allocate Buffer:");
601 fread((*buffer) + writecnt, 1,
602 (totalcnt - writecnt) * sizeof(char), input);
603 if (writecnt >= totalcnt) {
606 rrd_realloc((*buffer),
607 (totalcnt + 4) * sizeof(char))) == NULL) {
608 perror("Realloc Buffer:");
612 } while (!feof(input));
613 (*buffer)[writecnt] = '\0';
614 if (strcmp("-", file_name) != 0) {