1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
7 *****************************************************************************/
13 /* DEBUG 2 prints information obtained via mincore(2) */
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
21 /* the cast to void* is there to avoid this warning seen on ia64 with certain
22 versions of gcc: 'cast increases required alignment of target type'
24 #define __rrd_read(dst, dst_t, cnt) \
25 (dst) = (dst_t*)(void*) (data + offset); \
26 offset += sizeof(dst_t) * (cnt)
28 #define __rrd_read(dst, dst_t, cnt) \
29 if ((dst = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
30 rrd_set_error(#dst " malloc"); \
31 goto out_nullify_head; \
33 offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
36 /* get the address of the start of this page */
38 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
42 /* Open a database file, return its header and an open filehandle,
43 * positioned to the first cdp in the first rra.
44 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
45 * before returning an error. Do not call rrd_close upon failure of rrd_open.
49 const char *const file_name,
54 mode_t mode = S_IRUSR;
58 ssize_t _page_size = sysconf(_SC_PAGESIZE);
59 int mm_prot = PROT_READ, mm_flags = 0;
64 rrd_file_t *rrd_file = NULL;
65 off_t newfile_size = 0;
67 if (rdwr & RRD_CREAT) {
68 /* Beware, in-band signalling: the float_cookie field is (ab)used to
69 pass in the size of the new file ... only used in resize */
70 newfile_size = (off_t) rrd->stat_head->float_cookie;
74 rrd_file = malloc(sizeof(rrd_file_t));
75 if (rrd_file == NULL) {
76 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
79 memset(rrd_file, 0, sizeof(rrd_file_t));
82 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
83 (RRD_READONLY | RRD_READWRITE)) {
84 /* Both READONLY and READWRITE were given, which is invalid. */
85 rrd_set_error("in read/write request mask");
89 if (rdwr & RRD_READONLY) {
92 mm_flags = MAP_PRIVATE;
94 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
98 if (rdwr & RRD_READWRITE) {
102 mm_flags = MAP_SHARED;
103 mm_prot |= PROT_WRITE;
106 if (rdwr & RRD_CREAT) {
107 flags |= (O_CREAT | O_TRUNC);
110 if (rdwr & RRD_READAHEAD) {
112 mm_flags |= MAP_POPULATE; /* populate ptes and data */
114 #if defined MAP_NONBLOCK
115 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
119 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
120 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
124 /* Better try to avoid seeks as much as possible. stat may be heavy but
125 * many concurrent seeks are even worse. */
126 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
127 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
130 if (newfile_size == 0) {
131 rrd_file->file_len = statb.st_size;
133 rrd_file->file_len = newfile_size;
134 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
135 write(rrd_file->fd, "\0", 1); /* poke */
136 lseek(rrd_file->fd, 0, SEEK_SET);
138 #ifdef HAVE_POSIX_FADVISE
139 /* In general we need no read-ahead when dealing with rrd_files.
140 When we stop reading, it is highly unlikely that we start up again.
141 In this manner we actually save time and disk access (and buffer cache).
142 Thanks to Dave Plonka for the idea of using POSIX_FADV_RANDOM here. */
143 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
144 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
145 rrd_strerror(errno));
151 if (rdwr & RRD_READWRITE)
153 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
154 rrd_set_error("failed to disable the stream buffer\n");
160 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
161 rrd_file->fd, offset);
163 /* let's see if the mapping worked */
164 if (data == MAP_FAILED) {
165 rrd_set_error("mmaping file '%s': %s", file_name,
166 rrd_strerror(errno));
169 rrd_file->file_start = data;
170 if (rdwr & RRD_CREAT) {
171 memset(data, DNAN, newfile_size - 1);
175 if (rdwr & RRD_CREAT)
178 if (rdwr & RRD_COPY) {
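179 /* We will read everything in a moment (copying). NOTE(review): madvise advice values are not bit flags; OR-ing two of them (here and in the stat_head hint below) does not request both hints -- confirm and split into separate calls. */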
180 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
182 /* We do not need to read anything in for the moment */
183 madvise(data, rrd_file->file_len, MADV_RANDOM);
184 /* the stat_head will be needed soonish, so hint accordingly */
185 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
189 __rrd_read(rrd->stat_head, stat_head_t,
192 /* lets do some test if we are on track ... */
193 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
194 rrd_set_error("'%s' is not an RRD file", file_name);
195 goto out_nullify_head;
198 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
199 rrd_set_error("This RRD was created on another architecture");
200 goto out_nullify_head;
203 version = atoi(rrd->stat_head->version);
205 if (version > atoi(RRD_VERSION)) {
206 rrd_set_error("can't handle RRD file version %s",
207 rrd->stat_head->version);
208 goto out_nullify_head;
210 #if defined USE_MADVISE
211 /* the ds_def will be needed soonish, so hint accordingly */
212 madvise(data + PAGE_START(offset),
213 sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
216 __rrd_read(rrd->ds_def, ds_def_t,
217 rrd->stat_head->ds_cnt);
219 #if defined USE_MADVISE
220 /* the rra_def will be needed soonish, so hint accordingly */
221 madvise(data + PAGE_START(offset),
222 sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
225 __rrd_read(rrd->rra_def, rra_def_t,
226 rrd->stat_head->rra_cnt);
228 /* handle different format for the live_head */
230 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
231 if (rrd->live_head == NULL) {
232 rrd_set_error("live_head_t malloc");
236 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
237 offset += sizeof(long);
239 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
241 rrd->live_head->last_up_usec = 0;
243 #if defined USE_MADVISE
244 /* the live_head will be needed soonish, so hint accordingly */
245 madvise(data + PAGE_START(offset),
246 sizeof(live_head_t), MADV_WILLNEED);
248 __rrd_read(rrd->live_head, live_head_t,
251 //XXX: This doesn't look like it needs madvise
252 __rrd_read(rrd->pdp_prep, pdp_prep_t,
253 rrd->stat_head->ds_cnt);
255 //XXX: This could benefit from madvise()ing
256 __rrd_read(rrd->cdp_prep, cdp_prep_t,
257 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
259 //XXX: This could benefit from madvise()ing
260 __rrd_read(rrd->rra_ptr, rra_ptr_t,
261 rrd->stat_head->rra_cnt);
263 rrd_file->header_len = offset;
264 rrd_file->pos = offset;
268 rrd->stat_head = NULL;
277 /* Print which parts of the mapped rrd_file are core resident, as reported by mincore(2); mark prefixes each line of output. */
279 void mincore_print(rrd_file_t *rrd_file,char * mark){
281 /* pretty print blocks in core */
284 ssize_t _page_size = sysconf(_SC_PAGESIZE);
286 off = rrd_file->file_len +
287 ((rrd_file->file_len + _page_size - 1) / _page_size);
291 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
293 unsigned is_in = 0, was_in = 0;
295 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
296 is_in = vec[off] & 1; /* if lsb set then is core resident */
299 if (was_in != is_in) {
300 fprintf(stderr, "%s: %sin core: %p len %ld\n",mark,
301 was_in ? "" : "not ", vec + prev, off - prev);
307 "%s: %sin core: %p len %ld\n", mark,
308 was_in ? "" : "not ", vec + prev, off - prev);
310 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
313 fprintf(stderr, "sorry mincore only works with mmap");
318 /* drop cache except for the header and the active pages */
321 rrd_file_t *rrd_file,
323 unsigned long dontneed_start;
324 unsigned long rra_start;
325 unsigned long active_block;
327 ssize_t _page_size = sysconf(_SC_PAGESIZE);
329 #if defined DEBUG && DEBUG > 1
330 mincore_print(rrd_file,"before");
333 /* ignoring errors from RRDs that are smaller than the file_len+rounding */
334 rra_start = rrd_file->header_len;
335 dontneed_start = PAGE_START(rra_start)+_page_size;
336 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
339 + rrd->rra_ptr[i].cur_row
340 * rrd->stat_head->ds_cnt
341 * sizeof(rrd_value_t));
342 if (active_block > dontneed_start) {
344 madvise(rrd_file->file_start + dontneed_start,
345 active_block-dontneed_start-1,
348 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
349 #ifdef HAVE_POSIX_FADVISE
350 posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED);
353 dontneed_start = active_block;
354 /* do not release 'hot' block if an update for this RRA will occur within 10 minutes */
355 if ( rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
356 rrd->live_head->last_up % (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt) < 10*60 ){
357 dontneed_start += _page_size;
359 rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t);
362 madvise(rrd_file->file_start + dontneed_start,
363 rrd_file->file_len - dontneed_start,
366 #ifdef HAVE_POSIX_FADVISE
367 posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED);
369 #if defined DEBUG && DEBUG > 1
370 mincore_print(rrd_file,"after");
375 rrd_file_t *rrd_file)
379 ret = munmap(rrd_file->file_start, rrd_file->file_len);
381 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
383 ret = close(rrd_file->fd);
385 rrd_set_error("closing file: %s", rrd_strerror(errno));
392 /* Set position of rrd_file. */
395 rrd_file_t *rrd_file,
402 if (whence == SEEK_SET)
404 else if (whence == SEEK_CUR)
405 rrd_file->pos += off;
406 else if (whence == SEEK_END)
407 rrd_file->pos = rrd_file->file_len + off;
409 ret = lseek(rrd_file->fd, off, whence);
411 rrd_set_error("lseek: %s", rrd_strerror(errno));
414 //XXX: mimic fseek, which returns 0 upon success
415 return ret == -1; //XXX: or just ret to mimic lseek
419 /* Get current position in rrd_file. */
421 inline off_t rrd_tell(
422 rrd_file_t *rrd_file)
424 return rrd_file->pos;
428 /* Read count bytes into buffer buf, starting at rrd_file->pos, and advance
429 * rrd_file->pos by the amount read. Returns the number of bytes read or <0 on error. */
431 inline ssize_t rrd_read(
432 rrd_file_t *rrd_file,
438 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
440 if (_surplus > 0) { /* short read: the request runs past file_len */
445 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
447 rrd_file->pos += _cnt; /* mimic read() semantics */
452 ret = read(rrd_file->fd, buf, count);
454 rrd_file->pos += ret; /* mimic read() semantics */
460 /* Write count bytes from buffer buf to the current position
461 * rrd_file->pos of rrd_file and advance rrd_file->pos by the amount written.
462 * Returns the number of bytes written. */
464 inline ssize_t rrd_write(
465 rrd_file_t *rrd_file,
470 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
471 rrd_file->pos += count;
472 return count; /* mimic write() semantics */
474 ssize_t _sz = write(rrd_file->fd, buf, count);
477 rrd_file->pos += _sz;
483 /* flush all data pending to be written to FD. */
485 inline void rrd_flush(
486 rrd_file_t *rrd_file)
488 if (fdatasync(rrd_file->fd) != 0) {
489 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
490 rrd_strerror(errno));
495 /* Initialize RRD header. */
500 rrd->stat_head = NULL;
503 rrd->live_head = NULL;
505 rrd->pdp_prep = NULL;
506 rrd->cdp_prep = NULL;
507 rrd->rrd_value = NULL;
511 /* free RRD header data. */
514 inline void rrd_free(
522 free(rrd->live_head);
523 free(rrd->stat_head);
529 free(rrd->rrd_value);
534 /* routine used by external libraries to free memory allocated by
544 /* XXX: FIXME: missing documentation. */
545 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
547 int /*_rrd_*/ readfile(
548 const char *file_name,
552 long writecnt = 0, totalcnt = MEMBLK;
557 if ((strcmp("-", file_name) == 0)) {
560 if ((input = fopen(file_name, "rb")) == NULL) {
561 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
569 } while (c != '\n' && !feof(input));
571 if (strcmp("-", file_name)) {
572 fseek(input, 0, SEEK_END);
573 /* have extra space for detecting EOF without realloc */
574 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
575 if (totalcnt < MEMBLK)
576 totalcnt = MEMBLK; /* sanitize */
577 fseek(input, offset * sizeof(char), SEEK_SET);
579 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
580 perror("Allocate Buffer:");
585 fread((*buffer) + writecnt, 1,
586 (totalcnt - writecnt) * sizeof(char), input);
587 if (writecnt >= totalcnt) {
590 rrd_realloc((*buffer),
591 (totalcnt + 4) * sizeof(char))) == NULL) {
592 perror("Realloc Buffer:");
596 } while (!feof(input));
597 (*buffer)[writecnt] = '\0';
598 if (strcmp("-", file_name) != 0) {