fixed indenting
[rrdtool.git] / src / rrd_open.c
index d701204..c0e4835 100644 (file)
@@ -1,5 +1,5 @@
 /*****************************************************************************
- * RRDtool 1.2.23  Copyright by Tobi Oetiker, 1997-2007
+ * RRDtool 1.3rc7  Copyright by Tobi Oetiker, 1997-2008
  *****************************************************************************
  * rrd_open.c  Open an RRD File
  *****************************************************************************
 #define MEMBLK 8192
 
 /* DEBUG 2 prints information obtained via mincore(2) */
-//#define DEBUG 2
+#define DEBUG 1
 /* do not calculate exact madvise hints but assume 1 page for headers and
  * set DONTNEED for the rest, which is assumed to be data */
-//#define ONE_PAGE 1
 /* Avoid calling madvise on areas that were already hinted. May be benefical if
  * your syscalls are very slow */
-#define CHECK_MADVISE_OVERLAPS 1
 
 #ifdef HAVE_MMAP
 /* the cast to void* is there to avoid this warning seen on ia64 with certain
 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
 #endif
 
-#ifdef HAVE_MMAP
-/* vector of last madvise hint */
-typedef struct _madvise_vec_t {
-    void     *start;
-    ssize_t   length;
-} _madvise_vec_t;
-_madvise_vec_t _madv_vec = { NULL, 0 };
-#endif
-
-#if defined CHECK_MADVISE_OVERLAPS
-#define _madvise(_start, _off, _hint) \
-    if ((_start) != _madv_vec.start && (ssize_t)(_off) != _madv_vec.length) { \
-        _madv_vec.start = (_start) ; _madv_vec.length = (_off); \
-        madvise((_start), (_off), (_hint)); \
-    }
-#else
-#define _madvise(_start, _off, _hint) \
-    madvise((_start), (_off), (_hint))
-#endif
 
 /* Open a database file, return its header and an open filehandle,
  * positioned to the first cdp in the first rra.
@@ -135,14 +114,7 @@ rrd_file_t *rrd_open(
 #if defined MAP_NONBLOCK
         mm_flags |= MAP_NONBLOCK;   /* just populate ptes */
 #endif
-#ifdef USE_DIRECT_IO
-    } else {
-        flags |= O_DIRECT;
-#endif
     }
-#ifdef O_NONBLOCK
-    flags |= O_NONBLOCK;
-#endif
 
     if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
         rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
@@ -168,11 +140,7 @@ rrd_file_t *rrd_open(
        When we stop reading, it is highly unlikely that we start up again.
        In this manner we actually save time and diskaccess (and buffer cache).
        Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
-    if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
-        rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
-                      rrd_strerror(errno));
-        goto out_close;
-    }
+    posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
 #endif
 
 /*
@@ -205,21 +173,12 @@ rrd_file_t *rrd_open(
 #ifdef USE_MADVISE
     if (rdwr & RRD_COPY) {
         /* We will read everything in a moment (copying) */
-        _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
+        madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
     } else {
-# ifndef ONE_PAGE
         /* We do not need to read anything in for the moment */
-        _madvise(data, rrd_file->file_len, MADV_RANDOM);
+        madvise(data, rrd_file->file_len, MADV_RANDOM);
         /* the stat_head will be needed soonish, so hint accordingly */
-        _madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
-
-# else
-/* alternatively: keep 1 page worth of data, likely headers,
- * don't need the rest.  */
-        _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
-        _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
-                 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
-# endif
+        madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
     }
 #endif
 
@@ -244,20 +203,18 @@ rrd_file_t *rrd_open(
                       rrd->stat_head->version);
         goto out_nullify_head;
     }
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
     /* the ds_def will be needed soonish, so hint accordingly */
-    _madvise(data + PAGE_START(offset),
-             sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
-             MADV_WILLNEED);
+    madvise(data + PAGE_START(offset),
+            sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
 #endif
     __rrd_read(rrd->ds_def, ds_def_t,
                rrd->stat_head->ds_cnt);
 
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
     /* the rra_def will be needed soonish, so hint accordingly */
-    _madvise(data + PAGE_START(offset),
-             sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
-             MADV_WILLNEED);
+    madvise(data + PAGE_START(offset),
+            sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
 #endif
     __rrd_read(rrd->rra_def, rra_def_t,
                rrd->stat_head->rra_cnt);
@@ -269,31 +226,28 @@ rrd_file_t *rrd_open(
             rrd_set_error("live_head_t malloc");
             goto out_close;
         }
-#ifdef HAVE_MMAP
-        memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
-        offset += sizeof(long);
-#else
-        offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
+#if defined USE_MADVISE
+        /* the live_head will be needed soonish, so hint accordingly */
+        madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
 #endif
+        __rrd_read(rrd->legacy_last_up, time_t,
+                   1);
+
+        rrd->live_head->last_up = *rrd->legacy_last_up;
         rrd->live_head->last_up_usec = 0;
     } else {
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
         /* the live_head will be needed soonish, so hint accordingly */
-        _madvise(data + PAGE_START(offset),
-                 sizeof(live_head_t), MADV_WILLNEED);
+        madvise(data + PAGE_START(offset),
+                sizeof(live_head_t), MADV_WILLNEED);
 #endif
         __rrd_read(rrd->live_head, live_head_t,
                    1);
     }
-//XXX: This doesn't look like it needs madvise
     __rrd_read(rrd->pdp_prep, pdp_prep_t,
                rrd->stat_head->ds_cnt);
-
-//XXX: This could benefit from madvise()ing
     __rrd_read(rrd->cdp_prep, cdp_prep_t,
                rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
-
-//XXX: This could benefit from madvise()ing
     __rrd_read(rrd->rra_ptr, rra_ptr_t,
                rrd->stat_head->rra_cnt);
 
@@ -311,9 +265,13 @@ rrd_file_t *rrd_open(
 }
 
 
-/* Close a reference to an rrd_file.  */
+#if defined DEBUG && DEBUG > 1
+/* Print list of in-core pages of the current rrd_file.  */
 static
-void mincore_print(rrd_file_t *rrd_file,char * mark){
+void mincore_print(
+    rrd_file_t *rrd_file,
+    char *mark)
+{
 #ifdef HAVE_MMAP
     /* pretty print blocks in core */
     off_t     off;
@@ -334,7 +292,7 @@ void mincore_print(rrd_file_t *rrd_file,char * mark){
                 if (off == 0)
                     was_in = is_in;
                 if (was_in != is_in) {
-                    fprintf(stderr, "%s: %sin core: %p len %ld\n",mark,
+                    fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                             was_in ? "" : "not ", vec + prev, off - prev);
                     was_in = is_in;
                     prev = off;
@@ -347,59 +305,69 @@ void mincore_print(rrd_file_t *rrd_file,char * mark){
             fprintf(stderr, "mincore: %s", rrd_strerror(errno));
     }
 #else
-  fprintf(stderr, "sorry mincore only works with mmap");
+    fprintf(stderr, "sorry mincore only works with mmap");
 #endif
 }
+#endif                          /* defined DEBUG && DEBUG > 1 */
 
 
 /* drop cache except for the header and the active pages */
-void
-rrd_dontneed (
+void rrd_dontneed(
     rrd_file_t *rrd_file,
-    rrd_t *rrd){
-    unsigned long      dontneed_start;
-    unsigned long      rra_start;
-    unsigned long      active_block;
-    unsigned long      i;
+    rrd_t *rrd)
+{
+    unsigned long dontneed_start;
+    unsigned long rra_start;
+    unsigned long active_block;
+    unsigned long i;
     ssize_t   _page_size = sysconf(_SC_PAGESIZE);
 
 #if defined DEBUG && DEBUG > 1
-    mincore_print(rrd_file,"before");
+    mincore_print(rrd_file, "before");
 #endif
 
     /* ignoring errors from RRDs that are smaller then the file_len+rounding */
     rra_start = rrd_file->header_len;
-    dontneed_start = PAGE_START(rra_start)+_page_size;
+    dontneed_start = PAGE_START(rra_start) + _page_size;
     for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
-       active_block =
-              PAGE_START(rra_start
-                         + rrd->rra_ptr[i].cur_row 
-                         * rrd->stat_head->ds_cnt 
-                         * sizeof(rrd_value_t));
-       if (active_block > dontneed_start){
+        active_block =
+            PAGE_START(rra_start
+                       + rrd->rra_ptr[i].cur_row
+                       * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
+        if (active_block > dontneed_start) {
 #ifdef USE_MADVISE
-           _madvise(rrd_file->file_start + dontneed_start,
-                   active_block-dontneed_start-1,
-                   MADV_DONTNEED);
+            madvise(rrd_file->file_start + dontneed_start,
+                    active_block - dontneed_start - 1, MADV_DONTNEED);
 #endif
 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
 #ifdef HAVE_POSIX_FADVISE
-            posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED);
+            posix_fadvise(rrd_file->fd, dontneed_start,
+                          active_block - dontneed_start - 1,
+                          POSIX_FADV_DONTNEED);
 #endif
-       }
-       dontneed_start = active_block + _page_size;
-       rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t);
+        }
+        dontneed_start = active_block;
+        /* do not release 'hot' block if update for this RRA will occur
+         * within 10 minutes */
+        if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
+            rrd->live_head->last_up % (rrd->stat_head->pdp_step *
+                                       rrd->rra_def[i].pdp_cnt) < 10 * 60) {
+            dontneed_start += _page_size;
+        }
+        rra_start +=
+            rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
+            sizeof(rrd_value_t);
     }
 #ifdef USE_MADVISE
-    _madvise(rrd_file->file_start + dontneed_start,
-             rrd_file->file_len - dontneed_start,
-             MADV_DONTNEED);
+    madvise(rrd_file->file_start + dontneed_start,
+            rrd_file->file_len - dontneed_start, MADV_DONTNEED);
 #endif
 #ifdef HAVE_POSIX_FADVISE
-    posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED);
+    posix_fadvise(rrd_file->fd, dontneed_start,
+                  rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
 #endif
 #if defined DEBUG && DEBUG > 1
-    mincore_print(rrd_file,"after");
+    mincore_print(rrd_file, "after");
 #endif
 }
 
@@ -407,7 +375,11 @@ int rrd_close(
     rrd_file_t *rrd_file)
 {
     int       ret;
+
 #ifdef HAVE_MMAP
+    ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
+    if (ret != 0)
+        rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
     ret = munmap(rrd_file->file_start, rrd_file->file_len);
     if (ret != 0)
         rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
@@ -443,8 +415,8 @@ off_t rrd_seek(
         rrd_set_error("lseek: %s", rrd_strerror(errno));
     rrd_file->pos = ret;
 #endif
-//XXX: mimic fseek, which returns 0 upon success
-    return ret == -1;   //XXX: or just ret to mimic lseek
+    /* mimic fseek, which returns 0 upon success */
+    return ret < 0;     /*XXX: or just ret to mimic lseek */
 }
 
 
@@ -457,7 +429,7 @@ inline off_t rrd_tell(
 }
 
 
-/* read count bytes into buffer buf, starting at rrd_file->pos.
+/* Read count bytes into buffer buf, starting at rrd_file->pos.
  * Returns the number of bytes read or <0 on error.  */
 
 inline ssize_t rrd_read(
@@ -467,8 +439,13 @@ inline ssize_t rrd_read(
 {
 #ifdef HAVE_MMAP
     size_t    _cnt = count;
-    ssize_t   _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
+    ssize_t   _surplus;
 
+    if (rrd_file->pos > rrd_file->file_len || _cnt == 0)    /* EOF */
+        return 0;
+    if (buf == NULL)
+        return -1;      /* EINVAL */
+    _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
     if (_surplus > 0) { /* short read */
         _cnt -= _surplus;
     }
@@ -489,9 +466,9 @@ inline ssize_t rrd_read(
 }
 
 
-/* write count bytes from buffer buf to the current position
+/* Write count bytes from buffer buf to the current position
  * rrd_file->pos of rrd_file->fd.
- * Returns the number of bytes written.  */
+ * Returns the number of bytes written or <0 on error.  */
 
 inline ssize_t rrd_write(
     rrd_file_t *rrd_file,
@@ -499,6 +476,10 @@ inline ssize_t rrd_write(
     size_t count)
 {
 #ifdef HAVE_MMAP
+    if (count == 0)
+        return 0;
+    if (buf == NULL)
+        return -1;      /* EINVAL */
     memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
     rrd_file->pos += count;
     return count;       /* mimmic write() semantics */
@@ -533,6 +514,7 @@ void rrd_init(
     rrd->ds_def = NULL;
     rrd->rra_def = NULL;
     rrd->live_head = NULL;
+    rrd->legacy_last_up = NULL;
     rrd->rra_ptr = NULL;
     rrd->pdp_prep = NULL;
     rrd->cdp_prep = NULL;
@@ -543,9 +525,12 @@ void rrd_init(
 /* free RRD header data.  */
 
 #ifdef HAVE_MMAP
-inline void rrd_free(
-    rrd_t UNUSED(*rrd))
+void rrd_free(
+    rrd_t *rrd)
 {
+    if (rrd->legacy_last_up) {  /* this gets set for version < 3 only */
+        free(rrd->live_head);
+    }
 }
 #else
 void rrd_free(