added msync before unmap
[rrdtool.git] / src / rrd_open.c
index d701204..70e0ab9 100644 (file)
@@ -1,5 +1,5 @@
 /*****************************************************************************
- * RRDtool 1.2.23  Copyright by Tobi Oetiker, 1997-2007
+ * RRDtool 1.2.99907080300  Copyright by Tobi Oetiker, 1997-2007
  *****************************************************************************
  * rrd_open.c  Open an RRD File
  *****************************************************************************
 #define MEMBLK 8192
 
 /* DEBUG 2 prints information obtained via mincore(2) */
-//#define DEBUG 2
+#define DEBUG 1
 /* do not calculate exact madvise hints but assume 1 page for headers and
  * set DONTNEED for the rest, which is assumed to be data */
-//#define ONE_PAGE 1
 /* Avoid calling madvise on areas that were already hinted. May be benefical if
  * your syscalls are very slow */
-#define CHECK_MADVISE_OVERLAPS 1
 
 #ifdef HAVE_MMAP
 /* the cast to void* is there to avoid this warning seen on ia64 with certain
 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
 #endif
 
-#ifdef HAVE_MMAP
-/* vector of last madvise hint */
-typedef struct _madvise_vec_t {
-    void     *start;
-    ssize_t   length;
-} _madvise_vec_t;
-_madvise_vec_t _madv_vec = { NULL, 0 };
-#endif
-
-#if defined CHECK_MADVISE_OVERLAPS
-#define _madvise(_start, _off, _hint) \
-    if ((_start) != _madv_vec.start && (ssize_t)(_off) != _madv_vec.length) { \
-        _madv_vec.start = (_start) ; _madv_vec.length = (_off); \
-        madvise((_start), (_off), (_hint)); \
-    }
-#else
-#define _madvise(_start, _off, _hint) \
-    madvise((_start), (_off), (_hint))
-#endif
 
 /* Open a database file, return its header and an open filehandle,
  * positioned to the first cdp in the first rra.
@@ -135,14 +114,7 @@ rrd_file_t *rrd_open(
 #if defined MAP_NONBLOCK
         mm_flags |= MAP_NONBLOCK;   /* just populate ptes */
 #endif
-#ifdef USE_DIRECT_IO
-    } else {
-        flags |= O_DIRECT;
-#endif
     }
-#ifdef O_NONBLOCK
-    flags |= O_NONBLOCK;
-#endif
 
     if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
         rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
@@ -205,21 +177,12 @@ rrd_file_t *rrd_open(
 #ifdef USE_MADVISE
     if (rdwr & RRD_COPY) {
         /* We will read everything in a moment (copying) */
-        _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
+        madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
     } else {
-# ifndef ONE_PAGE
         /* We do not need to read anything in for the moment */
-        _madvise(data, rrd_file->file_len, MADV_RANDOM);
+        madvise(data, rrd_file->file_len, MADV_RANDOM);
         /* the stat_head will be needed soonish, so hint accordingly */
-        _madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
-
-# else
-/* alternatively: keep 1 page worth of data, likely headers,
- * don't need the rest.  */
-        _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
-        _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
-                 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
-# endif
+        madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
     }
 #endif
 
@@ -244,20 +207,18 @@ rrd_file_t *rrd_open(
                       rrd->stat_head->version);
         goto out_nullify_head;
     }
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
     /* the ds_def will be needed soonish, so hint accordingly */
-    _madvise(data + PAGE_START(offset),
-             sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
-             MADV_WILLNEED);
+    madvise(data + PAGE_START(offset),
+            sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
 #endif
     __rrd_read(rrd->ds_def, ds_def_t,
                rrd->stat_head->ds_cnt);
 
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
     /* the rra_def will be needed soonish, so hint accordingly */
-    _madvise(data + PAGE_START(offset),
-             sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
-             MADV_WILLNEED);
+    madvise(data + PAGE_START(offset),
+            sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
 #endif
     __rrd_read(rrd->rra_def, rra_def_t,
                rrd->stat_head->rra_cnt);
@@ -277,10 +238,10 @@ rrd_file_t *rrd_open(
 #endif
         rrd->live_head->last_up_usec = 0;
     } else {
-#if defined USE_MADVISE && !defined ONE_PAGE
+#if defined USE_MADVISE
         /* the live_head will be needed soonish, so hint accordingly */
-        _madvise(data + PAGE_START(offset),
-                 sizeof(live_head_t), MADV_WILLNEED);
+        madvise(data + PAGE_START(offset),
+                sizeof(live_head_t), MADV_WILLNEED);
 #endif
         __rrd_read(rrd->live_head, live_head_t,
                    1);
@@ -313,7 +274,10 @@ rrd_file_t *rrd_open(
 
 /* Close a reference to an rrd_file.  */
 static
-void mincore_print(rrd_file_t *rrd_file,char * mark){
+void mincore_print(
+    rrd_file_t *rrd_file,
+    char *mark)
+{
 #ifdef HAVE_MMAP
     /* pretty print blocks in core */
     off_t     off;
@@ -334,7 +298,7 @@ void mincore_print(rrd_file_t *rrd_file,char * mark){
                 if (off == 0)
                     was_in = is_in;
                 if (was_in != is_in) {
-                    fprintf(stderr, "%s: %sin core: %p len %ld\n",mark,
+                    fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                             was_in ? "" : "not ", vec + prev, off - prev);
                     was_in = is_in;
                     prev = off;
@@ -347,59 +311,67 @@ void mincore_print(rrd_file_t *rrd_file,char * mark){
             fprintf(stderr, "mincore: %s", rrd_strerror(errno));
     }
 #else
-  fprintf(stderr, "sorry mincore only works with mmap");
+    fprintf(stderr, "sorry mincore only works with mmap");
 #endif
 }
 
 
 /* drop cache except for the header and the active pages */
-void
-rrd_dontneed (
+void rrd_dontneed(
     rrd_file_t *rrd_file,
-    rrd_t *rrd){
-    unsigned long      dontneed_start;
-    unsigned long      rra_start;
-    unsigned long      active_block;
-    unsigned long      i;
+    rrd_t *rrd)
+{
+    unsigned long dontneed_start;
+    unsigned long rra_start;
+    unsigned long active_block;
+    unsigned long i;
     ssize_t   _page_size = sysconf(_SC_PAGESIZE);
 
 #if defined DEBUG && DEBUG > 1
-    mincore_print(rrd_file,"before");
+    mincore_print(rrd_file, "before");
 #endif
 
     /* ignoring errors from RRDs that are smaller then the file_len+rounding */
     rra_start = rrd_file->header_len;
-    dontneed_start = PAGE_START(rra_start)+_page_size;
+    dontneed_start = PAGE_START(rra_start) + _page_size;
     for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
-       active_block =
-              PAGE_START(rra_start
-                         + rrd->rra_ptr[i].cur_row 
-                         * rrd->stat_head->ds_cnt 
-                         * sizeof(rrd_value_t));
-       if (active_block > dontneed_start){
+        active_block =
+            PAGE_START(rra_start
+                       + rrd->rra_ptr[i].cur_row
+                       * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
+        if (active_block > dontneed_start) {
 #ifdef USE_MADVISE
-           _madvise(rrd_file->file_start + dontneed_start,
-                   active_block-dontneed_start-1,
-                   MADV_DONTNEED);
+            madvise(rrd_file->file_start + dontneed_start,
+                    active_block - dontneed_start - 1, MADV_DONTNEED);
 #endif
 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
 #ifdef HAVE_POSIX_FADVISE
-            posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED);
+            posix_fadvise(rrd_file->fd, dontneed_start,
+                          active_block - dontneed_start - 1,
+                          POSIX_FADV_DONTNEED);
 #endif
-       }
-       dontneed_start = active_block + _page_size;
-       rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t);
+        }
+        dontneed_start = active_block;
+        /* do not release 'hot' block if update for this RRA will occur within 10 minutes */
+        if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
+            rrd->live_head->last_up % (rrd->stat_head->pdp_step *
+                                       rrd->rra_def[i].pdp_cnt) < 10 * 60) {
+            dontneed_start += _page_size;
+        }
+        rra_start +=
+            rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
+            sizeof(rrd_value_t);
     }
 #ifdef USE_MADVISE
-    _madvise(rrd_file->file_start + dontneed_start,
-             rrd_file->file_len - dontneed_start,
-             MADV_DONTNEED);
+    madvise(rrd_file->file_start + dontneed_start,
+            rrd_file->file_len - dontneed_start, MADV_DONTNEED);
 #endif
 #ifdef HAVE_POSIX_FADVISE
-    posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED);
+    posix_fadvise(rrd_file->fd, dontneed_start,
+                  rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
 #endif
 #if defined DEBUG && DEBUG > 1
-    mincore_print(rrd_file,"after");
+    mincore_print(rrd_file, "after");
 #endif
 }
 
@@ -407,7 +379,11 @@ int rrd_close(
     rrd_file_t *rrd_file)
 {
     int       ret;
+
 #ifdef HAVE_MMAP
+    ret = msync(rrd_file->file_start, rrd_file->file_len,MS_ASYNC);
+    if (ret != 0)
+        rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
     ret = munmap(rrd_file->file_start, rrd_file->file_len);
     if (ret != 0)
         rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
@@ -467,8 +443,13 @@ inline ssize_t rrd_read(
 {
 #ifdef HAVE_MMAP
     size_t    _cnt = count;
-    ssize_t   _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
+    ssize_t   _surplus;
 
+    if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
+       return 0;
+    if (buf == NULL)
+       return -1; /* EINVAL */
+    _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
     if (_surplus > 0) { /* short read */
         _cnt -= _surplus;
     }
@@ -491,7 +472,7 @@ inline ssize_t rrd_read(
 
 /* write count bytes from buffer buf to the current position
  * rrd_file->pos of rrd_file->fd.
- * Returns the number of bytes written.  */
+ * Returns the number of bytes written or <0 on error.  */
 
 inline ssize_t rrd_write(
     rrd_file_t *rrd_file,
@@ -499,6 +480,10 @@ inline ssize_t rrd_write(
     size_t count)
 {
 #ifdef HAVE_MMAP
+    if (count == 0)
+       return 0;
+    if (buf == NULL)
+       return -1; /* EINVAL */
     memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
     rrd_file->pos += count;
     return count;       /* mimmic write() semantics */