* rrd_open: rrd_close does not purge file from cache
authoroetiker <oetiker@a5681a0c-68f1-0310-ab6d-d61299d08faa>
Wed, 18 Jul 2007 00:30:49 +0000 (00:30 +0000)
committeroetiker <oetiker@a5681a0c-68f1-0310-ab6d-d61299d08faa>
Wed, 18 Jul 2007 00:30:49 +0000 (00:30 +0000)
* rrd_open: new function rrd_dontneed for purging un-needed pages from core
* rrd_open: on Linux at least, only fadvise DONTNEED has the power to purge
  pages from cache, so let's call madvise as well as fadvise
* rrd_create: uses open/write/close now, flushes file to disk and keeps only hot pages in core
* rrd_update: keeps only hot pages in core
* configure enables FADVISE even when mmap is in use

git-svn-id: svn://svn.oetiker.ch/rrdtool/trunk/program@1156 a5681a0c-68f1-0310-ab6d-d61299d08faa

src/rrd_create.c
src/rrd_open.c
src/rrd_tool.h
src/rrd_update.c

index a5c304f..a3b73ca 100644 (file)
@@ -624,31 +624,31 @@ int rrd_create_fn(
     rrd_t *rrd)
 {
     unsigned long i, ii;
-    FILE     *rrd_file;
+    int    rrd_file;
     rrd_value_t *unknown;
     int       unkn_cnt;
+    rrd_file_t *rrd_file_dn;
+    rrd_t      rrd_dn;
 
-    long      rrd_head_size;
-
-    if ((rrd_file = fopen(file_name, "wb")) == NULL) {
+    if ((rrd_file = open(file_name, O_WRONLY|O_CREAT|O_TRUNC,0666)) == NULL) {
         rrd_set_error("creating '%s': %s", file_name, rrd_strerror(errno));
         rrd_free(rrd);
         return (-1);
     }
 
-    fwrite(rrd->stat_head, sizeof(stat_head_t), 1, rrd_file);
+    write(rrd_file,rrd->stat_head, sizeof(stat_head_t));
 
-    fwrite(rrd->ds_def, sizeof(ds_def_t), rrd->stat_head->ds_cnt, rrd_file);
+    write(rrd_file,rrd->ds_def, sizeof(ds_def_t)*rrd->stat_head->ds_cnt);
 
-    fwrite(rrd->rra_def,
-           sizeof(rra_def_t), rrd->stat_head->rra_cnt, rrd_file);
+    write(rrd_file,rrd->rra_def,
+           sizeof(rra_def_t)* rrd->stat_head->rra_cnt);
 
-    fwrite(rrd->live_head, sizeof(live_head_t), 1, rrd_file);
+    write(rrd_file,rrd->live_head, sizeof(live_head_t));
 
     if ((rrd->pdp_prep = calloc(1, sizeof(pdp_prep_t))) == NULL) {
         rrd_set_error("allocating pdp_prep");
         rrd_free(rrd);
-        fclose(rrd_file);
+        close(rrd_file);
         return (-1);
     }
 
@@ -659,12 +659,12 @@ int rrd_create_fn(
         rrd->live_head->last_up % rrd->stat_head->pdp_step;
 
     for (i = 0; i < rrd->stat_head->ds_cnt; i++)
-        fwrite(rrd->pdp_prep, sizeof(pdp_prep_t), 1, rrd_file);
+        write(rrd_file,rrd->pdp_prep, sizeof(pdp_prep_t));
 
     if ((rrd->cdp_prep = calloc(1, sizeof(cdp_prep_t))) == NULL) {
         rrd_set_error("allocating cdp_prep");
         rrd_free(rrd);
-        fclose(rrd_file);
+        close(rrd_file);
         return (-1);
     }
 
@@ -701,7 +701,7 @@ int rrd_create_fn(
         }
 
         for (ii = 0; ii < rrd->stat_head->ds_cnt; ii++) {
-            fwrite(rrd->cdp_prep, sizeof(cdp_prep_t), 1, rrd_file);
+            write(rrd_file,rrd->cdp_prep, sizeof(cdp_prep_t));
         }
     }
 
@@ -711,7 +711,7 @@ int rrd_create_fn(
     if ((rrd->rra_ptr = calloc(1, sizeof(rra_ptr_t))) == NULL) {
         rrd_set_error("allocating rra_ptr");
         rrd_free(rrd);
-        fclose(rrd_file);
+        close(rrd_file);
         return (-1);
     }
 
@@ -721,15 +721,14 @@ int rrd_create_fn(
      * the pointer a priori. */
     for (i = 0; i < rrd->stat_head->rra_cnt; i++) {
         rrd->rra_ptr->cur_row = rrd->rra_def[i].row_cnt - 1;
-        fwrite(rrd->rra_ptr, sizeof(rra_ptr_t), 1, rrd_file);
+        write(rrd_file,rrd->rra_ptr, sizeof(rra_ptr_t));
     }
-    rrd_head_size = ftell(rrd_file);
 
     /* write the empty data area */
     if ((unknown = (rrd_value_t *) malloc(512 * sizeof(rrd_value_t))) == NULL) {
         rrd_set_error("allocating unknown");
         rrd_free(rrd);
-        fclose(rrd_file);
+        close(rrd_file);
         return (-1);
     }
     for (i = 0; i < 512; ++i)
@@ -740,40 +739,20 @@ int rrd_create_fn(
         unkn_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[i].row_cnt;
 
     while (unkn_cnt > 0) {
-        fwrite(unknown, sizeof(rrd_value_t), min(unkn_cnt, 512), rrd_file);
+        write(rrd_file,unknown, sizeof(rrd_value_t) * min(unkn_cnt, 512));
+
         unkn_cnt -= 512;
     }
     free(unknown);
-
-    /* lets see if we had an error */
-    if (ferror(rrd_file)) {
-        rrd_set_error("a file error occurred while creating '%s'", file_name);
-        fclose(rrd_file);
-        rrd_free(rrd);
-        return (-1);
-    }
-#ifdef HAVE_POSIX_FADVISE
-    /* this file is not going to be read again any time
-       soon, so we drop everything except the header portion from
-       the buffer cache. for this to work, we have to fdsync the file
-       first though. This will not be all that fast, but 'good' data
-       like other rrdfiles headers will stay in cache. Now this only works if creating
-       a single rrd file is not too large, but I assume this should not be the case
-       in general. Otherwhise we would have to sync and release while writing all
-       the unknown data. */
-    fflush(rrd_file);
-    fdatasync(fileno(rrd_file));
-    if (0 !=
-        posix_fadvise(fileno(rrd_file), rrd_head_size, 0,
-                      POSIX_FADV_DONTNEED)) {
-        rrd_set_error("setting POSIX_FADV_DONTNEED on '%s': %s", file_name,
-                      rrd_strerror(errno));
-        fclose(rrd_file);
-        return (-1);
-    }
-#endif
-
-    fclose(rrd_file);
+    fdatasync(rrd_file);
     rrd_free(rrd);
+    if ( close(rrd_file) == -1 ) {
+        rrd_set_error("creating rrd: %s", rrd_strerror(errno));
+        return -1;
+    }
+    /* flush all we don't need out of the cache */
+    rrd_file_dn = rrd_open(file_name, &rrd_dn, RRD_READONLY);
+    rrd_dontneed(rrd_file_dn,&rrd_dn);
+    rrd_close(rrd_file_dn);
     return (0);
 }
index a01b075..d701204 100644 (file)
@@ -11,7 +11,7 @@
 #define MEMBLK 8192
 
 /* DEBUG 2 prints information obtained via mincore(2) */
-// #define DEBUG 2
+//#define DEBUG 2
 /* do not calculate exact madvise hints but assume 1 page for headers and
  * set DONTNEED for the rest, which is assumed to be data */
 //#define ONE_PAGE 1
        offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt))
 #endif
 
-/* next page-aligned (i.e. page-align up) */
-#ifndef PAGE_ALIGN
-#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
-#endif
-/* previous page-aligned (i.e. page-align down) */
-#ifndef PAGE_ALIGN_DOWN
-#define PAGE_ALIGN_DOWN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
+/* get the address of the start of this page */
+#ifndef PAGE_START
+#define PAGE_START(addr) ((addr)&(~(_page_size-1)))
 #endif
 
 #ifdef HAVE_MMAP
@@ -213,11 +209,9 @@ rrd_file_t *rrd_open(
     } else {
 # ifndef ONE_PAGE
         /* We do not need to read anything in for the moment */
-        _madvise(data, rrd_file->file_len, MADV_DONTNEED);
+        _madvise(data, rrd_file->file_len, MADV_RANDOM);
         /* the stat_head will be needed soonish, so hint accordingly */
-        _madvise(data + PAGE_ALIGN_DOWN(offset),
-                 PAGE_ALIGN(sizeof(stat_head_t)),
-                 MADV_WILLNEED | MADV_RANDOM);
+        _madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
 
 # else
 /* alternatively: keep 1 page worth of data, likely headers,
@@ -252,8 +246,8 @@ rrd_file_t *rrd_open(
     }
 #if defined USE_MADVISE && !defined ONE_PAGE
     /* the ds_def will be needed soonish, so hint accordingly */
-    _madvise(data + PAGE_ALIGN_DOWN(offset),
-             PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
+    _madvise(data + PAGE_START(offset),
+             sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
              MADV_WILLNEED);
 #endif
     __rrd_read(rrd->ds_def, ds_def_t,
@@ -261,8 +255,8 @@ rrd_file_t *rrd_open(
 
 #if defined USE_MADVISE && !defined ONE_PAGE
     /* the rra_def will be needed soonish, so hint accordingly */
-    _madvise(data + PAGE_ALIGN_DOWN(offset),
-             PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
+    _madvise(data + PAGE_START(offset),
+             sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
              MADV_WILLNEED);
 #endif
     __rrd_read(rrd->rra_def, rra_def_t,
@@ -285,8 +279,8 @@ rrd_file_t *rrd_open(
     } else {
 #if defined USE_MADVISE && !defined ONE_PAGE
         /* the live_head will be needed soonish, so hint accordingly */
-        _madvise(data + PAGE_ALIGN_DOWN(offset),
-                 PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
+        _madvise(data + PAGE_START(offset),
+                 sizeof(live_head_t), MADV_WILLNEED);
 #endif
         __rrd_read(rrd->live_head, live_head_t,
                    1);
@@ -318,19 +312,13 @@ rrd_file_t *rrd_open(
 
 
 /* Close a reference to an rrd_file.  */
-
-int rrd_close(
-    rrd_file_t *rrd_file)
-{
-    int       ret;
-
-#if defined HAVE_MMAP || defined DEBUG
-    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
-#endif
-#if defined DEBUG && DEBUG > 1
+static
+void mincore_print(rrd_file_t *rrd_file,char * mark){
+#ifdef HAVE_MMAP
     /* pretty print blocks in core */
     off_t     off;
     unsigned char *vec;
+    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
 
     off = rrd_file->file_len +
         ((rrd_file->file_len + _page_size - 1) / _page_size);
@@ -346,35 +334,79 @@ int rrd_close(
                 if (off == 0)
                     was_in = is_in;
                 if (was_in != is_in) {
-                    fprintf(stderr, "%sin core: %p len %ld\n",
+                    fprintf(stderr, "%s: %sin core: %p len %ld\n",mark,
                             was_in ? "" : "not ", vec + prev, off - prev);
                     was_in = is_in;
                     prev = off;
                 }
             }
             fprintf(stderr,
-                    "%sin core: %p len %ld\n",
+                    "%s: %sin core: %p len %ld\n", mark,
                     was_in ? "" : "not ", vec + prev, off - prev);
         } else
             fprintf(stderr, "mincore: %s", rrd_strerror(errno));
     }
-#endif                          /* DEBUG */
+#else
+  fprintf(stderr, "sorry mincore only works with mmap");
+#endif
+}
+
+
+/* drop cache except for the header and the active pages */
+void
+rrd_dontneed (
+    rrd_file_t *rrd_file,
+    rrd_t *rrd){
+    unsigned long      dontneed_start;
+    unsigned long      rra_start;
+    unsigned long      active_block;
+    unsigned long      i;
+    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
+
+#if defined DEBUG && DEBUG > 1
+    mincore_print(rrd_file,"before");
+#endif
 
-#ifdef USE_MADVISE
-# ifdef ONE_PAGE
-    /* Keep headers around, round up to next page boundary.  */
-    ret =
-        PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
-    if (rrd_file->file_len > ret)
-        _madvise(rrd_file->file_start + ret,
-                 rrd_file->file_len - ret, MADV_DONTNEED);
-# else
     /* ignoring errors from RRDs that are smaller then the file_len+rounding */
-    _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
-             rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
+    rra_start = rrd_file->header_len;
+    dontneed_start = PAGE_START(rra_start)+_page_size;
+    for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
+       active_block =
+              PAGE_START(rra_start
+                         + rrd->rra_ptr[i].cur_row 
+                         * rrd->stat_head->ds_cnt 
+                         * sizeof(rrd_value_t));
+       if (active_block > dontneed_start){
+#ifdef USE_MADVISE
+           _madvise(rrd_file->file_start + dontneed_start,
+                   active_block-dontneed_start-1,
+                   MADV_DONTNEED);
+#endif
+/* in linux at least only fadvise DONTNEED seems to purge pages from cache */
+#ifdef HAVE_POSIX_FADVISE
+            posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED);
+#endif
+       }
+       dontneed_start = active_block + _page_size;
+       rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t);
+    }
+#ifdef USE_MADVISE
+    _madvise(rrd_file->file_start + dontneed_start,
+             rrd_file->file_len - dontneed_start,
              MADV_DONTNEED);
-# endif
 #endif
+#ifdef HAVE_POSIX_FADVISE
+    posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED);
+#endif
+#if defined DEBUG && DEBUG > 1
+    mincore_print(rrd_file,"after");
+#endif
+}
+
+int rrd_close(
+    rrd_file_t *rrd_file)
+{
+    int       ret;
 #ifdef HAVE_MMAP
     ret = munmap(rrd_file->file_start, rrd_file->file_len);
     if (ret != 0)
index 4f43dbf..06f4de1 100644 (file)
@@ -137,6 +137,9 @@ extern    "C" {
     const char *const file_name,
     rrd_t *rrd,
     unsigned rdwr);
+    void rrd_dontneed(
+    rrd_file_t *rrd_file,
+    rrd_t *rrd);
     int       rrd_close(
     rrd_file_t *rrd_file);
     ssize_t   rrd_read(
index fa60c0b..9a1f32d 100644 (file)
@@ -1443,19 +1443,7 @@ int _rrd_update(
         goto err_free_pdp_new;
     }
 #endif
-#ifdef HAVE_POSIX_FADVISExxx
 
-    /* with update we have write ops, so they will probably not be done by now, this means
-       the buffers will not get freed. But calling this for the whole file - header
-       will let the data off the hook as soon as it is written when if it is from a previous
-       update cycle. Calling fdsync to force things is much too hard here. */
-
-    if (0 != posix_fadvise(rrd_file->fd, rra_begin, 0, POSIX_FADV_DONTNEED)) {
-        rrd_set_error("setting POSIX_FADV_DONTNEED on '%s': %s", filename,
-                      rrd_strerror(errno));
-        goto err_free_pdp_new;
-    }
-#endif
     /* rrd_flush(rrd_file); */
 
     /* calling the smoothing code here guarantees at most
@@ -1479,17 +1467,9 @@ int _rrd_update(
             rra_start += rrd.rra_def[i].row_cnt
                 * rrd.stat_head->ds_cnt * sizeof(rrd_value_t);
         }
-#ifdef HAVE_POSIX_FADVISExxx
-        /* same procedure as above ... */
-        if (0 !=
-            posix_fadvise(rrd_file->fd, rra_begin, 0, POSIX_FADV_DONTNEED)) {
-            rrd_set_error("setting POSIX_FADV_DONTNEED on '%s': %s", filename,
-                          rrd_strerror(errno));
-            goto err_free_pdp_new;
-        }
-#endif
     }
 
+    rrd_dontneed(rrd_file,&rrd);
     rrd_free(&rrd);
     rrd_close(rrd_file);