Aberrant Behavior Detection support. A brief overview added to rrdtool.pod.
[rrdtool.git] / src / rrd_format.h
1 /*****************************************************************************
2  * RRDtool 1.0.33  Copyright Tobias Oetiker, 1997, 1998, 1999
3  *****************************************************************************
4  * rrd_format.h  RRD Database Format header
5  *****************************************************************************/
6
7 #ifndef _RRD_FORMAT_H
8 #define _RRD_FORMAT_H
9
10 #include "rrd.h"
11
12 /*****************************************************************************
13  * put this in your /usr/lib/magic file (/etc/magic on HPUX)
14  *
15  *  # rrd database format
16  *  0       string          RRD\0           rrd file
17  *  >5      string          >\0             version '%s'
18  *
19  *****************************************************************************/
20
21 #define RRD_COOKIE    "RRD"
22 #define RRD_VERSION   "0002"
23 #define FLOAT_COOKIE  8.642135E130
24
25 #if defined(WIN32)
26 #define DNAN          ((double)fmod(0.0,0.0))    
27 #define DINF          ((double)log(0.0))
28 #else
29
30 #define DNAN          ((double)(0.0/0.0))     /* we use a DNAN to
31                                                * represent the UNKNOWN
32                                                * */
33 #define DINF          ((double)(1.0/0.0))     /* we use a DINF to
34                                                * represent a value at the upper or
35                                                * lower border of the graph ...
36                                                * */
37 #endif
38
39 typedef union unival { 
40     unsigned long u_cnt; 
41     rrd_value_t   u_val;
42 } unival;
43
44
45 /****************************************************************************
46  * The RRD Database Structure
47  * ---------------------------
48  * 
49  * In oder to properly describe the database structure lets define a few
50  * new words:
51  *
52  * ds - Data Source (ds) providing input to the database. A Data Source (ds)
53  *       can be a traffic counter, a temperature, the number of users logged
54  *       into a system. The rrd database format can handle the input of
55  *       several Data Sources (ds) in a singe database.
56  *  
57  * dst - Data Source Type (dst). The Data Source Type (dst) defines the rules
58  *       applied to Build Primary Data Points from the input provided by the
59  *       data sources (ds).
60  *
61  * pdp - Primary Data Point (pdp). After the database has accepted the
62  *       input from the data sources (ds). It starts building Primary
63  *       Data Points (pdp) from the data. Primary Data Points (pdp)
64  *       are evenly spaced along the time axis (pdp_step). The values
65  *       of the Primary Data Points are calculated from the values of
66  *       the data source (ds) and the exact time these values were
67  *       provided by the data source (ds).
68  *
69  * pdp_st - PDP Start (pdp_st). The moments (pdp_st) in time where
70  *       these steps occur are defined by the moments where the
71  *       number of seconds since 1970-jan-1 modulo pdp_step equals
72  *       zero (pdp_st). 
73  *
74  * cf -  Consolidation Function (cf). An arbitrary Consolidation Function (cf)
75  *       (averaging, min, max) is applied to the primary data points (pdp) to
76  *       calculate the consolidated data point.
77  *
78  * cdp - Consolidated Data Point (cdp) is the long term storage format for data
79  *       in the rrd database. Consolidated Data Points represent one or
80  *       several primary data points collected along the time axis. The
81  *       Consolidated Data Points (cdp) are stored in Round Robin Archives
82  *       (rra).
83  *
84  * rra - Round Robin Archive (rra). This is the place where the
85  *       consolidated data points (cdp) get stored. The data is
86  *       organized in rows (row) and columns (col). The Round Robin
87  *       Archive got its name from the method data is stored in
88  *       there. An RRD database can contain several Round Robin
89  *       Archives. Each Round Robin Archive can have a different row
90  *       spacing along the time axis (pdp_cnt) and a different
91  *       consolidation function (cf) used to build its consolidated
92  *       data points (cdp).  
93  * 
94  * rra_st - RRA Start (rra_st). The moments (rra_st) in time where
95  *       Consolidated Data Points (cdp) are added to an rra are
96  *       defined by the moments where the number of seconds since
97  *       1970-jan-1 modulo pdp_cnt*pdp_step equals zero (rra_st).
98  *
99  * row - Row (row). A row represent all consolidated data points (cdp)
100  *       in a round robin archive who are of the same age.
101  *       
102  * col - Column (col). A column (col) represent all consolidated
103  *       data points (cdp) in a round robin archive (rra) who
104  *       originated from the same data source (ds).
105  *
106  */
107
108 /****************************************************************************
109  * POS 1: stat_head_t                           static header of the database
110  ****************************************************************************/
111
112 typedef struct stat_head_t {
113
114     /* Data Base Identification Section ***/
115     char             cookie[4];          /* RRD */
116     char             version[5];         /* version of the format */
117     double           float_cookie;       /* is it the correct double
118                                           * representation ?  */
119
120     /* Data Base Structure Definition *****/
121     unsigned long    ds_cnt;             /* how many different ds provide
122                                           * input to the rrd */
123     unsigned long    rra_cnt;            /* how many rras will be maintained
124                                           * in the rrd */
125     unsigned long    pdp_step;           /* pdp interval in seconds */
126
127     unival           par[10];            /* global parameters ... unused
128                                             at the moment */
129 } stat_head_t;
130
131
132 /****************************************************************************
133  * POS 2: ds_def_t  (* ds_cnt)                        Data Source definitions
134  ****************************************************************************/
135
136 enum dst_en          { DST_COUNTER=0,     /* data source types available */
137                        DST_ABSOLUTE, 
138                        DST_GAUGE,
139                        DST_DERIVE};
140
141 enum ds_param_en {   DS_mrhb_cnt=0,       /* minimum required heartbeat. A
142                                            * data source must provide input at
143                                            * least every ds_mrhb seconds,
144                                            * otherwise it is regarded dead and
145                                            * will be set to UNKNOWN */             
146                              DS_min_val,          /* the processed input of a ds must */
147                      DS_max_val };        /* be between max_val and min_val
148                                            * both can be set to UNKNOWN if you
149                                            * do not care. Data outside the limits
150                                            * set to UNKNOWN */
151
152 /* The magic number here is one less than DS_NAM_SIZE */
153 #define DS_NAM_FMT    "%19[a-zA-Z0-9_-]"
154 #define DS_NAM_SIZE   20
155
156 #define DST_FMT    "%19[A-Z]"
157 #define DST_SIZE   20
158
159 typedef struct ds_def_t {
160     char             ds_nam[DS_NAM_SIZE]; /* Name of the data source (null terminated)*/
161     char             dst[DST_SIZE];       /* Type of data source (null terminated)*/
162     unival           par[10];             /* index of this array see ds_param_en */
163 } ds_def_t;
164
165 /****************************************************************************
166  * POS 3: rra_def_t ( *  rra_cnt)         one for each store to be maintained
167  ****************************************************************************/
168 enum cf_en           { CF_AVERAGE=0,     /* data consolidation functions */ 
169                        CF_MINIMUM, 
170                        CF_MAXIMUM,
171                        CF_LAST,
172                                            CF_HWPREDICT, 
173                                            /* An array of predictions using the seasonal 
174                                                 * Holt-Winters algorithm. Requires an RRA of type
175                                                 * CF_SEASONAL for this data source. */
176                                            CF_SEASONAL,
177                                            /* An array of seasonal effects. Requires an RRA of
178                                                 * type CF_HWPREDICT for this data source. */
179                                            CF_DEVPREDICT,
180                                            /* An array of deviation predictions based upon
181                                                 * smoothed seasonal deviations. Requires an RRA of
182                                                 * type CF_DEVSEASONAL for this data source. */
183                                            CF_DEVSEASONAL,
184                                            /* An array of smoothed seasonal deviations. Requires
185                                                 * an RRA of type CF_HWPREDICT for this data source.
186                                                 * */
187                                            CF_FAILURES};
188                                            /* A binary array of failure indicators: 1 indicates
189                                                 * that the number of violations in the prescribed
190                                                 * window exceeded the prescribed threshold. */
191
192 #define MAX_RRA_PAR_EN 10
193 enum rra_par_en {   RRA_cdp_xff_val=0,  /* what part of the consolidated
194                      * datapoint must be known, to produce a
195                                          * valid entry in the rra */
196                                         RRA_hw_alpha,
197                                         /* exponential smoothing parameter for the intercept in
198                                          * the Holt-Winters prediction algorithm. */
199                                         RRA_hw_beta,
200                                         /* exponential smoothing parameter for the slope in
201                                          * the Holt-Winters prediction algorithm. */
202                                         RRA_dependent_rra_idx,
203                                         /* For CF_HWPREDICT: index of the RRA with the seasonal 
204                                          * effects of the Holt-Winters algorithm (of type
205                                          * CF_SEASONAL).
206                                          * For CF_DEVPREDICT: index of the RRA with the seasonal
207                                          * deviation predictions (of type CF_DEVSEASONAL).
208                                          * For CF_SEASONAL: index of the RRA with the Holt-Winters
209                                          * intercept and slope coefficient (of type CF_HWPREDICT).
210                                          * For CF_DEVSEASONAL: index of the RRA with the 
211                                          * Holt-Winters prediction (of type CF_HWPREDICT).
212                                          * For CF_FAILURES: index of the CF_DEVSEASONAL array.
213                                          * */
214                                         RRA_seasonal_smooth_idx,
215                                         /* For CF_SEASONAL and CF_DEVSEASONAL:
216                                          * an integer between 0 and row_count - 1 which
217                                          * is index in the seasonal cycle for applying
218                                          * the period smoother. */
219                                     RRA_failure_threshold,
220                                         /* For CF_FAILURES, number of violations within the last
221                                          * window required to mark a failure. */
222                     RRA_seasonal_gamma = RRA_hw_alpha,
223                                         /* exponential smoothing parameter for seasonal effects.
224                                          * */
225                     RRA_delta_pos = RRA_hw_alpha,
226                     RRA_delta_neg = RRA_hw_beta,
227                                         /* confidence bound scaling parameters for the
228                                          * the FAILURES RRA. */
229                     RRA_window_len = RRA_seasonal_smooth_idx};
230                                         /* For CF_FAILURES, the length of the window for measuring
231                                          * failures. */
232                         
233 #define CF_NAM_FMT    "%19[A-Z]"
234 #define CF_NAM_SIZE   20
235
236 typedef struct rra_def_t {
237     char             cf_nam[CF_NAM_SIZE];/* consolidation function (null term) */
238     unsigned long    row_cnt;            /* number of entries in the store */
239     unsigned long    pdp_cnt;            /* how many primary data points are
240                                           * required for a consolidated data
241                                           * point?*/
242     unival           par[MAX_RRA_PAR_EN];            /* index see rra_param_en */
243
244 } rra_def_t;
245
246
247 /****************************************************************************
248  ****************************************************************************
249  ****************************************************************************
250  * LIVE PART OF THE HEADER. THIS WILL BE WRITTEN ON EVERY UPDATE         *
251  ****************************************************************************
252  ****************************************************************************
253  ****************************************************************************/
254 /****************************************************************************
255  * POS 4: live_head_t                    
256  ****************************************************************************/
257
258 typedef struct live_head_t {
259     time_t           last_up;            /* when was rrd last updated */
260 } live_head_t;
261
262
263 /****************************************************************************
264  * POS 5: pdp_prep_t  (* ds_cnt)                     here we prepare the pdps 
265  ****************************************************************************/
266 #define LAST_DS_LEN 30 /* DO NOT CHANGE THIS ... */
267
268 enum pdp_par_en {   PDP_unkn_sec_cnt=0,  /* how many seconds of the current
269                                           * pdp value is unknown data? */
270
271                     PDP_val};            /* current value of the pdp.
272                                             this depends on dst */
273
274 typedef struct pdp_prep_t{    
275     char last_ds[LAST_DS_LEN];           /* the last reading from the data
276                                           * source.  this is stored in ASCII
277                                           * to cater for very large counters
278                                           * we might encounter in connection
279                                           * with SNMP. */
280     unival          scratch[10];         /* contents according to pdp_par_en */
281 } pdp_prep_t;
282
283 /* data is passed from pdp to cdp when seconds since epoch modulo pdp_step == 0
284    obviously the updates do not occur at these times only. Especially does the
285    format allow for updates to occur at different times for each data source.
286    The rules which makes this work is as follows:
287
288    * DS updates may only occur at ever increasing points in time
289    * When any DS update arrives after a cdp update time, the *previous*
290      update cycle gets executed. All pdps are transfered to cdps and the
291      cdps feed the rras where necessary. Only then the new DS value
292      is loaded into the PDP.                                                   */
293
294
295 /****************************************************************************
296  * POS 6: cdp_prep_t (* rra_cnt * ds_cnt )      data prep area for cdp values
297  ****************************************************************************/
298 #define MAX_CDP_PAR_EN 10
299 #define MAX_CDP_FAILURES_IDX 8 
300 /* max CDP scratch entries avail to record violations for a FAILURES RRA */
301 #define MAX_FAILURES_WINDOW_LEN 28
302 enum cdp_par_en {  CDP_val=0,          
303                    /* the base_interval is always an
304                                         * average */
305                            CDP_unkn_pdp_cnt,       
306                                    /* how many unknown pdp were
307                     * integrated. This and the cdp_xff
308                                         * will decide if this is going to
309                                         * be a UNKNOWN or a valid value */
310                                    CDP_hw_intercept,
311                                    /* Current intercept coefficient for the Holt-Winters
312                                         * prediction algorithm. */
313                                    CDP_hw_last_intercept,
314                                    /* Last iteration intercept coefficient for the Holt-Winters
315                                         * prediction algorihtm. */
316                                    CDP_hw_slope,
317                                    /* Current slope coefficient for the Holt-Winters
318                                         * prediction algorithm. */
319                                    CDP_hw_last_slope,
320                                    /* Last iteration slope coeffient. */
321                                    CDP_null_count,
322                                    /* Number of sequential Unknown (DNAN) values + 1 preceding
323                                     * the current prediction.
324                                         * */
325                                    CDP_last_null_count,
326                                    /* Last iteration count of Unknown (DNAN) values. */
327                                    CDP_primary_val = 8,
328                                    /* optimization for bulk updates: the value of the first CDP
329                                         * value to be written in the bulk update. */
330                                    CDP_secondary_val = 9,
331                                    /* optimization for bulk updates: the value of subsequent
332                                         * CDP values to be written in the bulk update. */
333                    CDP_hw_seasonal = CDP_hw_intercept,
334                    /* Current seasonal coefficient for the Holt-Winters
335                     * prediction algorithm. This is stored in CDP prep to avoid
336                     * redundant seek operations. */
337                    CDP_hw_last_seasonal = CDP_hw_last_intercept,
338                    /* Last iteration seasonal coeffient. */
339                    CDP_seasonal_deviation = CDP_hw_intercept,
340                    CDP_last_seasonal_deviation = CDP_hw_last_intercept,
341                    CDP_init_seasonal = CDP_null_count};
342                    /* init_seasonal is a flag which when > 0, forces smoothing updates
343                     * to occur when rra_ptr.cur_row == 0 */
344
345 typedef struct cdp_prep_t{
346     unival         scratch[MAX_CDP_PAR_EN];          
347                                                                                  /* contents according to cdp_par_en *
348                                           * init state should be NAN */
349
350 } cdp_prep_t;
351
352 /****************************************************************************
353  * POS 7: rra_ptr_t (* rra_cnt)       pointers to the current row in each rra
354  ****************************************************************************/
355
356 typedef struct rra_ptr_t {
357     unsigned long    cur_row;            /* current row in the rra*/
358 } rra_ptr_t;
359
360
361 /****************************************************************************
362  ****************************************************************************
363  * One single struct to hold all the others. For convenience.
364  ****************************************************************************
365  ****************************************************************************/
366 typedef struct rrd_t {
367     stat_head_t      *stat_head;          /* the static header */
368     ds_def_t         *ds_def;             /* list of data source definitions */
369     rra_def_t        *rra_def;            /* list of round robin archive def */
370     live_head_t      *live_head;
371     pdp_prep_t       *pdp_prep;           /* pdp data prep area */  
372     cdp_prep_t       *cdp_prep;           /* cdp prep area */
373     rra_ptr_t        *rra_ptr;            /* list of rra pointers */
374     rrd_value_t      *rrd_value;          /* list of rrd values */
375 } rrd_t;
376
377 /****************************************************************************
378  ****************************************************************************
379  * AFTER the header section we have the DATA STORAGE AREA it is made up from
380  * Consolidated Data Points organized in Round Robin Archives.
381  ****************************************************************************
382  ****************************************************************************
383
384  *RRA 0
385  (0,0) .................... ( ds_cnt -1 , 0)
386  .
387  . 
388  .
389  (0, row_cnt -1) ... (ds_cnt -1, row_cnt -1)
390
391  *RRA 1
392  *RRA 2
393
394  *RRA rra_cnt -1
395  
396  ****************************************************************************/
397
398
399 #endif
400
401
402
403