?? apriori.c
字號:
/*---------------------------------------------------------------------- File : apriori.c Contents: apriori algorithm for finding association rules Author : Christian Borgelt History : 1996.02.14 file created 1996.07.26 output precision reduced 1996.11.22 options -b, -f, and -r added 1996.11.24 option -e added (add. evaluation measures) 1997.08.18 normalized chi^2 measure added option -m (minimal rule length) added 1997.10.13 quiet version (no output to stdout or stderr) 1998.01.27 adapted to changed ist_create() function 1998.08.08 optional input file (item appearances) added 1998.09.02 several assertions added 1998.09.07 hyperedge mode (option -h) added 1998.12.08 output of absolute support (option -a) added float changed to double 1998.12.09 conversion of names to a scanable form added 1999.02.05 long int changed to int 1999.02.09 input from stdin, output to stdout added 1999.08.09 bug in check of support parameter (<= 0) fixed 1999.11.05 rule evaluation measure EM_AIMP added 1999.11.08 output of add. rule eval. measure value added 2000.03.16 optional use of original rule support definition 2001.04.01 option -h replaced by option -t (target type) 2001.05.26 extended support output added (option -x) 2001.06.09 extended support output for item sets added 2001.08.15 module scan used for output formatting 2001.11.18 item and transaction functions made a module 2001.11.19 options -C, -l changed, option -y removed 2001.12.28 adapted to module tract, some improvements 2002.01.11 evaluation measures codes changed to letters 2002.02.10 option -q extended by a direction parameter 2002.02.11 memory usage minimization option added 2002.06.09 arbitrary supp./conf. formats made possible 2003.01.09 option -k (item separator) added 2003.01.14 check for empty transaction set added 2003.03.12 output of lift value (conf/prior) added 2003.07.17 item filtering w.r.t. usage added (option -u) 2003.07.17 sorting w.r.t. transaction size sum added 2003.07.18 maximal itemset filter added 2003.08.11 closed itemset filter added 2003.08.15 item filtering for transaction tree added 2003.08.16 parameter for transaction filtering added 2003.08.18 dynamic filtering decision based on times added 2003.08.21 option -j (heap sort for transactions) added 2003.09.22 meaning of option -j reversed (heapsort default) 2004.03.25 option -S added (maximal support of a set/rule) 2004.05.09 additional selection measure for sets added 2004.10.28 two unnecessary assignments removed 2004.11.20 bug in evaluation of -j (heap/quicksort) fixed 2004.11.23 absolute/relative support output changed 2004.12.09 semantics of option -p changed 2005.01.25 bug in output of absolute/relative support fixed 2005.01.31 another bug in this output fixed 2005.06.20 use of flag for "no item sorting" corrected 2007.02.13 adapted to modified module tabscan 2008.03.13 additional hyperedge evaluation added 2008.03.24 additional target added (association groups)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <string.h>#include <limits.h>#include <math.h>#include <time.h>#include <assert.h>#include "scan.h"#include "tract.h"#include "istree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "apriori"#define DESCRIPTION "find association rules with the apriori algorithm"#define VERSION "version 4.35 (2008.03.24) " \ "(c) 1996-2008 Christian Borgelt"/* --- target types --- */#define TT_SET 0 /* frequent item sets */#define TT_CLSET 1 /* closed item sets */#define TT_MFSET 2 /* maximal item sets */#define TT_RULE 3 /* association rules */#define TT_HEDGE 4 /* association hyperedges */#define TT_GROUP 5 /* association groups *//* --- error codes --- */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* too few/many arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_TARGET (-9) /* invalid target type */#define E_SUPP (-10) /* invalid support */#define E_CONF (-11) /* invalid confidence */#define E_MEASURE (-12) /* invalid evaluation measure */#define E_RULELEN (-13) /* invalid rule length */#define E_NOTAS (-14) /* no items or transactions */#define E_NOFREQ (-15) /* no frequent items */#define E_UNKNOWN (-21) /* unknown error */#ifndef QUIET /* if not quiet version */#ifdef FFLUSH#define MSG(x) x /* print messages */#else /* if to flush every output */#define MSG(x) x, fflush(stderr)#endif#else /* if quiet version */#define MSG(x) /* suppress messages */#endif#define SEC_SINCE(t) ((clock()-(t)) /(double)CLOCKS_PER_SEC)#define RECCNT(s) (ts_reccnt(is_tabscan(s)) \ - ((ts_delim(is_tabscan(s)) == TS_REC) ? 1 : 0))#define BUFFER(s) ts_buf(is_tabscan(s))/*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/#ifndef QUIET /* if not quiet version *//* --- target types --- */static const char *ttypes[] = { /* TT_SET 0 */ "set", /* TT_CLSET 1 */ "set", /* TT_MFSET 2 */ "set", /* TT_RULE 3 */ "rule", /* TT_HEDGE 4 */ "hyperedge", /* TT_GROUP 5 */ "group",};/* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_TARGET -9 */ "invalid target type '%c'\n", /* E_SUPP -10 */ "invalid minimal support %g%%\n", /* E_CONF -11 */ "invalid minimal confidence %g%%\n", /* E_MEASURE -12 */ "invalid additional evaluation measure %c\n", /* E_RULELEN -13 */ "invalid set size/rule length %d\n", /* E_NOTAS -14 */ "no items or transactions to work on\n", /* E_NOFREQ -15 */ "no frequent items\n", /* E_ITEMEXP -16 */ "file %s, record %d: item expected\n", /* E_DUPITEM -17 */ "file %s, record %d: duplicate item %s\n", /* E_APPEXP -18 */ "file %s, record %d: " "appearance indicator expected\n", /* E_UNKAPP -19 */ "file %s, record %d: " "unknown appearance indicator %s\n", /* E_FLDCNT -20 */ "file %s, record %d: too many fields\n", /* E_UNKNOWN -21 */ "unknown error\n"};#endif/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/#ifndef QUIETstatic char *prgname; /* program name for error messages */#endifstatic ITEMSET *itemset = NULL; /* item set */static TASET *taset = NULL; /* transaction set */static TATREE *tatree = NULL; /* transaction tree */static ISTREE *istree = NULL; /* item set tree */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/static void help (void){ /* --- print help on eval. measures */ #ifndef QUIET fprintf(stderr, "\n"); /* terminate startup message */ printf("additional evaluation measures (option -e#)\n"); printf("frequent item sets:\n"); printf("d or 1: binary logarithm of support quotient\n"); printf("association rules:\n"); printf("d or 1: absolute confidence difference to prior\n"); printf("q or 2: absolute difference of confidence quotient to 1\n"); printf("a or 3: absolute difference of improvement value to 1\n"); printf("i or 4: information difference to prior\n"); printf("c or 5: normalized chi^2 measure\n"); printf("p or 6: p-value computed from chi^2 measure\n"); #endif exit(0); /* abort the program */} /* help() *//*--------------------------------------------------------------------*/static void error (int code, ...){ /* --- print an error message */ #ifndef QUIET /* if not quiet version */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print error message */ va_end(args); /* end argument evaluation */ } #endif #ifndef NDEBUG /* if debug version */ if (istree) ist_delete(istree); /* clean up memory */ if (tatree) tat_delete(tatree); /* and close files */ if (taset) tas_delete(taset, 0); if (itemset) is_delete(itemset); if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE /* if storage debugging */ showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){ /* --- main function */ int i, k = 0, n; /* loop variables, counters */ char *s; /* to traverse the options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *fn_app = NULL; /* name of item appearances file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *comment = NULL; /* comment indicators */ char *used = NULL; /* item usage vector */ double supp = 0.1; /* minimal support (in percent) */ double smax = 1.0; /* maximal support (in percent) */ double conf = 0.8; /* minimal confidence (in percent) */ int mode = IST_BODY; /* search mode (rule support def.) */ int target = 'r'; /* target type (sets/rules/h.edges) */
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -