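// File: apriori.java
// (A "font size:" label from the web code viewer followed the file name; it is not part of the program.)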
// Implements the Apriori algorithm.
// Public interface:
//   Apriori(); Init(String); GenerateFrequence();
import java.lang.* ;
import java.io.* ;
/**
 * Apriori frequent-itemset and association-rule miner.
 *
 * Terminology used throughout this class:
 *   item    - an Item object, compared by the value returned from Item.Key()
 *   itemset - a FastVector of Item
 *   big     - a FastVector of itemsets
 *
 * Typical call order (see main): Init, SetSup, SetCon, GenerateFrequence,
 * AllChildren(frequence), GenerateRules.
 */
class Apriori
{
    // ----- members -----
    double minSup, minCon;           // minimum support, minimum confidence
    double deltaSup, deltaCon;       // not read or written inside this class
    int minNum, num;                 // minimum occurrence count; number of itemsets in fullSet (set by Init)
    FastVector fullSet;              // full (raw) data set obtained from GetSource
    FastVector frequence;            // the largest frequent itemsets found by GenerateFrequence
    FastVector[] children;           // children[i] = proper subsets of frequence[i]; allocated by GenerateFrequence
    FastVector ruleSrc, ruleDest;    // rule antecedents and consequents; each is a big
    FastVector confidence, support;  // hold Double values, index-aligned with ruleSrc/ruleDest
    FastVector test;                 // for testing only

    /** Creates a miner with default thresholds (minSup 0.2, minCon 0.4, minNum 2) and empty rule lists. */
    public Apriori()
    {
        minSup = 0.2;
        minCon = 0.4;
        minNum = 2;
        ruleSrc = new FastVector();
        ruleDest = new FastVector();
        confidence = new FastVector();   // holds Double, not itemsets
        support = new FastVector();      // holds Double, not itemsets
    }

    // ----- debugging / output helpers -----

    /**
     * Test helper: writes a big to "vector.txt". Every item key is followed by ','
     * and every itemset by ';'.
     *
     * @param big vector of itemsets to dump
     * @throws FileNotFoundException if "vector.txt" cannot be opened for writing
     * @throws IOException if writing or closing the file fails
     */
    public void WriteBig(FastVector big)
            throws FileNotFoundException, IOException
    {
        FileOutputStream fOut = new FileOutputStream("vector.txt");
        try {
            for (int i = 0; i < big.size(); i++) {
                FastVector itemset = (FastVector) big.elementAt(i);
                for (int j = 0; j < itemset.size(); j++) {
                    Item item = (Item) itemset.elementAt(j);
                    fOut.write(item.Key());
                    fOut.write(',');
                }
                fOut.write(';');
            }
        } finally {
            fOut.close();   // always release the file handle, even if a write fails
        }
    }

    /**
     * Test helper: writes a single itemset to "vector.txt". Every item key is
     * followed by a space and the itemset is terminated by ';'.
     *
     * @param vector itemset to dump
     * @throws FileNotFoundException if "vector.txt" cannot be opened for writing
     * @throws IOException if writing or closing the file fails
     */
    public void Write(FastVector vector)
            throws FileNotFoundException, IOException
    {
        FileOutputStream fOut = new FileOutputStream("vector.txt");
        try {
            for (int i = 0; i < vector.size(); i++) {
                Item item = (Item) vector.elementAt(i);
                fOut.write(item.Key());
                fOut.write(' ');
            }
            fOut.write(';');
        } finally {
            fOut.close();   // always release the file handle, even if a write fails
        }
    }

    /**
     * Test helper: prints one itemset on a line of standard output, each key
     * printed as (Key() - '0') followed by a space.
     *
     * @param vector itemset to print
     */
    public void Print(FastVector vector)
    {
        for (int i = 0; i < vector.size(); i++) {
            Item item = (Item) vector.elementAt(i);
            System.out.print(item.Key()-'0');
            System.out.print(' ');
        }
        System.out.println(' ');
    }

    /**
     * Test helper: prints every itemset of a big, each followed by a line holding one space.
     *
     * @param vector big to print
     */
    public void PrintBig(FastVector vector)
    {
        for (int i = 0; i < vector.size(); i++) {
            Print((FastVector) vector.elementAt(i));
            System.out.println(" ");
        }
    }

    /** Test helper: prints every entry of children, separated by a dashed line. */
    public void PrintChildren()
    {
        for (int i = 0; i < children.length; i++) {
            PrintBig(children[i]);
            System.out.println("--------------------");
        }
    }

    /** Test helper: prints every generated rule as antecedent, confidence, consequent. */
    public void PrintRules()
    {
        for (int i = 0; i < ruleSrc.size(); i++) {
            System.out.println("--------- "+i+" -----------");
            Print((FastVector) ruleSrc.elementAt(i));
            System.out.println(" "+"=>"+" confidence is:"+
                    ((Double) confidence.elementAt(i)).toString());
            Print((FastVector) ruleDest.elementAt(i));
        }
    }

    // ----- configuration -----

    /**
     * Sets the minimum support and derives the minimum occurrence count from it.
     * minNum becomes floor(total * sup) + 1, i.e. an itemset must occur in strictly
     * more than total * sup itemsets of the full set.
     * Values outside [0, 1] (including NaN) are ignored and leave both minSup and
     * minNum unchanged, so the two thresholds can never disagree.
     *
     * @param sup minimum support as a fraction of the data set
     * @throws IllegalStateException if Init has not loaded the data set yet
     */
    public void SetSup(double sup)
    {
        if (fullSet == null)
            throw new IllegalStateException("Init must be called before SetSup");
        if (!(sup >= 0.0 && sup <= 1.0))
            return;
        double total = fullSet.size();   // total number of itemsets
        minSup = sup;
        minNum = (int) ((total * sup) + 1.0);
    }

    /**
     * Sets the minimum confidence. Values outside [0, 1] (including NaN) are ignored.
     *
     * @param con minimum confidence as a fraction
     */
    public void SetCon(double con)
    {
        if (con >= 0.0 && con <= 1.0)
            minCon = con;
    }

    /**
     * Loads the full data set through GetSource and records its size in num.
     *
     * @param str argument handed to the GetSource constructor (main passes a file name)
     * @throws IOException propagated from GetSource
     * @throws FileNotFoundException propagated from GetSource
     */
    public void Init(String str)
            throws IOException, FileNotFoundException
    {
        GetSource src = new GetSource(str);
        fullSet = src.GetAll();
        num = fullSet.size();
    }

    // ----- set primitives -----

    /** Returns true when itemset holds an item whose Key() equals item.Key(). */
    private boolean IsContain(FastVector itemset, Item item)
    {
        for (int i = 0; i < itemset.size(); i++) {
            Item candidate = (Item) itemset.elementAt(i);
            if (candidate.Key() == item.Key())
                return true;
        }
        return false;
    }

    /** Counts the itemsets of big that contain every item of itemset. */
    private int Num(FastVector big, FastVector itemset)
    {
        int count = 0;
        for (int i = 0; i < big.size(); i++) {
            if (IsContain((FastVector) big.elementAt(i), itemset))
                count++;
        }
        return count;
    }

    /** Returns true when itemset1 contains every item of itemset2 (itemset2 is a subset). */
    private boolean IsContain(FastVector itemset1, FastVector itemset2)
    {
        for (int i = 0; i < itemset2.size(); i++) {
            if (!IsContain(itemset1, (Item) itemset2.elementAt(i)))
                return false;
        }
        return true;
    }

    /**
     * Returns true when big already holds an itemset with the same items as itemset.
     * The size comparison matters: without it a larger superset stored in big would
     * count as a match, and Recursion would then drop every smaller subset of a
     * frequent itemset with four or more items.
     */
    private boolean IsContain2(FastVector big, FastVector itemset)
    {
        for (int i = 0; i < big.size(); i++) {
            FastVector candidate = (FastVector) big.elementAt(i);
            if (candidate.size() == itemset.size() && IsContain(candidate, itemset))
                return true;
        }
        return false;
    }

    /** Flattens a big into the vector of distinct items (by key) it mentions. */
    private FastVector BreakOne(FastVector big)
    {
        FastVector pieces = new FastVector();
        for (int i = 0; i < big.size(); i++) {
            FastVector itemset = (FastVector) big.elementAt(i);
            for (int j = 0; j < itemset.size(); j++) {
                Item item = (Item) itemset.elementAt(j);
                if (!IsContain(pieces, item))
                    pieces.addElement(item);
            }
        }
        return pieces;
    }

    /** Wraps every item of vector in its own one-element itemset and returns the resulting big. */
    private FastVector ToC1(FastVector vector)
    {
        FastVector big = new FastVector();
        for (int i = 0; i < vector.size(); i++) {
            FastVector itemset = new FastVector();
            itemset.addElement((Item) vector.elementAt(i));
            big.addElement(itemset);
        }
        return big;
    }

    /** Removes from candidates every itemset that occurs fewer than minNum times in fullSet. */
    private void Prune(FastVector candidates)
    {
        // Walk backwards so removals do not shift the indices still to be visited.
        for (int i = candidates.size() - 1; i >= 0; i--) {
            FastVector itemset = (FastVector) candidates.elementAt(i);
            if (Num(fullSet, itemset) < minNum)
                candidates.removeElementAt(i);
        }
    }

    // ----- frequent itemsets -----

    /**
     * Self-join step: extends every itemset of La by one item taken from La itself,
     * keeps each distinct result once, then prunes the candidates by minNum.
     *
     * @param La big holding the frequent itemsets of the current size
     * @return big holding the frequent itemsets that are one item larger (possibly empty)
     */
    private FastVector SelfConnect(FastVector La)
    {
        FastVector Cb = new FastVector();
        FastVector pieces = BreakOne(La);   // every item that occurs in La

        for (int i = 0; i < La.size(); i++) {
            FastVector base = (FastVector) La.elementAt(i);
            for (int j = 0; j < pieces.size(); j++) {
                Item item = (Item) pieces.elementAt(j);
                if (IsContain(base, item))
                    continue;               // item already part of this itemset
                FastVector extended = new FastVector();
                extended.appendElements(base);
                extended.addElement(item);
                if (!IsContain2(Cb, extended))
                    Cb.addElement(extended);
            }
        }

        Prune(Cb);
        return Cb;
    }

    /**
     * Computes the largest frequent itemsets of fullSet, stores them in frequence and
     * allocates one empty children slot per frequent itemset.
     * The single-item candidates are pruned by minNum before the first join, so
     * frequence never holds an infrequent itemset; with no frequent item at all (or an
     * empty data set) frequence is an empty vector rather than null.
     *
     * @throws IllegalStateException if Init has not loaded the data set yet
     */
    public void GenerateFrequence()
    {
        if (fullSet == null)
            throw new IllegalStateException("Init must be called before GenerateFrequence");

        FastVector current = ToC1(BreakOne(fullSet));   // C1: one itemset per distinct item
        Prune(current);                                 // L1
        frequence = current;
        while (current.size() > 0) {                    // Lk -> Lk+1 until nothing survives
            frequence = current;
            current = SelfConnect(current);
        }

        children = new FastVector[frequence.size()];
        for (int i = 0; i < children.length; i++)
            children[i] = new FastVector();
    }

    // ----- subsets -----

    /**
     * Adds to children[pos] every proper subset of vector that has at least two items.
     * A subset that is already recorded is not expanded again, because its own subsets
     * were produced when it was first added.
     *
     * @param vector itemset whose subsets are generated
     * @param pos index into the children array
     */
    public void Recursion(FastVector vector, int pos)
    {
        if (vector.size() <= 2)   // only single-item proper subsets remain
            return;
        for (int i = 0; i < vector.size(); i++) {
            FastVector subset = new FastVector();
            subset.appendElements(vector);
            subset.removeElementAt(i);   // drop the item at position i
            if (!IsContain2(children[pos], subset)) {
                children[pos].addElement(subset);
                Recursion(subset, pos);
            }
        }
    }

    /**
     * Fills children[pos] with all non-empty proper subsets of mother: first the
     * single-item subsets, then the larger ones via Recursion. A one-item mother
     * is skipped.
     *
     * @param mother itemset to decompose
     * @param pos index into the children array
     */
    public void Children(FastVector mother, int pos)
    {
        if (mother.size() == 1)
            return;
        for (int i = 0; i < mother.size(); i++) {
            FastVector single = new FastVector();
            single.addElement(mother.elementAt(i));
            children[pos].addElement(single);
        }
        Recursion(mother, pos);
    }

    /**
     * Calls Children for every itemset of vector, using the itemset's index as the
     * children slot. The children array is sized from frequence, so vector must not
     * hold more itemsets than frequence.
     *
     * @param vector big of frequent itemsets (main passes frequence)
     */
    public void AllChildren(FastVector vector)
    {
        for (int i = 0; i < vector.size(); i++)
            Children((FastVector) vector.elementAt(i), i);
    }

    // ----- rules -----

    /**
     * Set difference.
     *
     * @param full itemset to take items from
     * @param part itemset whose items are excluded
     * @return new itemset with the items of full that do not occur in part
     */
    public FastVector Odds(FastVector full, FastVector part)
    {
        FastVector odds = new FastVector();
        for (int i = 0; i < full.size(); i++) {
            Item item = (Item) full.elementAt(i);
            if (!IsContain(part, item))
                odds.addElement(item);
        }
        return odds;
    }

    /**
     * Ratio of the occurrence count of mother to the occurrence count of child in
     * fullSet. GenerateRules uses this ratio as the confidence of the rule
     * child =&gt; (mother minus child).
     *
     * @param mother itemset
     * @param child itemset
     * @return count(mother) / count(child), or 0.0 when child never occurs
     */
    public double Support(FastVector mother, FastVector child)
    {
        int childCount = Num(fullSet, child);
        if (childCount == 0)
            return 0.0;   // avoid NaN / Infinity from a zero denominator
        return (double) Num(fullSet, mother) / (double) childCount;
    }

    /**
     * Builds rules from frequence and children. For every frequent itemset (mother)
     * and each of its recorded subsets (child) whose confidence is strictly greater
     * than minCon, appends: child to ruleSrc, mother minus child to ruleDest, the
     * confidence to confidence, and the rule support to support.
     * The rule support is the fraction of itemsets in fullSet that contain the whole
     * mother itemset (antecedent and consequent together).
     * Results are appended; earlier rules are not cleared.
     */
    public void GenerateRules()
    {
        for (int i = 0; i < children.length; i++) {
            FastVector childset = children[i];
            FastVector mother = new FastVector();
            mother.appendElements((FastVector) frequence.elementAt(i));
            int motherCount = Num(fullSet, mother);   // same for every child of this mother

            for (int j = 0; j < childset.size(); j++) {
                FastVector child = new FastVector();
                child.appendElements((FastVector) childset.elementAt(j));

                int childCount = Num(fullSet, child);
                if (childCount == 0)
                    continue;   // confidence undefined; cannot yield a rule
                double con = (double) motherCount / (double) childCount;
                if (con > minCon) {
                    double sup = (double) motherCount / num;
                    ruleSrc.addElement(child);
                    ruleDest.addElement(Odds(mother, child));
                    confidence.addElement(Double.valueOf(con));
                    support.addElement(Double.valueOf(sup));
                }
            }
        }
    }

    /**
     * Demo driver: loads "zoo.arff", mines with support 0.4 and confidence 0.8,
     * dumps the frequent itemsets to "vector.txt" and hands the miner to Output.WriteRules.
     */
    public static void main(String arg[])
            throws FileNotFoundException, IOException
    {
        Apriori app = new Apriori();
        app.Init("zoo.arff");   // get the full set
        app.SetSup(0.4);
        app.SetCon(0.8);
        app.PrintBig(app.fullSet);
        // run Apriori
        app.GenerateFrequence();
        app.WriteBig(app.frequence);
        app.AllChildren(app.frequence);
        // app.PrintChildren();
        app.GenerateRules();
        // app.PrintRules();
        Output.WriteRules(app);
    }
}
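/*
 * Code-viewer residue (keyboard shortcut help), not part of the program:
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Toggle theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */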