?? mainfu.pas
字號:
{
Description:ID3算法的簡單實現,使用ClientDataset作為內存表
Author : Oneloong
Datetime : 2008.03.08
Test data : WeatherInfo_EN.txt
}
unit MainFU;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, ExtCtrls, Grids, StdCtrls, ComCtrls,math, DB, DBClient, DBGrids;
const
NodeInfo = 'AttributeName: %s, Count: %d, Entropy: %8.3f, Gain: %8.3f';
type
PSubSetInfo = ^TSubSetInfo;
TSubSetInfo = record
attriName : string;
count : integer;
entropy : double;
gain : double;
Splited : boolean ;
end;
//屬性信息
type
PAttriInfo = ^TAttriInfo;
TAttriInfo = record
Val : TStringList;//值的列表
Splited : boolean;//以分裂
end;
//值信息
type
PValInfo = ^TValInfo;
TValInfo = record
count : integer;//值的次數
ConditionCount : integer;//條件次數
end;
type
TMainF = class(TForm)
Panel1: TPanel;
Panel2: TPanel;
Panel3: TPanel;
Panel4: TPanel;
Splitter1: TSplitter;
btnLoadDB: TButton;
Button2: TButton;
PageControl1: TPageControl;
TabSheet1: TTabSheet;
TabSheet2: TTabSheet;
TV: TTreeView;
Memo: TMemo;
ledtNodeInfo: TLabeledEdit;
ClientDataSet: TClientDataSet;
DBGrid1: TDBGrid;
DataSource: TDataSource;
Label1: TLabel;
cbbSA: TComboBox;
procedure btnLoadDBClick(Sender: TObject);
procedure FormCreate(Sender: TObject);
procedure FormDestroy(Sender: TObject);
procedure Button2Click(Sender: TObject);
procedure TVChange(Sender: TObject; Node: TTreeNode);
private
FAttributesList: TStringList;
FOrgCDS: TClientDataSet;
FtempCDS: TClientDataSet;
FSplitedAttriList: TStringList;
procedure SetAttributesList(const Value: TStringList);
function isLeafNode(A1, V1: string;cds : TClientDataSet; var LeafCaption : string): boolean;
procedure SetOrgCDS(const Value: TClientDataSet);
procedure SettempCDS(const Value: TClientDataSet);
procedure SetSplitedAttriList(const Value: TStringList);
procedure CopyClientData(Src: TClientDataSet; Des : TClientDataSet);
private
FDBName: string;
FDBInfoList: TStringList;
FRecList: Tlist;
FInitState: double;
SubTreeView : TTreeView;
CurrentNode : TTreeNode;
//屬性列表
property AttributesList : TStringList read FAttributesList write SetAttributesList;
property SplitedAttriList :TStringList read FSplitedAttriList write SetSplitedAttriList;
function LoadDB: boolean;
function GetNodeInfo(attrName : string;Count :integer; Entropy :double; Gain :double) : string;
procedure SetDBName(const Value: string);
function FindNodeByText(aText : string) : TTreeNode;
private
{ Private declarations }
property DBName : string read FDBName write SetDBName;
property OrgCDS : TClientDataSet read FOrgCDS write SetOrgCDS;
property tempCDS : TClientDataSet read FtempCDS write SettempCDS;
procedure ParseDB(aDBName : string);//解析數據
procedure ClearAttriList();
function getValues(attri : string;cds : TClientDataSet) : TStringList;
function getEntropy(attri : string;cds : TClientDataSet) : double;//計算熵
//Gain(S, A) = Entropy(S) - ((|S v| / |S|) * Entropy(S v))
function getGain(
S,//information gain of example set S on attribute A
A : string;
cds : TClientDataSet): double;//信息增益
function getMaxGain(S : string; cds : TClientDataSet) : string;
//創建決策樹
procedure CreateDecisionTree(A, V: string; cds : TClientDataSet;ParNode : TTreeNode);
public
{ Public declarations }
end;
var
MainF: TMainF;
implementation
{$R *.dfm}
function TMainF.LoadDB : boolean;
var
OpenDlg : TOpenDialog;
begin
result := False;
OpenDlg := TOpenDialog.Create(self);
try
if OpenDlg.Execute then
begin
DBName := OpenDlg.FileName;
result := True;
end
else
begin
end;
finally
OpenDlg.Free;
end;
end;
procedure TMainF.btnLoadDBClick(Sender: TObject);
begin
if LoadDB then
begin
tv.Items.BeginUpdate;
ParseDB(DBName) ;
tv.Items.EndUpdate;
end;
tempCDS.Active := true;
end;
procedure TMainF.ParseDB(aDBName : string);
var
i, j : integer;
tmpStr : string;
AttributeName : string;//屬性名
tmpStrList, attristrList : TStringList;
aField : TStringField;
pAttInfo : PAttriInfo;
begin
tmpStrList := TstringList.Create;
attristrList := TstringList.Create;
try
tmpStrList.LoadFromFile(aDBName);
//取得屬性值
AttributesList.CommaText := tmpStrList.Strings[0];
cbbSA.Items.CommaText := AttributesList.CommaText;
ClientDataSet.Active := False;
ClientDataSet.FieldDefs.Clear;
//創建內存表
for i := 0 to AttributesList.Count -1 do
begin
aField := TStringField.Create(Self);
with aField do
begin
FieldName := Trim(AttributesList.Strings[i]);
Name := FieldName;
Size := 10;
Index := i//按創建的字段順序累加
end;
aField.DataSet := ClientDataSet;
end;
ClientDataSet.CreateDataSet;
tempCDS.Active := False;
tempCDS.FieldDefs.Clear;
tempCDS.FieldDefs := ClientDataSet.FieldDefs;
for i := 1 to tmpStrList.Count -1 do
begin
attristrList.CommaText := tmpStrList.Strings[i];
ClientDataSet.Append;
for j := 0 to attristrList.Count -1 do
ClientDataSet.Fields.Fields[j].AsString := attristrList.Strings[j];
ClientDataSet.Post;
end;
ClientDataSet.Active := true;
tempCDS.Data := ClientDataSet.Data;
OrgCDS.Data := ClientDataSet.Data;
finally
tmpStrList.Free;
attriStrList.Free;
end;
end;
procedure TMainF.FormCreate(Sender: TObject);
begin
AttributesList := TStringList.Create;
SplitedAttriList := TStringList.Create;
OrgCDS := TClientDataSet.Create(self);
tempCDS := TClientDataSet.Create(self);
end;
procedure TMainF.FormDestroy(Sender: TObject);
begin
SubTreeView.Free;
AttributesList.Free;
SplitedAttriList.Free;
OrgCDS.Free;
tempCDS.Free;
end;
function TMainF.GetNodeInfo(attrName: string; Count: integer; Entropy,
Gain: double): string;
begin
result := Format(NodeInfo,[attrName,Count,Entropy,Gain]);
end;
procedure TMainF.SetDBName(const Value: string);
begin
FDBName := Value;
end;
procedure TMainF.Button2Click(Sender: TObject);
var
i : integer;
begin
tv.Items.Clear;
SplitedAttriList.Clear;
clientDataset.Filter := '';
ClientDataSet.Filtered := False;
clientdataset.DisableControls;
for i := 0 to AttributesList.Count - 1 do
begin
if AttributesList.Strings[i] = cbbSA.Text then
continue;
CreateDecisionTree(AttributesList.Strings[i],'',clientDataSet,nil);
end;
clientdataset.Filtered := False;
clientdataset.EnableControls;
end;
function TMainF.FindNodeByText(aText: string): TTreeNode;
var
i : integer;
begin
result := nil;
for i := 0 to SubTreeView.Items.Count - 1 do
begin
if SubTreeView.Items.Item[i].Text <> aText then
continue
else
begin
result := SubTreeView.Items.Item[i];
break;
end;
end;
end;
procedure TMainF.TVChange(Sender: TObject; Node: TTreeNode);
begin
if Node.Data = nil then
exit;
ledtNodeInfo.Text := GetNodeInfo(
PSubSetInfo(Node.Data).attriName,
PSubSetInfo(Node.Data).count,
PSubSetInfo(Node.Data).entropy,
PSubSetInfo(Node.Data).gain
);
end;
procedure TMainF.SetAttributesList(const Value: TStringList);
begin
FAttributesList := Value;
end;
function TMainF.getEntropy(attri: string; cds: TClientDataSet) : double;
var
i, j, atrIndex : integer;
vList : TStringList;
pv : PValInfo;
tEntropy : double;
begin
vList := TStringList.Create;
tEntropy := 0.0;
try
//各值出現的次數
cds.First;
for i := 0 to cds.RecordCount - 1 do
begin
atrIndex := vList.IndexOf(cds.FieldByName(attri).AsString);
if atrIndex <> -1 then
begin
PValInfo(vList.Objects[atrIndex]).count :=
PValInfo(vList.Objects[atrIndex]).count + 1;
end
else
begin
pv := new(PValInfo);
pv.count := 1;
vList.AddObject(cds.FieldByName(attri).AsString,TObject(pv));
end;
cds.Next;
end;
//計算熵
for i := 0 to vList.Count - 1 do
begin
tEntropy := tEntropy -((PValInfo(vList.Objects[i]).count/cds.RecordCount))*log2(PValInfo(vList.Objects[i]).count/cds.RecordCount)
end;
result := tEntropy;
finally
for i := 0 to vList.Count - 1 do
begin
if vList.Objects[i] <> nil then
dispose(PValInfo(vList.Objects[i]));
end;
vList.Free;
end;
end;
procedure TMainF.ClearAttriList;
begin
end;
function TMainF.isLeafNode(A1, V1: string;cds : TClientDataSet; var LeafCaption : string) : boolean;
var
sFilter : string;
i : integer;
tStr : string;
begin
result := true;
sFilter := A1+'='+quotedstr(v1);//+' and '+A2+'='+quotedstr(v2);
cds.Filtered := false;
cds.Filter := sFilter;
cds.Filtered := true;
cds.First;
tStr := cds.FieldByName(cbbsa.Text).AsString;
LeafCaption := tStr;
for i := 0 to cds.RecordCount -1 do
begin
if cds.FieldByName(cbbsa.Text).AsString <> tStr then
begin
result := False;
break;
end;
cds.Next;
end;
end;
procedure TmainF.CopyClientData(Src : TClientDataSet; Des : TClientDataSet);
var
i, j : integer;
begin
tempCDS.Filtered := False;
for i := 0 to tempCDS.RecordCount -1 do
begin
tempCDS.First;
tempCDS.Delete;
end;
for i := 0 to src.RecordCount -1 do
begin
tempCDS.Append;
for j := 0 to src.FieldCount -1 do
tempCDS.Fields.Fields[j].AsVariant := Src.Fields.Fields[j].AsVariant;
tempCDS.Post;
src.Next;
end;
end;
procedure TMainF.CreateDecisionTree(A, V: string; cds: TClientDataSet;ParNode : TTreeNode);
var
splitedAttribute : string;
tNode : TTreeNode;
i, j : integer;
vList : TStringList;
sLeafCaption,sFilter, tStr : string;
begin
//根據最大信息增益得到分裂屬性
splitedAttribute := getMaxGain(cbbsa.Text,cds);
if splitedAttribute = '' then
exit;
//加入分裂屬性列表
if SplitedAttriList.IndexOf(splitedAttribute) = -1 then
SplitedAttriList.Add(splitedAttribute);
if A = splitedAttribute then
tStr := A
else
tStr := A+'----'+splitedAttribute;
tNode := TV.Items.AddChild(ParNode,tStr);
vList := TStringlist.Create;
try
vList := getValues(splitedAttribute,cds);
for i := 0 to vList.Count - 1 do
begin
if isLeafNode(splitedAttribute, vList.Strings[i], cds, sLeafCaption) then
begin
//葉節點
TV.Items.AddChild(tNode,vList.Strings[i]+'-----'+sLeafCaption);
end
else
begin
sFilter := splitedAttribute+'='+quotedstr(vList.Strings[i]);
ClientDataSet.Filtered := False;
ClientDataSet.Filter := sFilter;
ClientDataSet.Filtered := True;
CopyClientData(ClientDataSet,tempCDS);
CreateDecisionTree(vList.Strings[i],'',tempCDS,tNode);
end;
end;
finally
vList.Free;
end;
end;
{*
*}
function TMainF.getGain(S, A: string; cds : TClientDataSet): double;
var
i, j, atrIndex, recCount : integer;
vList,SList : TStringList;
pv : PValInfo;
strFilter : string;
d1, d2 : double;
begin
result := 0.0;
d1 := 0.0;
d2 := 0.0;
vList := TStringList.Create;
sList := TStringList.Create;
try
//A各值出現的次數
cds.First;
for i := 0 to cds.RecordCount - 1 do
begin
atrIndex := vList.IndexOf(cds.FieldByName(A).AsString);
if atrIndex <> -1 then
begin
PValInfo(vList.Objects[atrIndex]).count :=
PValInfo(vList.Objects[atrIndex]).count + 1;
end
else
begin
pv := new(PValInfo);
pv.count := 1;
vList.AddObject(cds.FieldByName(A).AsString,TObject(pv));
end;
cds.Next;
end;
//S各值出現的次數
cds.First;
for i := 0 to cds.RecordCount - 1 do
begin
atrIndex := sList.IndexOf(cds.FieldByName(S).AsString);
if atrIndex <> -1 then
begin
PValInfo(sList.Objects[atrIndex]).count :=
PValInfo(sList.Objects[atrIndex]).count + 1;
end
else
begin
pv := new(PValInfo);
pv.count := 1;
sList.AddObject(cds.FieldByName(S).AsString,TObject(pv));
end;
cds.Next;
end;
recCount := cds.RecordCount;
for i := 0 to vList.Count - 1 do
begin
strFilter := A+' = '+ Quotedstr(vList.Strings[i]) ;
cds.Filtered := false;
cds.Filter := strFilter;
cds.Filtered := true;
d1 := d1 - (PValInfo(vList.Objects[i]).count/recCount)*getEntropy(S,cds);
end;
cds.Filtered := false;
result := getEntropy(S,cds) + d1;
finally
for i := 0 to vList.Count - 1 do
begin
if vList.Objects[i] <> nil then
dispose(PValInfo(vList.Objects[i]));
end;
for i := 0 to sList.Count - 1 do
begin
if sList.Objects[i] <> nil then
dispose(PValInfo(sList.Objects[i]));
end;
vList.Free;
sList.Free;
end;
cds.Filtered := false;
end;
{*
*}
function TMainF.getMaxGain(S: string; cds : TClientDataSet): string;
var
i, j : integer;
tmpStr : string;
tmpD,tmpGain : Double;
begin
tmpD := 0.0;
tmpGain := 0.0;
for i := 0 to AttributesList.Count - 1 do
begin
if cbbsa.Text = AttributesList.Strings[i] then
continue;
if SplitedAttriList.IndexOf(AttributesList.Strings[i]) <> -1 then
continue;
tmpGain := getGain(S,AttributesList.Strings[i],cds);
if tmpGain > tmpD then
begin
tmpStr := AttributesList.Strings[i];
tmpD := tmpGain;
end;
end;
result := tmpStr;
end;
function TMainF.getValues(attri : string;cds : TClientDataSet) : TStringList;
var
i, atrIndex : integer;
begin
cds.First;
result := TStringList.Create;
for i := 0 to cds.RecordCount - 1 do
begin
atrIndex := result.IndexOf(cds.FieldByName(attri).AsString);
if atrIndex = -1 then
begin
result.Add(cds.FieldByName(attri).AsString);
end;
cds.Next;
end;
end;
procedure TMainF.SetOrgCDS(const Value: TClientDataSet);
begin
FOrgCDS := Value;
end;
procedure TMainF.SettempCDS(const Value: TClientDataSet);
begin
FtempCDS := Value;
end;
procedure TMainF.SetSplitedAttriList(const Value: TStringList);
begin
FSplitedAttriList := Value;
end;
end.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -