?? uct--delphi.txt
字號:
電腦圍棋的算法UCT語言delphi實現(xiàn)
經(jīng)過幾天的努力,終于實現(xiàn)了uct代碼,因為pascal是最優(yōu)美易懂,適合表達算法的語言,特奉上,有錯請大家拍磚,剛?cè)腴T的愛好者可以修改完善。如有建議,請不吝指教!
unit _XGoUCT;
interface
uses SysUtils, Classes, windows, _XGoBase;
var
_UCTMaxSimulation: integer = 100; //這個數(shù)越大,深入的層越多!
_UCTMaxTime: cardinal = 5000; //一次著棋的毫秒數(shù)
_UCTkomi: double = 2.5;
type
PNode = ^TNode;
TNode = record
move: TVertex;
wins: double;
visits: double;
child: array of PNode;
bestNode: PNode;
end;
type
TUCT = class(TXGoBase)
private
UCTk: double; //UCT常數(shù)
UCTActivePlayer: shortint; //當前次序
UCTweight: array[0..18, 0..18] of integer;
function fCreateChildNodes(const n: PNode): integer; //建立子節(jié)點,返回數(shù)量
procedure fFreeNodes(const n: PNode); //釋放樹技
procedure fSetBestNode(const n: PNode); //設(shè)置最好的節(jié)點
function fPlayRandom: integer; //自由布局
function fUCTSelect(const n: PNode): PNode; //uct選擇
function fUCTSimulation(const n: PNode): integer; //模似
function fUCTSearch(const p: Tplayer; const count: integer): TVertex; //搜索
public
constructor Create;
destructor Destroy; override;
function isEye(const i, j: integer; const p: Tplayer): boolean;
function getWeight(const x, y: integer): integer;
function fCalculate: double;
function fgetEmptyVertexs: TVertexs; //獲取被壓縮的空點
function getEvaluate: double; //評估,返回黑正白負
function PlayMove(const m: TVertex): boolean;
function genMove(const p: Tplayer): TVertex;
procedure showboard;
end;
implementation
//==============================================================================
procedure TUCT.showboard;
var
i, j, b: integer;
c, s, os: string;
const
topbottom: string = ' A B C D E F G H J K L M N O P Q R S T';
begin
os := '';
b := boardsize;
writeln(copy(topbottom, 1, 4 + (b - 1) * 2));
for i := 0 to b - 1 do
begin
s := '';
for j := 0 to b - 1 do
begin
case Vertexs[i, j] of
BLACK: if getmoveLast = char(i) + char(j) then
s := s + '◆'
else
s := s + '●';
WHITE: if getmoveLast = char(i) + char(j) then
s := s + '◇'
else
s := s + '○';
else
if (i = 0) then
if (j = 0) then
c := '┏'
else if (j > 0) and (j < b - 1) then
c := '┯'
else if (j = b - 1) then
c := '┓';
if (i > 0) and (i < b - 1) then
if (j = 0) then
c := '┠'
else if (j > 0) and (j < b - 1) then
c := '┼'
else if (j = b - 1) then
c := '┨';
if (i = b - 1) then
if (j = 0) then
c := '┗'
else if (j > 0) and (j < b - 1) then
c := '┷'
else if (j = b - 1) then
c := '┛';
case b of
19: if ((i = 3) and ((j = 3) or (j = b div 2) or (j = b - 4)))
or ((i = b div 2) and ((j = 3) or (j = b - 4)))
or ((i = b - 4) and ((j = 3) or (j = b div 2) or (j = b - 4)))
then
c := '╋';
13: if ((i = 3) and ((j = 3) or (j = b - 4)))
or ((i = b - 4) and ((j = 3) or (j = b - 4))) then
c := '╋';
end;
s := s + c;
end; //case
end;
s := format('%0:2s', [inttostr(b - i)]) + s + inttostr(b - i);
writeln(s);
end;
writeln(copy(topbottom, 1, 4 + (b - 1) * 2));
end;
function TUCT.getWeight(const x, y: integer): integer;
begin
Result := UCTweight[x, y];
end;
//==============================================================================
constructor TUCT.Create;
begin
inherited create;
UCTk := 1;
UCTActivePlayer := BLACK;
fillchar(UCTweight, sizeof(UCTweight), 0);
end;
destructor TUCT.Destroy;
begin
inherited Destroy;
end;
//------------------------------------------------------------------------------
function TUCT.isEye(const i, j: integer; const p: Tplayer): boolean;
var
b: boolean;
op: Tplayer;
begin
if p = BLACK then
op := WHITE
else
op := BLACK;
if (i - 1) in [0..BoardSize - 1] then //這一步很重要@~@
b := Vertexs[i - 1, j] = p
else
b := true;
if (i + 1) in [0..BoardSize - 1] then
b := b and (Vertexs[i + 1, j] = p);
if (j - 1) in [0..BoardSize - 1] then
b := b and (Vertexs[i, j - 1] = p);
if (j + 1) in [0..BoardSize - 1] then
b := b and (Vertexs[i, j + 1] = p);
{ if ((i - 1) in [0..BoardSize - 1]) and ((j - 1) in [0..BoardSize - 1]) then
b := b and (Vertexs[i - 1, j - 1] <> op);
if ((i + 1) in [0..BoardSize - 1]) and ((j - 1) in [0..BoardSize - 1]) then
b := b and (Vertexs[i + 1, j - 1] <> op);
if ((i - 1) in [0..BoardSize - 1]) and ((j + 1) in [0..BoardSize - 1]) then
b := b and (Vertexs[i - 1, j + 1] <> op);
if ((i + 1) in [0..BoardSize - 1]) and ((j + 1) in [0..BoardSize - 1]) then
b := b and (Vertexs[i + 1, j + 1] <> op);
}result := b;
end;
function TUCT.fCalculate: double;
//計算勝負,黑為正,白為負,盤面應(yīng)當為黑里無白,白里無黑的終結(jié)場面
var
x, y: integer;
c: integer;
begin
c := 0;
for x := 0 to BoardSize - 1 do
for y := 0 to BoardSize - 1 do
case Vertexs[x, y] of
BLACK: inc(c);
WHITE: dec(c);
else
if isEye(x, y, BLACK) then
inc(c)
else
dec(c); //判斷這個目歸屬,非黑即白
end;
result := c - _uctkomi;
end;
function TUCT.getEvaluate: double;
//評估勝負,黑為正,白為負
var
p, v, x, y, m, n, r, i, j, sc: integer;
begin
sc := 0;
fillchar(UCTweight, SizeOf(UCTweight), 0);
for x := 0 to boardsize - 1 do
for y := 0 to boardsize - 1 do
if Vertexs[x, y] <> EMPTY then
begin
p := 1 - Vertexs[x, y] shl 1;
for m := -3 to 3 do
begin
r := 3 - abs(m);
for n := -r to r do
begin
v := 1 shl (3 - abs(m) - abs(n));
i := x + m;
j := y + n;
if (i in [0..boardsize - 1]) and (j in [0..boardsize - 1]) then
UCTweight[i, j] := UCTweight[i, j] + v * p;
end;
end;
sc := sc + UCTweight[x, y];
end;
result := sc - _uctkomi;
end;
function TUCT.fCreateChildNodes(const n: PNode): integer;
//在父節(jié)點下面建立很多個子節(jié)點,要選擇地做點吧,要不還不overflow?!!!@_@
var
i: integer;
s: string;
Count: integer;
begin
result := 0;
s := fgetEmptyVertexs;
Count := length(s) div 2;
setlength(n.child, Count);
for i := 0 to Count - 1 do
begin
new(n.child);
n.child.move := s[i * 2 + 1] + s[i * 2 + 2];
n.child.wins := 0;
n.child.visits := 0;
n.child.bestNode := nil;
end;
result := Count;
end;
procedure TUCT.fFreeNodes(const n: PNode);
var
i: integer;
begin
if n <> nil then
begin
for i := 0 to length(n.child) - 1 do
fFreeNodes(n.child);
setlength(n.child, 0);
dispose(n);
end;
end;
function TUCT.fgetEmptyVertexs: TVertexs;
//得到可供下子的位置,此函數(shù)極為重要,必須大大地降低執(zhí)行時間
var
x, y: integer;
ms: TVertexs;
begin
ms := '';
for x := 0 to BoardSize - 1 do
for y := 0 to BoardSize - 1 do
if Vertexs[x, y] = EMPTY then
ms := ms + xyToVertex(x, y);
result := ms;
end;
procedure TUCT.fSetBestNode(const n: PNode);
//設(shè)置本節(jié)點的bestNode值為本節(jié)點的子節(jié)點中勝率最大的節(jié)點,相同時返回第一個
var
i: integer;
best: PNode;
winrate, bestwinrate: double;
begin
best := nil;
bestwinrate := -1;
for i := 0 to length(n.child) - 1 do
if n.Child.visits > 0 then
begin
winrate := n.Child.wins / n.Child.visits;
if winrate > bestwinrate then
begin
bestwinrate := winrate;
best := n.Child;
end;
end;
n.bestNode := best;
end;
function TUCT.PlayMove(const m: TVertex): boolean;
var
x, y: integer;
begin
result := false;
x := byte(m[1]);
y := byte(m[2]);
if not isEye(x, y, UCTActivePlayer) then //不能自殺,只漏某種特別的情況
if Play(x, y, UCTActivePlayer) then //并不一定始終能下,可能有劫的情況
begin
if UCTActivePlayer = BLACK then //如果成功,設(shè)置為另一方
UCTActivePlayer := WHITE
else
UCTActivePlayer := BLACK;
result := true;
end;
end;
function TUCT.fUCTSelect(const n: PNode): PNode;
//重要的過程,遍歷所有n的子節(jié)點并找出uct值最大的那個節(jié)點
var
i: integer;
winrate, uct, uctvalue, bestuctvalue: double;
begin
result := nil;
bestuctvalue := 0;
for i := 0 to length(n.child) - 1 do
begin
if n.Child.visits > 0 then
begin
winrate := n.Child.wins / n.Child.visits;
uct := UCTk * sqrt(ln(n.visits) / (5 * n.Child.visits));
uctvalue := winrate + uct;
end
else
uctvalue := 10000 + Random(1000); //等于無窮大,優(yōu)先選擇未下過的步
if uctvalue > bestuctvalue then
begin // get max uctvalue of all Children
bestuctvalue := uctvalue;
Result := n.Child;
end;
end; //end of for
end; //end of fUCTSelect
function TUCT.fPlayRandom: integer;
//隨機下棋并返回是否勝!這個函數(shù)極為重要,必須提高執(zhí)行效率
var
i: integer;
d: double;
s, vs: TVertexs;
p: TPlayer;
jie: Tstringlist;
label
labelCaculate;
begin
randomize;
jie := Tstringlist.Create;
p := UCTActivePlayer; //獲得當前輪到方
vs := fGetEmptyVertexs; //取得可下子的位置,肯定不為空,因為總有眼
repeat
if GetKeyState(VK_ESCAPE) < 0 then
showboard; //debug
i := random(length(vs) div 2);
s := copy(vs, i * 2 + 1, 2);
if PlayMove(s) then
begin
vs := fGetEmptyVertexs; //取得可下子的位置,速度重要!
jie.Add(s); //記錄最后幾次著棋
if jie.Count > 4 then //循環(huán)隊列
jie.Delete(0);
end
else
delete(vs, i * 2 + 1, 2); //將不能下的點刪除
if jie.Count > 3 then
if (jie[0] = jie[2]) and (jie[1] = jie[3]) then //判斷連環(huán)劫
begin
d := getEvaluate; //在這種情況下,只能評估
goto labelCaculate; //直接計算結(jié)果
end;
until vs = ''; //執(zhí)行直到一方pass
vs := fGetEmptyVertexs; //讓另一方去填死那些對方不能下子的空
for i := 0 to length(vs) div 2 - 1 do //方便計算結(jié)果
begin
if UCTActivePlayer = BLACK then
UCTActivePlayer := WHITE
else
UCTActivePlayer := BLACK;
PlayMove(copy(vs, i * 2 + 1, 2));
end;
d := fCalculate;
labelCaculate: //進行計算結(jié)果
if ((d > 0) and (p = BLACK)) //判斷哪方勝
or ((d < 0) and (p = WHITE)) then
result := 1
else
result := 0;
jie.Free;
end;
function TUCT.fUCTSimulation(const n: PNode): integer;
//不停地按uct選定的規(guī)則摸擬走棋
var
UCTnode: PNode;
UCTresult: integer;
begin
UCTresult := 0;
if n.visits = 0 then //該節(jié)點未被訪問過
UCTresult := 1 - fPlayRandom //評估本次模擬的結(jié)果
else //n.visits > 0
begin
if length(n.child) = 0 then //無子節(jié)點 1
fCreateChildNodes(n); //建立子節(jié)點,絕不會無一個
UCTnode := fUCTSelect(n); //UCT選擇一個,絕不會為空
if PlayMove(UCTnode.move) then //更新,考慮自殺和劫的情況
UCTresult := 1 - fUCTSimulation(UCTnode)
end;
n.visits := n.visits + 1;
n.wins := n.wins + UCTresult; //wins,存儲自己節(jié)點的勝率
if length(n.child) > 0 then
fSetBestNode(n); //在n的子節(jié)點中找出勝率最大的
result := UCTresult;
end;
function TUCT.fUCTSearch(const p: Tplayer; const count: integer): TVertex;
var
c: integer;
t: cardinal;
sl: Tstringlist;
noderoot: PNode;
v: array[0..18, 0..18] of shortint;
begin
result := ''; //空即為pass
new(noderoot); //生成
noderoot.move := '';
noderoot.wins := 0;
noderoot.visits := 0;
noderoot.bestNode := nil;
sl := Tstringlist.Create;
sl.Assign(MoveList); //保存現(xiàn)場
move(Vertexs, v, sizeof(Vertexs));
UCTActivePlayer := p; //設(shè)置顏色
t := getTickcount; //限時
for c := 0 to count - 1 do //模擬操作i次
begin
fUCTSimulation(noderoot);
move(v, Vertexs, sizeof(Vertexs));
MoveList.Assign(sl); //恢復(fù)現(xiàn)場
UCTActivePlayer := p; //恢復(fù)顏色
if getTickcount - t > _UCTMaxtime then
break;
end;
if noderoot.bestNode <> nil then
Result := noderoot.bestNode.move; //勝率最高的一個
sl.Free;
fFreeNodes(noderoot); //釋放樹
end;
function TUCT.genMove(const p: Tplayer): TVertex;
begin
result := fUCTSearch(p, _UCTMaxSimulation);
end;
end.
//==============================================================================
[ 本帖最后由 netxing 于 2008-4-1 18:40 編輯 ]
發(fā)表于 2008-3-18 15:04 只看該作者
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -