%#
%# function [w1f,w2f,indisp] = levmarq(topo,w1,w2,xtr,ytr,xm,ym,epochs,periodisp)
%#
%# AIM: Levenberg-Marquardt algorithm for training feed-forward backpropagation
%# neural networks. Pruned (i.e. not fully connected) networks can also be
%# trained.
%#
%# PRINCIPLE: Second-order optimization method based on a damped Gauss-Newton
%# approximation of the Hessian matrix of second derivatives of the cost
%# function with respect to the weights. The function is called by "nnmodel.m".
%#
%# INPUT: topo (2 x nh) : matrix containing structure of the NN (nh hidden nodes)
%# (H: hyperbolic tangent, L: linear, -: no function)
%# w1 (nh x (p+1)) : matrix of random weights between input-hidden layer
%# w2 (1 x (nh+1)) : matrix of random weights between hidden-output layer
%# xtr (p x ntr) : matrix of training inputs (ntr objects, p variables)
%# ytr (1 x ntr) : vector of training responses
%# xm (p x nm) : matrix of monitoring inputs
%# ym (1 x nm) : vector of monitoring responses
%# epochs : maximum number of epochs (iterations) for the training
%# periodisp : periodicity of display
%#
%# OUTPUT: w1f (nh x (p+1)) : matrix of weights between input and hidden layer
%# w2f (1 x (nh+1)) : matrix of weights between hidden and output layer
%# indisp : optimum number of epochs
%#
%# SUBROUTINES: pmntanh.m : fast hyperbolic tangent function
%# lmeval.m : estimation of responses with the final neural network model
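%#
%# For reference, a minimal sketch of what pmntanh.m is assumed to compute,
%# based only on its description above (the routine shipped with the
%# NNSYSID toolbox may differ in detail):
%#
%#   function y = pmntanh(x)
%#   % Fast tanh: tanh(x) = 2/(1+exp(-2x)) - 1, one exp() per element
%#   y = 2./(1+exp(-2*x))-1;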
%#
%# AUTHOR: Programmed by : Magnus Norgaard, IAU/EI/IMM (1994)
%# Neural Network-Based System Identification Toolbox
%# Web site : http://www.iau.dk/Projects/proj/nnsysid.html
%#
%# Modified by Frederic Despagne
%# Copyright(c) 1997 for ChemoAC
%# Dienst FABI, Vrije Universiteit Brussel
%# Laarbeeklaan 103, 1090 Jette
%#
%# VERSION: 1.1 (28/02/1998)
%#
%# TEST: Krzysztof Szczubialka
%#
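%# EXAMPLE: a hypothetical call, given only to illustrate the argument shapes
%# (all sizes and data below are made up; row 1 of topo describes the hidden
%# nodes, row 2 the output node):
%#
%#   p = 3; nh = 4; ntr = 50; nm = 20;
%#   topo = ['HHHH' ; 'L---'];                 % 4 tanh hidden nodes, linear output
%#   w1 = 0.5*randn(nh,p+1);                   % Random input-hidden weights
%#   w2 = 0.5*randn(1,nh+1);                   % Random hidden-output weights
%#   xtr = randn(p,ntr); ytr = randn(1,ntr);   % Training set
%#   xm = randn(p,nm); ym = randn(1,nm);       % Monitoring set
%#   [w1f,w2f,indisp] = levmarq(topo,w1,w2,xtr,ytr,xm,ym,200,10);
%#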
function [w1f,w2f,indisp] = levmarq(topo,w1,w2,xtr,ytr,xm,ym,epochs,periodisp)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NETWORK INITIALIZATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
close % Close any previous figure window
displ = 1; % Counter for the learning-curve arrays sec/sep
N = length(ytr); % Number of training objects
Nm = length(ym); % Number of monitoring objects
[hidden,inputs] = size(w1); % Number of hidden nodes and of inputs (incl. bias)
inputs = inputs-1; % Number of input variables
L_hidden = find(topo(1,:)=='L')'; % Location of linear hidden neurons
H_hidden = find(topo(1,:)=='H')'; % Location of tanh hidden neurons
L_output = find(topo(2,:)=='L')'; % Location of linear output neurons
H_output = find(topo(2,:)=='H')'; % Location of tanh output neurons
y1 = [zeros(hidden,N);ones(1,N)]; % Hidden layer outputs (last row is the bias)
y2 = zeros(1,N); % Network output
index = 1*(hidden+1)+1+[0:hidden-1]*(inputs+1); % Start row in theta/PSI of each hidden unit's input weights
index2 = (0:N-1)*1; % Column offsets into PSI (single output)
iteration = 1; % Counter variable
dw = 1; % Flag telling that the weights are new
xtr = [xtr;ones(1,N)]; % Adds bias to training input data
parameters1 = hidden*(inputs+1); % Number of input-to-hidden weights
parameters2 = 1*(hidden+1); % Number of hidden-to-output weights
parameters = parameters1 + parameters2; % Total # of weights
PSI = zeros(parameters,1*N); % Derivative of each output w.r.t. each weight
ones_h = ones(hidden+1,1); % A vector of ones
ones_i = ones(inputs+1,1); % Another vector of ones
% Parameter vector containing all weights (hidden-output weights first, then input-hidden weights)
theta = [reshape(w2',parameters2,1) ; reshape(w1',parameters1,1)];
theta_index = find(theta); % Index to nonzero weights (pruned weights are fixed at 0)
theta_red = theta(theta_index); % Reduced parameter vector
p0 = 1e6; % Diagonal element of H_inv (not used in this routine)
Ident = eye(1); % Identity matrix (not used in this routine)
reduced = length(theta_index); % The number of parameters in theta_red
index3 = 1:(reduced+1):(reduced^2); % Linear indices of the diagonal of the reduced Hessian
stop_crit = 0; % Stop training if criterion gets below this value
lambda = 1; % Initial regularization factor
PI_vector = zeros(epochs,1); % A vector containing the accumulated SSE
clc;
%%%%%%%%%%%%%%%%%%%%%%%%% COMPUTATION OF NETWORK OUTPUT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
h1 = w1*xtr; % Inputs to hidden layer
y1(H_hidden,:) = pmntanh(h1(H_hidden,:)); % Nonlinear outputs of hidden layer
y1(L_hidden,:) = h1(L_hidden,:); % Linear outputs of hidden layer
h2 = w2*y1; % Inputs to output layer
y2(H_output,:) = pmntanh(h2(H_output,:)); % Nonlinear outputs of output layer
y2(L_output,:) = h2(L_output,:); % Linear outputs of output layer
E = ytr - y2; % Training error
E_vector = E(:); % Reshape E into a long vector
SSE = E_vector'*E_vector; % Sum of squared errors (SSE)
PI = SSE/(2*N); % Performance index (SSE/(2N), consistent with PI_new below)
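% The loop below implements the Levenberg-Marquardt update in the notation of
% this file: PSI_red holds the derivatives of the network output w.r.t. each
% nonzero weight, G = PSI_red*E_vector is the gradient direction and
% R = PSI_red*PSI_red' the Gauss-Newton approximation of the Hessian. Each
% iteration solves the damped normal equations (R + lambda*I)*h = G and
% accepts the step h only if the performance index PI decreases.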
while iteration <= epochs
if dw == 1,
%%%%% COMPUTATION OF NETWORK OUTPUT DERIVATIVES WITH RESPECT TO EACH WEIGHT (PSI MATRIX) %%%%
% Elements corresponding to linear output units
for i = L_output'
index1 = (i-1) * (hidden + 1) + 1;
% Part of PSI corresponding to hidden-to-output layer weights
PSI(index1:index1+hidden,index2+i) = y1;
% Part of PSI corresponding to input-to-hidden layer weights
for j = L_hidden',
% Derivatives of linear hidden layer outputs
PSI(index(j):index(j)+inputs,index2+i) = w2(i,j)*xtr;
end
for j = H_hidden',
tmp = w2(i,j)*(1-y1(j,:).*y1(j,:)); % Hyp. tang. derivative
% Derivatives of nonlinear hidden layer outputs
PSI(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).*xtr;
end
end
% Elements corresponding to nonlinear output units
for i = H_output',
index1 = (i-1) * (hidden + 1) + 1;
tmp = 1 - y2(i,:).*y2(i,:);
% Part of PSI corresponding to hidden-to-output layer weights
PSI(index1:index1+hidden,index2+i) = y1.*tmp(ones_h,:);
% Part of PSI corresponding to input-to-hidden layer weights
for j = L_hidden',
tmp = w2(i,j)*(1-y2(i,:).*y2(i,:)); % Tanh derivative of the output node
% Derivatives for linear hidden neurons feeding a tanh output
PSI(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:).*xtr;
end
for j = H_hidden',
tmp = w2(i,j)*(1-y1(j,:).*y1(j,:));
tmp2 = (1-y2(i,:).*y2(i,:));
% Derivatives of nonlinear hidden layer outputs
PSI(index(j):index(j)+inputs,index2+i) = tmp(ones_i,:)...
.*tmp2(ones_i,:).*xtr;
end
end
PSI_red = PSI(theta_index,:); % Reduced PSI-matrix
G = PSI_red*E_vector; % Gradient
R = PSI_red*PSI_red'; % Gauss-Newton approximation of the Hessian
dw = 0;
end
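% When a step is rejected (PI_new >= PI below), dw stays 0 and the gradient G
% and matrix R above are reused on the next pass with a larger lambda.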
%%%%%%%%%%%%%%%%%%%%%%%%%%% COMPUTATION OF WEIGHTS UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
H = R; % Hessian matrix
H(index3) = H(index3)+lambda; % Add lambda to the diagonal (damping)
h = H\G; % Calculates new search direction
theta_red_new = theta_red + h; % Updates parameter vector
theta(theta_index) = theta_red_new;
% Put the parameters back into the weight matrices
w1_new = reshape(theta(parameters2+1:parameters),inputs+1,hidden)';
w2_new = reshape(theta(1:parameters2),hidden+1,1)';
%%%%%%%%%%%%%%%%%%%%%%%%% COMPUTATION OF NEW NETWORK OUTPUT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
h1 = w1_new*xtr; % Inputs to hidden layer
y1(H_hidden,:) = pmntanh(h1(H_hidden,:)); % Nonlinear outputs of hidden layer
y1(L_hidden,:) = h1(L_hidden,:); % Linear outputs of hidden layer
h2 = w2_new*y1; % Inputs to output layer
y2(H_output,:) = pmntanh(h2(H_output,:)); % Nonlinear outputs of output layer
y2(L_output,:) = h2(L_output,:); % Linear outputs of output layer
E_new = ytr-y2; % Training error
E_new_vector = E_new(:); % Reshape E into a long vector
SSE_new = E_new_vector'*E_new_vector; % Sum of squared training errors (SSE)
PI_new = SSE_new/(2*N); % Performance index (SSE/(2N))
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% UPDATE LAMBDA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
L = h'*G+h'*(h.*lambda);
if 2*N*(PI-PI_new) > (0.75*L),
lambda = lambda/2; % Decrease lambda if SSE has fallen sufficiently
elseif 2*N*(PI-PI_new) <= (0.25*L),
lambda = 2*lambda; % Increase lambda if SSE has grown sufficiently
end
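% L above is the reduction in SSE predicted by the quadratic model for step h;
% comparing it with the actual reduction 2*N*(PI-PI_new) steers lambda:
% a small lambda gives near Gauss-Newton steps, a large lambda gives short
% gradient-descent-like steps.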
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% UPDATE FOR NEXT ITERATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Update only if criterion has decreased
if PI_new < PI,
w1 = w1_new; % New input-hidden matrix of weights
w2 = w2_new; % New hidden-output matrix of weights
theta_red = theta_red_new; % New parameter vector
E_vector = E_new_vector; % Training error
PI = PI_new; % New performance index
dw = 1; % Indicates new weights
iteration = iteration + 1; % Increments counter
PI_vector(iteration-1) = PI; % Collect PI in vector
%%%%%%%%%%%%%%%%%%%%%%% PERIODIC PREDICTIONS ON MONITORING SET %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
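% Early stopping: the weights giving the lowest monitoring SSE seen so far are
% kept in w1f/w2f, and indisp records the corresponding epoch.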
[yhv,yvh] = lmeval(topo,w1,w2,xm); % Predict monitoring response
dd = 0:displ-1; % Display vector
sec(displ) = PI_new; % Training criterion (SSE/(2N))
E_mon = ym - yvh; % Monitoring error
sep(displ) = sum(sum(E_mon.*E_mon))/(2*Nm); % Monitoring criterion (SSE/(2Nm))
if displ == 1
oldsep = sep(displ); % Reference value of monitoring SSE
end
% Display learning curves at regular intervals
if periodisp & rem(iteration,periodisp) == 0
plot(dd,sec(1:displ),'--',dd,sep(1:displ),'c')
rens1=sprintf(' Architecture :(%1.0f-%1.0f-1)',[inputs hidden]);
rens2=sprintf('Epochs : %5.0f',iteration);
rens3=sprintf('NNMODEL ');
title([rens3 rens1]); xlabel(rens2); ylabel('-- SSEC - SSEP')
drawnow
end
% Stores weights if monitoring SSE smaller than the reference value
if sep(displ) <= oldsep
w1f = w1;
w2f = w2;
oldsep = sep(displ); % Update the reference SSE
indisp = iteration; % Save the corresponding number of epochs
end
displ = displ+1; % Increment display counter
end
% Interrupts training if the stop condition is fulfilled
if (PI < stop_crit) | (lambda>1e7),break, end
end