% timeseries2symbol.m
% (Web code-viewer residue: font-size control.)
% Copyright and terms of use (DO NOT REMOVE):
% The code is made freely available for non-commercial uses only, provided that the copyright
% header in each file not be removed, and suitable citation(s) (see below) be made for papers
% published based on the code.
%
% The code is not optimized for speed, and we are not responsible for any errors that might
% occur in the code.
%
% The copyright of the code is retained by the authors. By downloading/using this code you
% agree to all the terms stated above.
%
% Lin, J., Keogh, E., Lonardi, S. & Chiu, B.
% "A Symbolic Representation of Time Series, with Implications for Streaming Algorithms."
% In proceedings of the 8th ACM SIGMOD Workshop on Research Issues in Data Mining and
% Knowledge Discovery. San Diego, CA. June 13, 2003.
%
%
% Lin, J., Keogh, E., Patel, P. & Lonardi, S.
% "Finding Motifs in Time Series". In proceedings of the 2nd Workshop on Temporal Data Mining,
% at the 8th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining.
% Edmonton, Alberta, Canada. July 23-26, 2002
%
% This function takes in a time series and converts it to string(s).
% There are two options:
% 1. Convert the entire time series to ONE string
% 2. Use sliding windows, extract the subsequences and convert these subsequences to strings
%
% For the first option, simply enter the length of the time series as "N"
% ex. We have a time series of length 32 and we want to convert it to a 8-symbol string,
% with alphabet size 3:
% timeseries2symbol(data, 32, 8, 3)
% For the second option, enter the desired sliding window length as "N"
% ex. We have a time series of length 32 and we want to extract subsequences of length 16 using
% sliding windows, and convert the subsequences to 8-symbol strings, with alphabet size 3:
% timeseries2symbol(data, 16, 8, 3)
%
%
% Input:
% data is the raw time series.
% N is the length of sliding window (use the length of the raw time series
% instead if you don't want to have sliding windows)
% n is the number of symbols in the low dimensional approximation of the sub sequence.
% alphabet_size is the number of discrete symbols. 2 <= alphabet_size <= 10, although alphabet_size = 2 is a special "useless" case.
%
% Output:
% symbolic_data: matrix of symbolic data (no-repetition). If consecutive subsequences
% have the same string, then only the first occurrence is recorded, with
% a pointer to its location stored in "pointers"
% pointers: location of the first occurrences of the strings
%
% N/n must be an integer, otherwise the program will give a warning, and abort.
%
% The variable "win_size" is assigned to N/n, this is the number of data points on the raw
% time series that will be mapped to a single symbol, and can be imagined as the
% "compression rate".
%
% The symbolic data is returned in "symbolic_data", with pointers to the subsequences
%
%
%
%
% Copyright (c) 2003, Eamonn Keogh, Jessica Lin, Stefano Lonardi, Pranav Patel. All rights reserved.
%
function [symbolic_data, pointers] = timeseries2symbol(data, N, n, alphabet_size)
% TIMESERIES2SYMBOL  Convert a time series into symbolic (SAX-style) strings.
%
%   [symbolic_data, pointers] = timeseries2symbol(data, N, n, alphabet_size)
%
%   Input:
%     data          - raw time series (vector).
%     N             - sliding window length (use length(data) to convert the
%                     whole series into a single string).
%     n             - number of symbols per string; N/n must be an integer.
%     alphabet_size - number of discrete symbols, an integer in 2..10.
%
%   Output:
%     symbolic_data - one row per recorded string, symbols coded 1..alphabet_size.
%                     A window whose string equals the previously recorded
%                     string is skipped.
%     pointers      - column vector with the starting index in "data" of the
%                     window that produced each row of symbolic_data.
%
%   On invalid arguments a message is displayed and both outputs are
%   returned empty.

% Assign the outputs up front so that the early "abort" returns below do not
% trigger an "output argument not assigned" error in the caller.
symbolic_data = [];
pointers      = [];

if mod(N, n) ~= 0                                   % N/n must be an integer.
    disp('N/n must be an integer. Aborting ');
    return;
end

if alphabet_size > 10
    disp('Currently alphabet_size cannot be larger than 10.  Please update the breakpoint table if you wish to do so');
    return;
end

% The breakpoint table in map_to_string only covers the integers 2..10.
if alphabet_size < 2 || alphabet_size ~= floor(alphabet_size)
    disp('alphabet_size must be an integer between 2 and 10. Aborting ');
    return;
end

win_size    = N / n;                                % raw data points mapped to a single symbol
num_windows = max(length(data) - N + 1, 0);         % number of sliding-window positions

% Preallocate for the worst case (every window yields a new string) and trim
% afterwards, instead of growing the outputs on every iteration.
symbolic_data = zeros(num_windows, n);
pointers      = zeros(num_windows, 1);
num_kept      = 0;

% All-zero sentinel: valid symbols are >= 1, so the first string is always kept.
previous_string = zeros(1, n);

% Scan across the time series, extract subsequences and convert them to strings.
for i = 1 : num_windows

    % Extract the current subsequence.
    sub_section = data(i : i + N - 1);

    % Z-normalize it. A constant subsequence has zero standard deviation;
    % dividing by it would produce NaN (and then the invalid symbol 0), so
    % such a window is mapped to all zeros, i.e. "exactly at the mean".
    sigma = std(sub_section);
    if sigma == 0
        sub_section = zeros(size(sub_section));
    else
        sub_section = (sub_section - mean(sub_section)) / sigma;
    end

    if N == n
        % Special case: no dimensionality reduction.
        PAA = sub_section;
    else
        % Piecewise Aggregate Approximation: average each run of win_size points.
        PAA = mean(reshape(sub_section, win_size, n), 1);
    end

    current_string = map_to_string(PAA, alphabet_size);    % Convert the PAA to a string.

    % Record the string only if it differs from the last recorded one.
    if ~all(current_string == previous_string)
        num_kept                   = num_kept + 1;
        symbolic_data(num_kept, :) = current_string;
        pointers(num_kept)         = i;
        previous_string            = current_string;
    end

end

% Drop the unused preallocated rows.
symbolic_data = symbolic_data(1:num_kept, :);
pointers      = pointers(1:num_kept);
%--------------------------------------------------------------------------------------------------------------------------------------------------------
%----------------Local Functions----------------------Local Functions----------------Local Functions----------------------Local Functions----------------
%--------------------------------------------------------------------------------------------------------------------------------------------------------
function string = map_to_string(PAA,alphabet_size)
% MAP_TO_STRING  Map PAA values to integer symbols using a fixed breakpoint table.
%
%   string = map_to_string(PAA, alphabet_size)
%
%   PAA           - vector of values to discretize.
%   alphabet_size - integer in 2..10 selecting the row of the breakpoint table.
%
%   Returns a 1-by-length(PAA) row vector. Each entry is the number of cut
%   points that are <= the corresponding PAA value, so symbols run from 1
%   (lowest bin) to alphabet_size (highest bin): a = 1, b = 2, c = 3, ...
%
%   Raises an error with identifier 'timeseries2symbol:badAlphabetSize' when
%   alphabet_size has no entry in the breakpoint table.

switch alphabet_size
    case 2,  cut_points = [-inf 0];
    case 3,  cut_points = [-inf -0.43 0.43];
    case 4,  cut_points = [-inf -0.67 0 0.67];
    case 5,  cut_points = [-inf -0.84 -0.25 0.25 0.84];
    case 6,  cut_points = [-inf -0.97 -0.43 0 0.43 0.97];
    case 7,  cut_points = [-inf -1.07 -0.57 -0.18 0.18 0.57 1.07];
    case 8,  cut_points = [-inf -1.15 -0.67 -0.32 0 0.32 0.67 1.15];
    case 9,  cut_points = [-inf -1.22 -0.76 -0.43 -0.14 0.14 0.43 0.76 1.22];
    case 10, cut_points = [-inf -1.28 -0.84 -0.52 -0.25 0. 0.25 0.52 0.84 1.28];
    otherwise
        % Fail explicitly: continuing would leave cut_points undefined and
        % surface as an unrelated "undefined variable" error further down.
        error('timeseries2symbol:badAlphabetSize', ...
              'alphabet_size must be an integer between 2 and 10 (got %g).', alphabet_size);
end

string = zeros(1,length(PAA));
for i = 1 : length(PAA)
    % Count the cut points at or below the value; -inf guarantees a minimum of 1
    % for any non-NaN value.
    string(i) = sum( (cut_points <= PAA(i)), 2 );
end
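% (Web code-viewer residue, not part of the program.)
% Keyboard shortcuts:
%   Copy code            Ctrl + C
%   Search code          Ctrl + F
%   Full-screen mode     F11
%   Toggle theme         Ctrl + Shift + D
%   Show shortcuts       ?
%   Increase font size   Ctrl + =
%   Decrease font size   Ctrl + -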