// noise_filter.java
package wavelet_util;
import java.util.Vector;
import sort.*;
import wavelets.*;
import java.io.*;
/**
<p>
The objective in filtering is to remove noise while keeping the
features that are interesting.
</p>
<p>
Wavelets allow a time series to be examined at various
resolutions. This can be a powerful tool in filtering out noise.
This class supports the subtraction of gaussian noise from
the time series.
</p>
<p>
The identification of noise is complex and I have not found any
material that I could understand which discussed noise
identification in the context of wavelets. I did find some material
that has been difficult and frustrating. In particular
<i>Image Processing and Data Analysis: the multiscale approach</i>
by Starck, Murtagh and Bijaoui.
</p>
<p>
If the price of a stock follows a random walk, its price will be
distributed in a bell (gaussian) curve. This is one way of stating
the concept from financial theory that the daily return is normally
distributed (here daily return is defined as the difference between
yesterday's close price and today's close price). Movement outside
the bounds of the curve may represent something other than a random walk
and so, in theory, might be interesting.
</p>
<p>
At least in the case of the single test case used in developing this
code (Applied Materials, symbol: AMAT), the coefficient distribution
in the highest frequency is almost a perfect normal curve. That is,
the mean is close to zero and the standard deviation is close to
one. The area under this curve is very close to one. This
resolution approximates the daily return. At lower frequencies the
mean moves away from zero and the standard deviation increases.
This results in a flattened curve, whose area in the coefficient
range is increasingly less than one.
</p>
<p>
The code in this class subtracts the normal curve from the
coefficients at each frequency up to some minimum. This leaves only
the coefficients above the curve which are used to regenerate the
time series (without the noise, in theory). This filter removes 50
to 60 percent of the coefficients.
</p>
<p>
It's probably worth mentioning that there are other kinds of
noise, most notably Poisson noise. In theory daily data
tends to show gaussian noise, while intraday data would
show Poisson noise. Intraday Poisson noise would result
from the random arrival and size of orders.
</p>
<p>
This class has two public methods:
</p>
<ol>
<li>
<p>
<i>filter_time_series</i>, which is passed a file name and a time series
</p>
</li>
<li>
<p>
<i>gaussian_filter</i>, which is passed a Haar coefficient
spectrum and an array allocated for the noise values. The
noise array will be the same size as the coefficient array.
</p>
</li>
</ol>
*/
public class noise_filter extends plot {
// Returns the literal name of this class, "noise_filter".
// NOTE(review): presumably overrides a naming hook declared in plot -- confirm against plot.
String class_name() { return "noise_filter"; }
/**
<p>
The point class represents a coefficient value so that it can be
sorted for histogramming and then resorted back into the original
ordering (e.g., sorted by value and then sorted by index)
</p>
*/
private class point {
    /** Position of the coefficient in the original (unsorted) array. */
    public int index;

    /** The coefficient value itself. */
    public double val;

    /** Pair a coefficient value <i>v</i> with its original position <i>i</i>. */
    point(int i, double v)
    {
        this.index = i;
        this.val = v;
    }
} // point
/**
<p>
A histogram bin
</p>
<p>
For a histogram bin b<sub>i</sub>, the range of
values is b<sub>i</sub>.start to b<sub>i+1</sub>.start.
</p>
<p>
The vector object <i>vals</i> stores references to
the point objects which fall in the bin range.
</p>
<p>
The number of values in the bin is <i>vals.size()</i>
</p>
*/
private class bin {
    /** Lower bound of the range of values that belong to this bin. */
    public double start;

    /** References to the point objects whose values fall in this bin. */
    public Vector vals;

    /** Create an empty bin whose range begins at <i>s</i>. */
    bin( double s )
    {
        this.vals = new Vector();
        this.start = s;
    }
} // bin
/**
Bell curve info: mean, sigma (the standard deviation)
*/
private class bell_info {
    /** Mean of the distribution. */
    public double mean;

    /** Standard deviation of the distribution. */
    public double sigma;

    /** Create an object whose mean and sigma keep their default value (0.0). */
    public bell_info()
    {
    }

    /** Create an object holding the mean <i>m</i> and standard deviation <i>s</i>. */
    public bell_info(double m, double s)
    {
        this.mean = m;
        this.sigma = s;
    }
} // bell_info
/**
<p>
Build a histogram from the sorted data in the pointz
array. The histogram is constructed by appending a
point object to the bin <i>vals</i> Vector if the value
of the point is between b[i].start and b[i].start + step.
</p>
*/
private void histogram( bin binz[], point pointz[] )
{
    // Distribute the points over the bins by appending each point to the
    // vals Vector of the bin whose range contains the point's value.
    //
    //   binz   - the histogram bins, in ascending order of their start value
    //   pointz - the points, already sorted by ascending val
    //
    // Bin i covers the half-open range [binz[i].start, binz[i+1].start).
    // The last bin has no upper neighbour, so it takes every remaining
    // point at or above its own start.  This keeps the largest value in
    // the histogram: it sits on the upper edge of the last bin, so a
    // strict "less than" test would drop it.  It also keeps all points
    // when every value is equal and the bin width is therefore zero.
    //
    // The real bin starts are used as edges, not a running "end + step"
    // sum, so rounding error cannot drift away from the bins.  Nothing
    // is returned; only the bins' vals Vectors are modified.  Empty
    // arrays and a single bin are handled without indexing out of range.
    final int num_bins = binz.length;
    final int len = pointz.length;
    final int last = num_bins - 1;

    int i = 0;  // index of the next point that still has to be placed
    for (int ix = 0; ix < num_bins && i < len; ix++) {
        final double lower = binz[ix].start;
        while (i < len) {
            final double v = pointz[i].val;
            final boolean below_upper = (ix == last) || (v < binz[ix + 1].start);
            if (!(v >= lower && below_upper)) {
                break;  // this point belongs to a later bin (or to none)
            }
            binz[ix].vals.addElement( pointz[i] );
            i++;
        }
    }
} // histogram
/**
Sort an array of <i>point</i> objects by the
index field.
*/
private class sort_by_index extends generic_sort {
    /**
      Order two <i>point</i> objects by their index field.
      Returns -1 when a.index is smaller than b.index,
      1 when it is larger, and 0 when the two are equal.
    */
    protected int compare( Object a, Object b )
    {
        final int left = ((point)a).index;
        final int right = ((point)b).index;
        if (left < right) {
            return -1;
        }
        return (left > right) ? 1 : 0;
    } // compare
} // sort_by_index
/**
Sort an array of <i>point</i> objects by the
val field.
*/
private class sort_by_val extends generic_sort {
    /**
      Order two <i>point</i> objects by their val field.
      Returns -1 when a.val is smaller than b.val,
      1 when it is larger, and 0 otherwise.
    */
    protected int compare( Object a, Object b )
    {
        final double left = ((point)a).val;
        final double right = ((point)b).val;
        if (left < right) {
            return -1;
        }
        return (left > right) ? 1 : 0;
    } // compare
} // sort_by_val
/**
Allocate an array of histogram bins that is <i>num_bins</i> in
length. Initialize the start value of each bin with
a start value calculated from <i>low</i> and <i>high</i>.
*/
private bin[] alloc_bins( int num_bins, double low, double high )
{
    // Width of a single bin: the value range divided evenly over the bins.
    final double width = (high - low) / (double)num_bins;
    bin result[] = new bin[ num_bins ];

    // The first bin starts at low; each following bin starts one width
    // above the previous one.
    double edge = low;
    int slot = 0;
    while (slot < num_bins) {
        result[slot] = new bin( edge );
        edge += width;
        slot++;
    }
    return result;
} // alloc_bins
/**
<p>
Calculate the histogram of the coefficients using
<i>num_bins</i> histogram bins
</p>
<p>
The Haar coefficients are stored in point objects
which consist of the coefficient value and the
index in the point array.
</p>
<p>
To calculate the histogram, the pointz array is
sorted by value. After it is histogrammed it
is resorted by index to return the original ordering.
</p>
*/
private bin[] calc_histo( point pointz[], int num_bins )
{
    // Order the points by value so that the smallest and largest
    // coefficients sit at the two ends of the array.
    new sort_by_val().sort( pointz );

    final double smallest = pointz[0].val;
    final double largest = pointz[ pointz.length - 1 ].val;

    // Spread num_bins bins over the value range and fill them.
    bin histo[] = alloc_bins( num_bins, smallest, largest );
    histogram( histo, pointz );

    // Sort by index again so the caller sees the original ordering.
    new sort_by_index().sort( pointz );
    return histo;
} // calc_histo
/**
<p>
Allocate and initialize an array of <i>point</i> objects.
The size of the array is <tt><i>end</i> - <i>start</i></tt>.
Each point object in the array is initialized with its
index and a Haar coefficient (from the <i>coef</i> array).
</p>
<p>
Since the allocation code has to iterate through the
coefficient spectrum the mean and standard deviation
are also calculated to avoid an extra iteration. These
values are returned in the <i>bell_info</i> object.
</p>
*/
private point[] alloc_points( double coef[],
int start,
int end,
bell_info info )
{
int size = end - start;
point pointz[] = new point[ size ];
double sum = 0;
int ix = start;
for (int i = 0; i < size; i++) {
pointz[i] = new point( i, coef[ix] );
sum = sum + coef[ix];
ix++;
}
double mean = sum / (double)size;
// now calculate the standard deviation
double stdDevSum = 0;
double x;
for (int i = 0; i < size; i++) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -