?? cluster.cpp
字號:
can be, then a new prototype is formed and returned to the caller. If it can't be, then NULL is returned to the caller.Return: Pointer to new elliptical prototype or NULL.Exceptions: NoneHistory: 6/12/89, DSJ, Created.****************************************************************************/PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets) { PROTOTYPE *Proto = NULL; int i; // check that each dimension is a normal distribution for (i = 0; i < Clusterer->SampleSize; i++) { if (Clusterer->ParamDesc[i].NonEssential) continue; FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), Cluster->Mean[i], sqrt ((FLOAT64) Statistics-> CoVariance[i * (Clusterer->SampleSize + 1)])); if (!DistributionOK (Buckets)) break; } // if all dimensions matched a normal distribution, make a proto if (i >= Clusterer->SampleSize) Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); return (Proto);} // MakeEllipticalProto/** MakeMixedProto ***********************************************************Parameters: Clusterer data struct containing samples being clustered Cluster cluster to be made into a prototype Statistics statistical info about cluster NormalBuckets histogram struct used to analyze distribution Confidence confidence level for alternate distributionsGlobals: NoneOperation: This routine tests each dimension of the specified cluster to see what distribution would best approximate that dimension. Each dimension is compared to the following distributions in order: normal, random, uniform. If each dimension can be represented by one of these distributions, then a new prototype is formed and returned to the caller. If it can't be, then NULL is returned to the caller.Return: Pointer to new mixed prototype or NULL.Exceptions: NoneHistory: 6/12/89, DSJ, Created.********************************************************************************/PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *NormalBuckets, FLOAT64 Confidence) { PROTOTYPE *Proto; int i; BUCKETS *UniformBuckets = NULL; BUCKETS *RandomBuckets = NULL; // create a mixed proto to work on - initially assume all dimensions normal*/ Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics); // find the proper distribution for each dimension for (i = 0; i < Clusterer->SampleSize; i++) { if (Clusterer->ParamDesc[i].NonEssential) continue; FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i], sqrt ((FLOAT64) Proto->Variance.Elliptical[i])); if (DistributionOK (NormalBuckets)) continue; if (RandomBuckets == NULL) RandomBuckets = GetBuckets (D_random, Cluster->SampleCount, Confidence); MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i])); FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i], Proto->Variance.Elliptical[i]); if (DistributionOK (RandomBuckets)) continue; if (UniformBuckets == NULL) UniformBuckets = GetBuckets (uniform, Cluster->SampleCount, Confidence); MakeDimUniform(i, Proto, Statistics); FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i], Proto->Variance.Elliptical[i]); if (DistributionOK (UniformBuckets)) continue; break; } // if any dimension failed to match a distribution, discard the proto if (i < Clusterer->SampleSize) { FreePrototype(Proto); Proto = NULL; } if (UniformBuckets != NULL) FreeBuckets(UniformBuckets); if (RandomBuckets != NULL) FreeBuckets(RandomBuckets); return (Proto);} // MakeMixedProto/* MakeDimRandom *************************************************************Parameters: i index of dimension to be changed Proto prototype whose dimension is to be altered ParamDesc description of specified dimensionGlobals: NoneOperation: This routine alters the ith dimension of the specified mixed prototype to be D_random.Return: NoneExceptions: NoneHistory: 6/20/89, DSJ, Created.******************************************************************************/void MakeDimRandom(UINT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { Proto->Distrib[i] = D_random; Proto->Mean[i] = ParamDesc->MidRange; Proto->Variance.Elliptical[i] = ParamDesc->HalfRange; // subtract out the previous magnitude of this dimension from the total Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range; Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); // note that the proto Weight is irrelevant for D_random protos} // MakeDimRandom/** MakeDimUniform ***********************************************************Parameters: i index of dimension to be changed Proto prototype whose dimension is to be altered Statistics statistical info about prototypeGlobals: NoneOperation: This routine alters the ith dimension of the specified mixed prototype to be uniform.Return: NoneExceptions: NoneHistory: 6/20/89, DSJ, Created.******************************************************************************/void MakeDimUniform(UINT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { Proto->Distrib[i] = uniform; Proto->Mean[i] = Proto->Cluster->Mean[i] + (Statistics->Min[i] + Statistics->Max[i]) / 2; Proto->Variance.Elliptical[i] = (Statistics->Max[i] - Statistics->Min[i]) / 2; if (Proto->Variance.Elliptical[i] < MINVARIANCE) Proto->Variance.Elliptical[i] = MINVARIANCE; // subtract out the previous magnitude of this dimension from the total Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i]; Proto->Magnitude.Elliptical[i] = 1.0 / (2.0 * Proto->Variance.Elliptical[i]); Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); // note that the proto Weight is irrelevant for uniform protos} // MakeDimUniform/** ComputeStatistics *********************************************************Parameters: N number of dimensions ParamDesc array of dimension descriptions Cluster cluster whose stats are to be computedGlobals: NoneOperation: This routine searches the cluster tree for all leaf nodes which are samples in the specified cluster. It computes a full covariance matrix for these samples as well as keeping track of the ranges (min and max) for each dimension. A special data structure is allocated to return this information to the caller. An incremental algorithm for computing statistics is not used because it will not work with circular dimensions.Return: Pointer to new data structure containing statisticsExceptions: NoneHistory: 6/2/89, DSJ, Created.*********************************************************************************/STATISTICS *ComputeStatistics (INT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { STATISTICS *Statistics; int i, j; FLOAT32 *CoVariance; FLOAT32 *Distance; LIST SearchState; SAMPLE *Sample; UINT32 SampleCountAdjustedForBias; // allocate memory to hold the statistics results Statistics = (STATISTICS *) Emalloc (sizeof (STATISTICS)); Statistics->CoVariance = (FLOAT32 *) Emalloc (N * N * sizeof (FLOAT32)); Statistics->Min = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); Statistics->Max = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); // allocate temporary memory to hold the sample to mean distances Distance = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32)); // initialize the statistics Statistics->AvgVariance = 1.0; CoVariance = Statistics->CoVariance; for (i = 0; i < N; i++) { Statistics->Min[i] = 0.0; Statistics->Max[i] = 0.0; for (j = 0; j < N; j++, CoVariance++) *CoVariance = 0; } // find each sample in the cluster and merge it into the statistics InitSampleSearch(SearchState, Cluster); while ((Sample = NextSample (&SearchState)) != NULL) { for (i = 0; i < N; i++) { Distance[i] = Sample->Mean[i] - Cluster->Mean[i]; if (ParamDesc[i].Circular) { if (Distance[i] > ParamDesc[i].HalfRange) Distance[i] -= ParamDesc[i].Range; if (Distance[i] < -ParamDesc[i].HalfRange) Distance[i] += ParamDesc[i].Range; } if (Distance[i] < Statistics->Min[i]) Statistics->Min[i] = Distance[i]; if (Distance[i] > Statistics->Max[i]) Statistics->Max[i] = Distance[i]; } CoVariance = Statistics->CoVariance; for (i = 0; i < N; i++) for (j = 0; j < N; j++, CoVariance++) *CoVariance += Distance[i] * Distance[j]; } // normalize the variances by the total number of samples // use SampleCount-1 instead of SampleCount to get an unbiased estimate // also compute the geometic mean of the diagonal variances // ensure that clusters with only 1 sample are handled correctly if (Cluster->SampleCount > 1) SampleCountAdjustedForBias = Cluster->SampleCount - 1; else SampleCountAdjustedForBias = 1; CoVariance = Statistics->CoVariance; for (i = 0; i < N; i++) for (j = 0; j < N; j++, CoVariance++) { *CoVariance /= SampleCountAdjustedForBias; if (j == i) { if (*CoVariance < MINVARIANCE) *CoVariance = MINVARIANCE; Statistics->AvgVariance *= *CoVariance; } } Statistics->AvgVariance = (float)pow((double)Statistics->AvgVariance, 1.0 / N); // release temporary memory and return memfree(Distance); return (Statistics);} // ComputeStatistics/** NewSpericalProto *********************************************************Parameters: N number of dimensions Cluster cluster to be made into a spherical prototype Statistics statistical info about samples in clusterGlobals: NoneOperation: This routine creates a spherical prototype data structure to approximate the samples in the specified cluster. Spherical prototypes have a single variance which is common across all dimensions. All dimensions are normally distributed and independent.Return: Pointer to a new spherical prototype data structureExceptions: NoneHistory: 6/19/89, DSJ, Created.******************************************************************************/PROTOTYPE *NewSphericalProto(UINT16 N, CLUSTER *Cluster, STATISTICS *Statistics) { PROTOTYPE *Proto; Proto = NewSimpleProto (N, Cluster); Proto->Variance.Spherical = Statistics->AvgVariance; if (Proto->Variance.Spherical < MINVARIANCE) Proto->Variance.Spherical = MINVARIANCE; Proto->Magnitude.Spherical = 1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical)); Proto->TotalMagnitude = (float)pow((double)Proto->Magnitude.Spherical, (double) N); Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; Proto->LogMagnitude = log ((double) Proto->TotalMagnitude); return (Proto);} // NewSphericalProto/** NewEllipticalProto *******************************************************Parameters: N number of dimensions Cluster cluster to be made into an elliptical prototype Statistics statistical info about samples in clusterGlobals: NoneOperation: This routine creates an elliptical prototype data structure to approximate the samples in the specified cluster. Elliptical prototypes have a variance for each dimension. All dimensions are normally distributed and independent.Return: Pointer to a new elliptical prototype data structureExceptions: NoneHistory: 6/19/89, DSJ, Created.*******************************************************************************/PROTOTYPE *NewEllipticalProto(INT16 N, CLUSTER *Clust
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -