?? cluster.cpp
字號:
*m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n; if (*m < ParamDesc->Min) *m += ParamDesc->Range; } else if ((*m1 - *m2) > ParamDesc->HalfRange) { *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n; if (*m < ParamDesc->Min) *m += ParamDesc->Range; } else *m = (n1 * *m1 + n2 * *m2) / n; } else *m = (n1 * *m1 + n2 * *m2) / n; } return (n);} // MergeClusters/** ComputePrototypes *******************************************************Parameters: Clusterer data structure holding cluster tree Config parameters used to control prototype generationGlobals: NoneOperation: This routine decides which clusters in the cluster tree should be represented by prototypes, forms a list of these prototypes, and places the list in the Clusterer data structure.Return: NoneExceptions: NoneHistory: 5/30/89, DSJ, Created.*******************************************************************************/void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { LIST ClusterStack = NIL; CLUSTER *Cluster; PROTOTYPE *Prototype; // use a stack to keep track of clusters waiting to be processed // initially the only cluster on the stack is the root cluster if (Clusterer->Root != NULL) ClusterStack = push (NIL, Clusterer->Root); // loop until we have analyzed all clusters which are potential prototypes while (ClusterStack != NIL) { // remove the next cluster to be analyzed from the stack // try to make a prototype from the cluster // if successful, put it on the proto list, else split the cluster Cluster = (CLUSTER *) first (ClusterStack); ClusterStack = pop (ClusterStack); Prototype = MakePrototype (Clusterer, Config, Cluster); if (Prototype != NULL) { Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype); } else { ClusterStack = push (ClusterStack, Cluster->Right); ClusterStack = push (ClusterStack, Cluster->Left); } }} // ComputePrototypes/** MakePrototype ***********************************************************Parameters: Clusterer data structure holding cluster tree Config parameters used to control prototype generation Cluster cluster to be made into a prototypeGlobals: NoneOperation: This routine attempts to create a prototype from the specified cluster that conforms to the distribution specified in Config. If there are too few samples in the cluster to perform a statistical analysis, then a prototype is generated but labelled as insignificant. If the dimensions of the cluster are not independent, no prototype is generated and NULL is returned. If a prototype can be found that matches the desired distribution then a pointer to it is returned, otherwise NULL is returned.Return: Pointer to new prototype or NULLExceptions: NoneHistory: 6/19/89, DSJ, Created.*******************************************************************************/PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, CLUSTER *Cluster) { STATISTICS *Statistics; PROTOTYPE *Proto; BUCKETS *Buckets; // filter out clusters which contain samples from the same character if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal)) return (NULL); // compute the covariance matrix and ranges for the cluster Statistics = ComputeStatistics (Clusterer->SampleSize, Clusterer->ParamDesc, Cluster); // check for degenerate clusters which need not be analyzed further // note that the MinSamples test assumes that all clusters with multiple // character samples have been removed (as above) Proto = MakeDegenerateProto (Clusterer->SampleSize, Cluster, Statistics, Config->ProtoStyle, (INT32) (Config->MinSamples * Clusterer->NumChar)); if (Proto != NULL) { FreeStatistics(Statistics); return (Proto); } // check to ensure that all dimensions are independent if (!Independent (Clusterer->ParamDesc, Clusterer->SampleSize, Statistics->CoVariance, Config->Independence)) { FreeStatistics(Statistics); return (NULL); } if (HOTELLING && Config->ProtoStyle == elliptical) { Proto = TestEllipticalProto(Clusterer, Cluster, Statistics); if (Proto != NULL) { FreeStatistics(Statistics); return Proto; } } // create a histogram data structure used to evaluate distributions Buckets = GetBuckets (normal, Cluster->SampleCount, Config->Confidence); // create a prototype based on the statistics and test it switch (Config->ProtoStyle) { case spherical: Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets); break; case elliptical: Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets); break; case mixed: Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets, Config->Confidence); break; case automatic: Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets); if (Proto != NULL) break; Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets); if (Proto != NULL) break; Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets, Config->Confidence); break; } FreeBuckets(Buckets); FreeStatistics(Statistics); return (Proto);} // MakePrototype/** MakeDegenerateProto ******************************************************Parameters: N number of dimensions Cluster cluster being analyzed Statistics statistical info about cluster Style type of prototype to be generated MinSamples minimum number of samples in a clusterGlobals: NoneOperation: This routine checks for clusters which are degenerate and therefore cannot be analyzed in a statistically valid way. A cluster is defined as degenerate if it does not have at least MINSAMPLESNEEDED samples in it. If the cluster is found to be degenerate, a prototype of the specified style is generated and marked as insignificant. A cluster is also degenerate if it does not have at least MinSamples samples in it. If the cluster is not degenerate, NULL is returned.Return: Pointer to degenerate prototype or NULL.Exceptions: NoneHistory: 6/20/89, DSJ, Created. 7/12/89, DSJ, Changed name and added check for 0 stddev. 8/8/89, DSJ, Removed check for 0 stddev (handled elsewhere).********************************************************************************/PROTOTYPE *MakeDegenerateProto( //this was MinSample UINT16 N, CLUSTER *Cluster, STATISTICS *Statistics, PROTOSTYLE Style, INT32 MinSamples) { PROTOTYPE *Proto = NULL; if (MinSamples < MINSAMPLESNEEDED) MinSamples = MINSAMPLESNEEDED; if (Cluster->SampleCount < MinSamples) { switch (Style) { case spherical: Proto = NewSphericalProto (N, Cluster, Statistics); break; case elliptical: case automatic: Proto = NewEllipticalProto (N, Cluster, Statistics); break; case mixed: Proto = NewMixedProto (N, Cluster, Statistics); break; } Proto->Significant = FALSE; } return (Proto);} // MakeDegenerateProto/** TestEllipticalProto ****************************************************Parameters: Clusterer data struct containing samples being clustered Cluster cluster to be made into an elliptical prototype Statistics statistical info about clusterGlobals: NoneOperation: This routine tests the specified cluster to see if *** there is a statistically significant difference between* the sub-clusters that would be made if the cluster were to* be split. If not, then a new prototype is formed and* returned to the caller. If there is, then NULL is returned* to the caller.Return: Pointer to new elliptical prototype or NULL.****************************************************************************/PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics) { int N = Clusterer->SampleSize; CLUSTER* Left = Cluster->Left; CLUSTER* Right = Cluster->Right; if (Left == NULL || Right == NULL) return NULL; int TotalDims = Left->SampleCount + Right->SampleCount; if (TotalDims < N + 1) return NULL; FLOAT32* Inverse = (FLOAT32 *) Emalloc(N * N * sizeof(FLOAT32)); FLOAT32* Delta = (FLOAT32*) Emalloc(N * sizeof(FLOAT32)); double err = InvertMatrix(Statistics->CoVariance, N, Inverse); if (err > 1) { cprintf("Clustering error: Matrix inverse failed with error %g\n", err); } for (int dim = 0; dim < N; ++dim) { Delta[dim] = Left->Mean[dim] - Right->Mean[dim]; } // Compute Hotelling's T-squared. double Tsq = 0.0; for (int x = 0; x < N; ++x) { double temp = 0.0; for (int y = 0; y < N; ++y) { temp += Inverse[y + N*x] * Delta[y]; } Tsq += Delta[x] * temp; } memfree(Inverse); memfree(Delta); Tsq *= Left->SampleCount * Right->SampleCount / TotalDims; double F = Tsq * (TotalDims - N - 1) / ((TotalDims - N) * 2); int Fx = N; if (Fx > FTABLE_X) Fx = FTABLE_X; --Fx; int Fy = TotalDims - N - 1; if (Fy > FTABLE_Y) Fy = FTABLE_Y; --Fy; if (F < FTable[Fy][Fx]) { return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics); } return NULL;}/* MakeSphericalProto *******************************************************Parameters: Clusterer data struct containing samples being clustered Cluster cluster to be made into a spherical prototype Statistics statistical info about cluster Buckets histogram struct used to analyze distributionGlobals: NoneOperation: This routine tests the specified cluster to see if it can be approximated by a spherical normal distribution. If it can be, then a new prototype is formed and returned to the caller. If it can't be, then NULL is returned to the caller.Return: Pointer to new spherical prototype or NULL.Exceptions: NoneHistory: 6/1/89, DSJ, Created.******************************************************************************/PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATISTICS *Statistics, BUCKETS *Buckets) { PROTOTYPE *Proto = NULL; int i; // check that each dimension is a normal distribution for (i = 0; i < Clusterer->SampleSize; i++) { if (Clusterer->ParamDesc[i].NonEssential) continue; FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]), Cluster->Mean[i], sqrt ((FLOAT64) (Statistics->AvgVariance))); if (!DistributionOK (Buckets)) break; } // if all dimensions matched a normal distribution, make a proto if (i >= Clusterer->SampleSize) Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics); return (Proto);} // MakeSphericalProto/** MakeEllipticalProto ****************************************************Parameters: Clusterer data struct containing samples being clustered Cluster cluster to be made into an elliptical prototype Statistics statistical info about cluster Buckets histogram struct used to analyze distributionGlobals: NoneOperation: This routine tests the specified cluster to see if it can be approximated by an elliptical normal distribution. If it
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -