?? hyper.cpp
字號:
// printf("\n*******************************************\n");
// printf("Read %d headers\n", i);
fclose(fd);
third=clock();
trace_evaluation(trace, i, root);
forth=clock();
duration2=forth-third;
long int space=0;
space_node(root,space);
space=space/(1024*1024);
printf("規則的個數 初始化時間毫秒 搜索時間毫秒 空間MBytes\n");
printf("%u %f %f %u\n",original,duration1,duration2/i,(node_count*32+branch_count*4+filters_count*16)/1024);
// printf("%u %f %f %u\n",original,duration1,duration2/i,space);
}
node *tree_construction(id_list *filters, hyper_rectangle rectangle, UINT depth) {
UINT id_list_count(id_list *);
UINT minimal_value(id_list *, UINT), maximal_value(id_list *, UINT);
id_list *add_id_list(id_list *, filter *);
id_list *remove_id_list(id_list *, filter *);
bool id_list_compare(id_list *, id_list *);
UINT filter_count=id_list_count(filters), i;
cut_calculation *cuts;
id_list **filter_lists;
node *current_node=(node *)calloc(1, sizeof(node));
node_count++;
//沒有給當前節點規定范圍,加入對應域的賦值過程
//修改時間:3月3日
for(i=0;i<DIMENSIONS;i++)
{
current_node->start[i]=rectangle.start[i];
current_node->end[i]=rectangle.end[i];
}
/*if the number of filters is less than the certain value, the node becomes to a leaf*/
// if ((filter_count <= bucket_size) || (depth > 8)){
if (filter_count <= common_bucket_size) {
current_node->filters=filters;
current_node->filter_count=filter_count;
filters_count+=filter_count;
// for(i=0;i<DIMENSIONS;i++)
// {
// current_node->start[i]=rectangle.start[i];
// current_node->end[i]=rectangle.end[i];
// }
return current_node;
}
/* find out and remove the filters, which completely cover the rectangle of the current node */
for (id_list *head=filters; head!=NULL; head=head->next) {
bool filter_all_cover=true;
for (i=0; i<DIMENSIONS; i++)
if ((head->filters->start[i] > rectangle.start[i]) || (head->filters->end[i] < rectangle.end[i]))
filter_all_cover=false;
//把通配符的規則加入當前節點
if (filter_all_cover)
current_node->filters=add_id_list(current_node->filters, head->filters);
}
// for (id_list *head=current_node->filters; head!=NULL; head=head->next)
//從原來的鏈表中將通配符的規則刪掉
for (head=current_node->filters; head!=NULL; head=head->next)
filters=remove_id_list(filters, head->filters);
filter_count=id_list_count(filters);
if (filter_count==0)
return current_node;
//這句話在此不知道作用是什么,因此隱去
//修改時間:3月3日
// current_node->filters=NULL;
#ifdef region_compact
/* region compaction */
for (i=0; i<DIMENSIONS; i++) {
rectangle.start[i]=max(minimal_value(filters, i), rectangle.start[i]);
rectangle.end[i]=min(maximal_value(filters, i), rectangle.end[i]);
}
/* This time, we remove the filters covered the compacted region first to speed up */
UINT count=0;
for (id_list *head=filters; (head!=NULL) && (count < common_bucket_size); head=head->next) {
bool filter_all_cover=true;
for (i=0; (i<DIMENSIONS); i++)
if ((head->filters->start[i] > rectangle.start[i]) || (head->filters->end[i] < rectangle.end[i]))
filter_all_cover=false;
if (filter_all_cover) {
count++;
current_node->filters=add_id_list(current_node->filters, head->filters);
}
}
for (id_list *head=current_node->filters; head!=NULL; head=head->next)
filters=remove_id_list(filters, head->filters);
filter_count=id_list_count(filters);
if (filter_count==0)
return current_node;
#endif
/* The function calculates the optimal cut for all dimensions */
cut_calculation *cuts_optimization(hyper_rectangle, id_list *);
cuts=cuts_optimization(rectangle, filters);
UINT total_cuts=1;
for (i=0; i<DIMENSIONS; i++) {
current_node->start[i]=rectangle.start[i];
current_node->end[i]=rectangle.end[i];
current_node->cut[cuts[i].dimension]=cuts[i].cuts;
total_cuts*=cuts[i].cuts;
}
/*if there is no suggested cut, the node will become a leave */
if (total_cuts <= cut_threshold) {
current_node->filters=filters;
filters_count+=filter_count;
for (i=0; i<DIMENSIONS; i++)
current_node->cut[i]=1;
return current_node;
}
/*
if (total_cuts == 1) {
current_node->filters=filters;
filters_count+=filter_count;
for (i=0; i<DIMENSIONS; i++)
current_node->cut[i]=1;
return current_node;
}
*/
current_node->branch=(node **)calloc(total_cuts, sizeof(node));
filter_lists=(id_list **)calloc(total_cuts, sizeof(id_list));
current_node->rectangle=(hyper_rectangle *)calloc(total_cuts, sizeof(hyper_rectangle));
/* Calculate the region of the hyper rectangle */
void rectangle_generation(node *);
rectangle_generation(current_node);
/* Decide the relevant rectangles of each filter by appending them into the associated link lists*/
void filter_dispatch(id_list *, node *, id_list **, UINT);
filter_dispatch(filters, current_node, filter_lists, total_cuts);
/*
bool rectanglecmp(hyper_rectangle, hyper_rectangle);
for (i=0; i<total_cuts; i++) {
if (rectanglecmp(rectangle, current_node->rectangle[i])) {
printf("\n %d, %d", current_node->cut[0], current_node->cut[1]);
for (UINT j=0; j<DIMENSIONS; j++)
printf("%x,%x, ", current_node->rectangle[i].start[j], current_node->rectangle[i].end[j]);
}
}
*/
#ifdef debug
printf("\n%d, %d, %d, %d, ", node_count, id_list_count(filters), current_node->filter_count, total_cuts);
for (i=0; i<DIMENSIONS; i++)
printf("%x,%x(%d), ", current_node->start[i], current_node->end[i], current_node->cut[i]);
#endif
if (current_node->filter_count == filter_count) {
for (i=0; i<total_cuts; i++)
current_node->branch[i]=NULL;
for (i=0; i<DIMENSIONS; i++)
current_node->cut[i]=1;
filters_count+=filter_count;
return current_node;
}
for (i=0; i<total_cuts; i++)
if (filter_lists[i]!=NULL)
if (id_list_compare(filter_lists[i], current_node->filters))
filter_lists[i]=NULL;
#ifdef redundant_removal
rectangle_list *rectangles=(rectangle_list *)calloc(1, sizeof(rectangle_list)), *tmp;
rectangles->rectangle=current_node->rectangle[0];
rectangles->filters=filter_lists[0];
for (i=1; i<total_cuts; i++) {
bool flag=true;
for (tmp=rectangles; (tmp!=NULL) && flag; tmp=tmp->next)
if (id_list_compare(filter_lists[i], tmp->filters)) {
flag=false;
for (UINT j=0; j<DIMENSIONS; j++) {
tmp->rectangle.start[j]=min(current_node->rectangle[i].start[j], tmp->rectangle.start[j]);
tmp->rectangle.end[j]=max(current_node->rectangle[i].end[j], tmp->rectangle.end[j]);
}
// if (rectanglecmp(rectangle, tmp->rectangle))
// printf("");
}
if (flag) {
for (tmp=rectangles; tmp->next!=NULL; )
tmp=tmp->next;
tmp->next=(rectangle_list *)calloc(1, sizeof(rectangle_list));
tmp->next->rectangle=current_node->rectangle[i];
tmp->next->filters=filter_lists[i];
}
}
for (tmp=rectangles; tmp!=NULL; tmp=tmp->next)
if (tmp->filters!=NULL) {
node *temp_node=NULL;
if (rectanglecmp(rectangle, tmp->rectangle)) {
/* For the redundant branch removal, if the merge rectangle is equal to the original rectangle, it might result in infinite recursive calls,
Therefore, we must move these filters into the current node (just like the instruction specified in common_filter_extraction)*/
current_node->filters=tmp->filters;
current_node->filter_count=id_list_count(current_node->filters);
for (i=0; i<total_cuts; i++) {
if (id_list_compare(filter_lists[i], tmp->filters))
filter_lists[i]=NULL;
}
}
else {
temp_node=tree_construction(tmp->filters, tmp->rectangle, depth+1);
for (i=0; i<total_cuts; i++) {
if (current_node->branch[i]==NULL)
if (id_list_compare(filter_lists[i], tmp->filters))
current_node->branch[i]=temp_node;
}
}
}
#else
for (i=0; i<total_cuts; i++)
if (id_list_count(filter_lists[i])>0)
current_node->branch[i]=tree_construction(filter_lists[i], current_node->rectangle[i], depth+1);
#endif
branch_count+=total_cuts;
if (current_node->filter_count > 0)
branch_count++;
filters_count+=current_node->filter_count;
return current_node;
}
UINT optimal_cut(id_list *filters, UINT dimension, UINT maximal_cut, UINT start, UINT end) {
bool range_cover(UINT, UINT, UINT, UINT);
UINT id_list_count(id_list *), range_calculation(UINT, UINT, UINT);
UINT i, j;
UINT range, max_length, mean_length, empty_bucket, *tmp_array, prev_max_length=id_list_count(filters), prev_mean_length=id_list_count(filters); //, prev_empty_bucket=0;
UINT region_start, region_end, count=0, best_cut=2;
for (i=2; i<=maximal_cut; i*=2) {
tmp_array=(UINT *)calloc(i, sizeof(UINT));
range=range_calculation(start, end, i);
region_start=start;
region_end=start+range-1;
for (j=0; j<i; j++) {
for (id_list *head=filters; head != NULL; head=head->next) {
if (range_cover(region_start, region_end, head->filters->start[dimension], head->filters->end[dimension]))
tmp_array[j]++;
}
region_start=region_end+1;
region_end=min(region_start+range-1, end);
}
mean_length=max_length=tmp_array[0];
empty_bucket=0;
for (j=1; j<i; j++) {
if (tmp_array[j]==0)
empty_bucket++;
if (tmp_array[j]>max_length)
max_length=tmp_array[j];
mean_length+=tmp_array[j];
}
mean_length/=i;
free(tmp_array);
#define threshold 40
if (((prev_max_length-max_length)*threshold/prev_max_length) >= spfac)
// best_cut=i;
if (((prev_mean_length-mean_length)*threshold/prev_mean_length) >= spfac)
best_cut=i;
// if ((empty_bucket*threshold/i) <= spfac)
// best_cut=i;
}
return best_cut;
}
cut_calculation *cuts_optimization(hyper_rectangle rectangle, id_list *filters) {
UINT log2(UINT);
UINT id_list_count(id_list *);
UINT distinct_field_in_rectangle(id_list *, UINT, hyper_rectangle);
UINT filter_count=id_list_count(filters), maximal_cut, i;
int cut_sort( const void *, const void *);
cut_calculation *cuts=(cut_calculation *)calloc(DIMENSIONS, sizeof(cut_calculation));
float mean=0.0;
//計算兩維域上規則的不同范圍的個數
for (i=0; i<DIMENSIONS; i++) {
cuts[i].dimension=i;
cuts[i].distinct_field=distinct_field_in_rectangle(filters, i, rectangle);
}
//但是為什么要處以4呢?
cuts[0].normalized_count=cuts[0].distinct_field/4;
cuts[1].normalized_count=cuts[1].distinct_field/4;
for (i=0; i<DIMENSIONS; i++)
mean+=cuts[i].normalized_count;
//計算不同值的個數的平均值
mean/=DIMENSIONS;
//對每一維的cut按照不同范圍的值的個數的大小進行排序,順序是從小到大
qsort((void *)cuts, DIMENSIONS, sizeof(cut_calculation), cut_sort);
//maximal_cut是1左移i位第一次大于 f(N)的值 sqrt(x):計算x的平方根
maximal_cut=log2(sqrt((double)filter_count)*spfac);
bool flag=false;
if (maximal_cut >= 2)
flag=true;
float temp_count=0.0;
UINT rouding(float);
//float pow(float x, float y), 計算x的y次冪,返回冪指數的結果。
for (i=0; i<DIMENSIONS; i++)
if ((cuts[i].normalized_count > mean) || (flag))
temp_count+=cuts[i].normalized_count;
for (i=0; i<DIMENSIONS; i++)
if ((cuts[i].normalized_count > mean) || (flag))
cuts[i].maximal_cuts=pow(2.0, rouding((float)cuts[i].normalized_count*(float)maximal_cut/temp_count));
else
cuts[i].maximal_cuts=2;
for (i=0; i<DIMENSIONS; i++)
if ((cuts[i].normalized_count > mean) || (flag))
cuts[i].cuts=min(cuts[i].maximal_cuts, optimal_cut(filters, cuts[i].dimension, cuts[i].maximal_cuts, rectangle.start[cuts[i].dimension], rectangle.end[cuts[i].dimension]));
else cuts[i].cuts=1;
return cuts;
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -