// convert.cpp
// Font size:
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
#include <string>
using namespace std;
// Conversion modes. main() selects one by comparing argv[1] against the
// macro's name with strcmp and then switches on the numeric value.

// Reads a binary file of int records (id, item count, items, one trailing
// int) and prints each record as a line of text.
#define OUTCACTUS 1
// Reads whitespace-separated tokens from the input file, maps each distinct
// string per output column to an integer id (starting at 1), and writes
// recordID, column count, the ids and a -1 terminator to stdout (text when
// "ascii" is given, raw ints otherwise).
#define CSVTOCACTUS 2
// Reads a binary file whose header holds the tuple count, the attribute count
// and one int per attribute, then prints the records that follow.
#define OUTCLICK 3
// Two-pass conversion: the first pass builds per-column value maps and a
// label map and counts tuples; a header is then written to stdout.
// NOTE(review): the remainder of this mode is not visible in this listing.
#define CSVTOCLICK 4
// NOTE(review): handling of this mode is not visible in this listing.
#define CLICKTOROCK 5
// NOTE(review): handling of this mode is not visible in this listing.
#define CONFUSION 6
// Size of each dimension of the square confusionMatrix array declared in main().
#define MAXCLUSTERS 1000
int main(int argc, char *argv[])
{
int mode;
int numItems, id, i, j, k;
int* items;
char buffer[1024];
ifstream ifile;
ifstream ifile2;
ifstream ifile3;
ofstream ofile;
ofstream ofile2;
int currentColumn, outColumn, totalColumns, recordID, outColumns, outValue;
int labelColumn, numberOfTuples;
int common, total;
vector<int> ignoredColumns;
vector<int>::const_iterator columnIt;
map<string,int>* attrMap;
map<string,int>::iterator attrMapIt;
map<string,int> labelMap;
int highestLabel;
vector<int> highestOutValue;
vector<int> columnOffset;
map<int, int> confusionMap;
int mapFrom, mapTo;
int confusionMatrix[MAXCLUSTERS][MAXCLUSTERS];
int cluster1, cluster2;
string entry;
int maxDim1, maxDim2;
int position;
string tokenizer;
int clusterNumber;
int ignore_st= 0;
bool use_ascii = false;
vector<int> clusters1;
vector<int> clusters2;
int* tuples;
if(argc < 2){
printf("convert: You must give a conversion mode.\n");
return(1);
}
if(!strcmp(argv[1], "OUTCACTUS"))
mode = OUTCACTUS;
else if(!strcmp(argv[1], "CSVTOCACTUS"))
mode = CSVTOCACTUS;
else if(!strcmp(argv[1], "CSVTOCLICK"))
mode = CSVTOCLICK;
else if(!strcmp(argv[1], "CLICKTOROCK"))
mode = CLICKTOROCK;
else if(!strcmp(argv[1], "CONFUSION"))
mode = CONFUSION;
else if(!strcmp(argv[1], "OUTCLICK"))
mode = OUTCLICK;
else{
printf("convert: I don't know that mode you specified.\n");
return(1);
}
ifile.open(argv[2]);
if(!ifile){
printf("convert: Cannot read input file.\n");
return(1);
}
switch(mode){
case OUTCACTUS:
items = NULL;
while(ifile.read((char*)&id, sizeof(int))){
printf("%d ", id);
ifile.read((char*)&numItems, sizeof(int));
if(!items){
items = new int[numItems];
}
ifile.read((char*)items, numItems*sizeof(int));
for(i = 0; i < numItems; ++i){
printf("%d ", items[i]);
}
printf("\n");
// EOL
ifile.read((char*)&id, sizeof(int));
}
if(items)
delete items;
break;
case OUTCLICK:
items = NULL;
ifile.read((char*)&numberOfTuples, sizeof(int));
printf("Total %d tuples in the file, ", numberOfTuples);
ifile.read((char*)&numItems, sizeof(int));
printf("with %d attributes.\n", numItems);
items = new int[numItems];
ifile.read((char*)items, numItems*sizeof(int));
printf("Distinct attribute values are\n");
for(i = 0; i < numItems; i++){
printf("A%d (%d), ", i+1, items[i]);
}
printf("\n");
delete items;
items = new int[numItems + 3];
while(ifile.read((char*)items, (numItems+3)*sizeof(int))){
for(i = 0; i < numItems + 2; ++i){
printf("%d ", items[i]);
}
printf("\n");
}
break;
case CSVTOCACTUS:
if(argc < 4){
printf("convert: You need to specify the number of columns in the input file\n");
printf("convert: CSVTOCACTUS <file> <totalColumns> [ascii] {ignoredColumn}*\n");
exit(1);
}
use_ascii = false;
totalColumns = atoi(argv[3]);
ignore_st = 4;
if (strcmp(argv[4], "ascii") == 0){
ignore_st = 5;
use_ascii = true;
}
for(i = ignore_st; i < argc; i++){
ignoredColumns.push_back(atoi(argv[i]));
// printf("Ignoring %d\n", atoi(argv[i]));
}
recordID = 1;
outColumns = totalColumns - ignoredColumns.size();
attrMap = new map<string,int>[outColumns];
// printf("OutColumns = %d\n", outColumns);
highestOutValue.assign(outColumns, 0);
while(!ifile.eof()){
currentColumn = 1;
outColumn = 0;
buffer[0] = '\0';
if (use_ascii) cout << recordID << " ";
else cout.write((char*)&recordID, sizeof(int));
recordID++;
if (use_ascii) cout << outColumns << " ";
else cout.write((char*)&outColumns, sizeof(int));
while(outColumn < outColumns){
ifile >> buffer;
//printf("'%s' is current column %d, out column %d\n", buffer, currentColumn, outColumn);
if(buffer[0] == '\n' || buffer[0] == ','){
}
else{
// Check if this column is blocked
for(columnIt = ignoredColumns.begin(); columnIt != ignoredColumns.end(); columnIt++){
if(currentColumn == *columnIt)
break;
}
if(columnIt == ignoredColumns.end()){
entry = buffer;
outValue = attrMap[outColumn][entry];
if(outValue == 0){
outValue = ++highestOutValue[outColumn];
attrMap[outColumn][entry] = outValue;
}
// outValue += outColumn * 100;
if (use_ascii) cout << outValue << " ";
else cout.write((char*)&outValue, sizeof(int));
outColumn++;
}
currentColumn++;
}
}
outValue = -1;
if (use_ascii) cout << outValue << endl;
else cout.write((char*)&outValue, sizeof(int));
}
delete attrMap;
break;
case CSVTOCLICK:
if(argc < 7){
printf("convert: You need to specify the number of columns in the input file and the label column\n");
printf("convert: CSVTOCACTUS <sourcefile> <confusionfile> <mappingfile> <totalColumns> <label column> [ascii] {ignoredColumn}*\n");
exit(1);
}
ofile.open(argv[3]);
ofile2.open(argv[4]);
if(!ofile.is_open() || !ofile2.is_open()){
cout << "convert: The confusion file or mapping file could nout be opened." << endl;
exit(1);
}
totalColumns = atoi(argv[5]);
labelColumn = atoi(argv[6]);
use_ascii = false;
ignore_st = 7;
if (strcmp(argv[7], "ascii") == 0){
ignore_st = 8;
use_ascii = true;
}
for(i = ignore_st; i < argc; i++){
ignoredColumns.push_back(atoi(argv[i]));
}
outColumns = totalColumns - ignoredColumns.size();
attrMap = new map<string,int>[outColumns];
highestOutValue.assign(outColumns, 0);
columnOffset.assign(outColumns, 0);
highestLabel = 0;
numberOfTuples = 0;
// First pass: Compute attribute -> value mapping and count the number of
// tuples
while(!ifile.eof()){
currentColumn = 0;
outColumn = 0;
buffer[0] = '\0';
while(outColumn < outColumns){
ifile >> buffer;
entry = buffer;
if(currentColumn == labelColumn){
if(labelMap[entry] == 0)
labelMap[entry] = ++highestLabel;
}
if(buffer[0] == '\n' || buffer[0] == ','){
}
else{
// Check if this column is blocked
for(columnIt = ignoredColumns.begin(); columnIt != ignoredColumns.end(); columnIt++){
if(currentColumn == *columnIt)
break;
}
if(columnIt == ignoredColumns.end()){
outValue = attrMap[outColumn][entry];
if(outValue == 0){
outValue = ++highestOutValue[outColumn];
attrMap[outColumn][entry] = outValue;
}
outColumn++;
}
currentColumn++;
}
}
while(currentColumn < totalColumns){
ifile >> buffer;
currentColumn++;
}
outValue = -1;
numberOfTuples++;
}
// cout << "Number of tuples " << numberOfTuples << endl;
ifile2.open(argv[2]);
if (use_ascii) cout << numberOfTuples << " ";
else cout.write((char*)&numberOfTuples, sizeof(int));
if (use_ascii) cout << outColumns << " ";
else cout.write((char*)&outColumns, sizeof(int));
for(i = 0; i < outColumns; ++i){
outValue = highestOutValue[i];
if (use_ascii) cout << outValue << " ";
else cout.write((char*)&(outValue), sizeof(int));
}
if (use_ascii) cout << endl;
for(i = 1; i < outColumns; i++){
columnOffset[i] = columnOffset[i-1] + highestOutValue[i-1];
}
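// (The lines below appear to be keyboard-shortcut help from the web code viewer, not program text.)
// Keyboard shortcuts
// Copy code
// Ctrl + C
// Search code
// Ctrl + F
// Full-screen mode
// F11
// Switch theme
// Ctrl + Shift + D
// Show shortcuts
// ?
// Increase font size
// Ctrl + =
// Decrease font size
// Ctrl + -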