?? garff.cpp
字號:
/*
	Copyright (C) 2006, Mike Gashler

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	see http://www.gnu.org/copyleft/lesser.html
*/

#include "GArff.h"
#include "../GClasses/GArray.h"
#include "../GClasses/GMacros.h"
#include "../GClasses/GMath.h"
#include <math.h>
#include "GBits.h"
#include "GMatrix.h"

// Creates an empty relation: no name, an empty attribute array, and
// input/output bookkeeping marked as "not yet computed" (-1 / NULL), which
// is what GetInputCount, GetOutputCount, GetInputIndex and GetOutputIndex
// test for before calling CountInputs.
GArffRelation::GArffRelation()
{
	m_szName = NULL;
	m_pAttributes = new GPointerArray(32);
	m_nInputCount = -1;
	m_pInputIndexes = NULL;
	m_nOutputCount = -1;
	m_pOutputIndexes = NULL;
}

// Deletes every attribute stored in m_pAttributes, then the array itself,
// the relation name and both index tables.
GArffRelation::~GArffRelation()
{
	int n;
	int nCount;
	nCount = m_pAttributes->GetSize();
	for(n = 0; n < nCount; n++)
		delete((GArffAttribute*)m_pAttributes->GetPointer(n));
	delete(m_pAttributes);
	delete[] m_szName;
	delete[] m_pInputIndexes;
	delete[] m_pOutputIndexes;
}

// Appends pAttr to the attribute list. The destructor deletes every stored
// attribute, so the relation owns pAttr from here on.
void GArffRelation::AddAttribute(GArffAttribute* pAttr)
{
	m_pAttributes->AddPointer(pAttr);
}

// Reports a parse problem through GAssert and returns NULL so that callers
// can write "return ParseError(...)". nLine is accepted but is not used in
// the report.
GArffRelation* ParseError(int nLine, const char* szProblem)
{
	GAssert(false, szProblem);
	return NULL;
}

// Returns how many attributes report IsContinuous().
int GArffRelation::CountContinuousAttributes()
{
	int n;
	int nAttributes = GetAttributeCount();
	int nCount = 0;
	for(n = 0; n < nAttributes; n++)
	{
		GArffAttribute* pAttr = GetAttribute(n);
		if(pAttr->IsContinuous())
			nCount++;
	}
	return nCount;
}

// Writes this relation and the rows of pData to szFilename in ARFF form:
// an @RELATION line, one @ATTRIBUTE line per attribute, then @DATA followed
// by one comma-separated line per row.
//   pData      - rows to write; each vector is indexed by attribute index
//   szFilename - path that is opened with fopen(..., "w")
// A failure to open the file is reported through ThrowError.
void GArffRelation::SaveArffFile(GArffData* pData, const char* szFilename)
{
	// Open the file for writing
	FILE* pFile = fopen(szFilename, "w");
	FileHolder hFile(pFile); // NOTE(review): presumably closes pFile on scope exit -- confirm
	if(!pFile)
		ThrowError(L"Failed to open file: %s", szFilename);

	// Write the relation title
	fputs("@RELATION ", pFile);
	const char* szName = GetName();
	if(!szName)
		szName = "Untitled";
	fputs(szName, pFile);
	fputs("\n\n", pFile);

	// Write the attributes
	char szTmp[64];
	int i, j;
	for(i = 0; i < GetAttributeCount(); i++)
	{
		GArffAttribute* pAttr = GetAttribute(i);
		fputs("@ATTRIBUTE ", pFile);
		szName = pAttr->GetName();
		if(!szName)
		{
			// Unnamed attribute: synthesize "a<index>". The digits are
			// written after the prefix so they do not overwrite it.
			strcpy(szTmp, "a");
			itoa(i, szTmp + 1, 10);
			szName = szTmp;
		}
		fputs(szName, pFile);
		fputs("\t", pFile);
		if(pAttr->IsContinuous())
			fputs("CONTINUOUS", pFile);
		else
		{
			fputs("{", pFile);
			for(j = 0; j < pAttr->GetValueCount(); j++)
			{
				// Separator goes between values only, so the list does
				// not end with a dangling comma
				if(j > 0)
					fputs(",", pFile);
				szName = pAttr->GetValue(j);
				if(!szName)
				{
					// Unnamed value: synthesize "v<index>"
					strcpy(szTmp, "v");
					itoa(j, szTmp + 1, 10);
					szName = szTmp;
				}
				fputs(szName, pFile);
			}
			fputs("}", pFile);
		}
		fputs("\n", pFile);
	}

	// Write the data
	fputs("\n@DATA\n", pFile);
	for(i = 0; i < pData->GetSize(); i++)
	{
		double* pVector = pData->GetVector(i);
		for(j = 0; j < GetAttributeCount(); j++)
		{
			if(j > 0)
				fputs(",", pFile);
			GArffAttribute* pAttr = GetAttribute(j);
			if(pAttr->IsContinuous())
			{
				// NOTE(review): ParseDataRow also stores "?" as -1 for
				// continuous attributes; that cannot be told apart from a
				// real -1 here, so the number is written as-is.
				GBits::DoubleToString(szTmp, pVector[j]);
				fputs(szTmp, pFile);
			}
			else
			{
				int nVal = (int)pVector[j];
				if(nVal < 0)
				{
					// ParseDataRow encodes a missing value ("?") as -1
					fputs("?", pFile);
				}
				else
				{
					szName = pAttr->GetValue(nVal);
					if(!szName)
					{
						// Same "v<value index>" fallback that the
						// attribute header uses
						strcpy(szTmp, "v");
						itoa(nVal, szTmp + 1, 10);
						szName = szTmp;
					}
					fputs(szName, pFile);
				}
			}
		}
		fputs("\n", pFile);
	}
}

// Advances *pPos over every character <= ' ' (whitespace and control
// characters), incrementing *pLine for each '\n'. Never reads at or beyond
// nLen.
static void ArffSkipWhitespace(const char* szFile, int nLen, int* pPos, int* pLine)
{
	while(*pPos < nLen && szFile[*pPos] <= ' ')
	{
		if(szFile[*pPos] == '\n')
			(*pLine)++;
		(*pPos)++;
	}
}

// Steps *pPos forward by one and then on to the next '\n' (or to the end of
// the buffer). The bound is tested before the character is read.
static void ArffSkipRestOfLine(const char* szFile, int nLen, int* pPos)
{
	for((*pPos)++; *pPos < nLen && szFile[*pPos] != '\n'; (*pPos)++)
	{
	}
}

// Parses an ARFF document held in memory.
//   ppOutData - receives the parsed rows on success
//   szFile    - the text to parse (nLen characters; no terminator is
//               relied upon)
//   nLen      - number of characters in szFile
// Returns the parsed relation, or NULL (via ParseError) on a syntax
// problem. Lines whose first non-blank character is '%' are comments.
/*static*/ GArffRelation* GArffRelation::ParseFile(GArffData** ppOutData, const char* szFile, int nLen)
{
	// Parse the relation name
	int nPos = 0;
	int nLine = 1;
	Holder<GArffRelation*> hRelation(new GArffRelation());
	GArffRelation* pRelation = hRelation.Get();
	while(true)
	{
		// Skip Whitespace
		ArffSkipWhitespace(szFile, nLen, &nPos, &nLine);
		if(nPos >= nLen)
			return ParseError(nLine, "Expected @RELATION");

		// Check for comments
		if(szFile[nPos] == '%')
		{
			ArffSkipRestOfLine(szFile, nLen, &nPos);
			continue;
		}

		// Parse Relation
		if(nLen - nPos < 9 || strnicmp(&szFile[nPos], "@RELATION", 9) != 0) // 9 = strlen("@RELATION")
			return ParseError(nLine, "Expected @RELATION");
		nPos += 9;

		// Skip Whitespace
		ArffSkipWhitespace(szFile, nLen, &nPos, &nLine);
		if(nPos >= nLen)
			return ParseError(nLine, "Expected relation name");

		// Parse Name (everything up to the next character <= ' ')
		int nNameStart = nPos;
		while(nPos < nLen && szFile[nPos] > ' ')
			nPos++;
		pRelation->m_szName = new char[nPos - nNameStart + 1];
		memcpy(pRelation->m_szName, &szFile[nNameStart], nPos - nNameStart);
		pRelation->m_szName[nPos - nNameStart] = '\0';
		break;
	}

	// Parse the attribute section
	while(true)
	{
		// Skip Whitespace
		ArffSkipWhitespace(szFile, nLen, &nPos, &nLine);
		if(nPos >= nLen)
			return ParseError(nLine, "Expected @ATTRIBUTE or @DATA");

		// Check for comments
		if(szFile[nPos] == '%')
		{
			ArffSkipRestOfLine(szFile, nLen, &nPos);
			continue;
		}

		// Check for @DATA
		if(nLen - nPos < 5) // 5 = strlen("@DATA")
			return ParseError(nLine, "Expected @DATA");
		if(strnicmp(&szFile[nPos], "@DATA", 5) == 0)
		{
			nPos += 5;
			break;
		}

		// Parse @ATTRIBUTE
		if(nLen - nPos < 10) // 10 = strlen("@ATTRIBUTE")
			return ParseError(nLine, "Expected @ATTRIBUTE");
		if(strnicmp(&szFile[nPos], "@ATTRIBUTE", 10) != 0)
			return ParseError(nLine, "Expected @ATTRIBUTE or @DATA");
		nPos += 10;
		GArffAttribute* pAttr = GArffAttribute::Parse(&szFile[nPos], nLen - nPos);
		if(!pAttr)
			return ParseError(nLine, "Problem with attribute");
		pRelation->m_pAttributes->AddPointer(pAttr);

		// Move to next line
		ArffSkipRestOfLine(szFile, nLen, &nPos);
	}

	// Parse the data section
	Holder<GArffData*> hData(new GArffData(256));
	GArffData* pData = hData.Get();
	while(true)
	{
		// Skip Whitespace
		ArffSkipWhitespace(szFile, nLen, &nPos, &nLine);
		if(nPos >= nLen)
			break;

		// Check for comments
		if(szFile[nPos] == '%')
		{
			ArffSkipRestOfLine(szFile, nLen, &nPos);
			continue;
		}

		// Parse the data line
		double* pRow = pRelation->ParseDataRow(&szFile[nPos], nLen - nPos);
		if(!pRow)
			return ParseError(nLine, "Problem with data line");
		pData->AddVector(pRow);

		// Move to next line
		ArffSkipRestOfLine(szFile, nLen, &nPos);
	}

	// Success: hand both objects to the caller
	*ppOutData = hData.Drop();
	return hRelation.Drop();
}

// Parses one data line into a newly allocated array with one double per
// attribute of this relation.
//   szFile - start of the line (need not be terminated)
//   nLen   - number of characters available from szFile
// Values are separated by ',' or by whitespace. "?" is stored as -1,
// continuous values are converted with atof, and enumerated values are
// stored as the index returned by FindEnumeratedValue.
// Returns NULL if the line ends before every attribute has a value, if a
// value does not fit the internal token buffer, if a continuous value does
// not start with '.', '-' or a digit, or if an enumerated value is unknown.
double* GArffRelation::ParseDataRow(const char* szFile, int nLen)
{
	char szBuf[512];
	int nAttributeCount = GetAttributeCount();
	Holder<double*> hData(new double[nAttributeCount]);
	double* pData = hData.Get();
	GArffAttribute* pAttr;
	int n;
	for(n = 0; n < nAttributeCount; n++)
	{
		// Eat whitespace. Reaching the end of the line here means the row
		// has too few values.
		while(nLen > 0 && *szFile <= ' ')
		{
			if(*szFile == '\n')
				return NULL;
			szFile++;
			nLen--;
		}
		if(nLen < 1)
			return NULL;

		// Parse the next value
		pAttr = GetAttribute(n);
		int nPos;
		for(nPos = 0; nPos < nLen; nPos++)
		{
			if(szFile[nPos] == ',')
				break;
			if(szFile[nPos] == '\n')
				break;
			if(nPos > 0 && szFile[nPos] > ' ' && szFile[nPos - 1] <= ' ')
			{
				// A new token started after whitespace, so the values are
				// whitespace-separated; back up onto the separator
				nPos--;
				break;
			}
		}

		// Trim trailing whitespace from the token
		int nEnd;
		for(nEnd = nPos; nEnd > 0 && szFile[nEnd - 1] <= ' '; nEnd--)
		{
		}

		// Reject tokens that would not fit (with the terminator) in szBuf
		if(nEnd >= (int)sizeof(szBuf))
			return NULL;
		memcpy(szBuf, szFile, nEnd);
		szBuf[nEnd] = '\0';
		if(strcmp(szBuf, "?") == 0)
			pData[n] = -1; // missing value
		else if(pAttr->IsContinuous())
		{
			// Parse a continuous value
			if(szBuf[0] == '.' || szBuf[0] == '-' || (szBuf[0] >= '0' && szBuf[0] <= '9'))
				pData[n] = atof(szBuf);
			else
				return NULL;
		}
		else
		{
			// Parse an enumerated value
			int nVal = pAttr->FindEnumeratedValue(szBuf);
			if(nVal < 0)
				return NULL;
			pData[n] = nVal;
		}

		// Advance past the value and its separator. A terminating '\n' is
		// deliberately left in place so that, if more attributes are still
		// expected, the whitespace scan above sees it and rejects the row
		// instead of reading values from the following line.
		if(nPos < nLen && szFile[nPos] != '\n')
			nPos++;
		szFile += nPos;
		nLen -= nPos;
	}
	return hData.Drop();
}

// Returns the number of attributes in this relation.
int GArffRelation::GetAttributeCount()
{
	return m_pAttributes->GetSize();
}

// Returns the n-th attribute (no range check is done here).
GArffAttribute* GArffRelation::GetAttribute(int n)
{
	return (GArffAttribute*)m_pAttributes->GetPointer(n);
}

// Recomputes m_nInputCount / m_nOutputCount and rebuilds the two index
// tables: every attribute for which IsInput() is true is an input, every
// other attribute is an output.
void GArffRelation::CountInputs()
{
	m_nInputCount = 0;
	m_nOutputCount = 0;
	int n;
	int nCount = GetAttributeCount();
	GArffAttribute* pAttr;
	for(n = 0; n < nCount; n++)
	{
		pAttr = GetAttribute(n);
		if(pAttr->IsInput())
			m_nInputCount++;
		else
			m_nOutputCount++;
	}
	GAssert(m_nInputCount > 0, "no inputs");
	//GAssert(m_nOutputCount > 0, "no outputs");

	// Replace any previously built tables
	delete[] m_pInputIndexes;
	delete[] m_pOutputIndexes;
	m_pInputIndexes = new int[m_nInputCount];
	m_pOutputIndexes = new int[m_nOutputCount];
	int nIn = 0;
	int nOut = 0;
	for(n = 0; n < nCount; n++)
	{
		pAttr = GetAttribute(n);
		if(pAttr->IsInput())
			m_pInputIndexes[nIn++] = n;
		else
			m_pOutputIndexes[nOut++] = n;
	}
}

// Returns the number of input attributes, counting them on first use.
int GArffRelation::GetInputCount()
{
	if(m_nInputCount < 0)
		CountInputs();
	return m_nInputCount;
}

// Returns the number of output attributes, counting them on first use.
int GArffRelation::GetOutputCount()
{
	if(m_nOutputCount < 0)
		CountInputs();
	return m_nOutputCount;
}

// Returns the attribute index of the n-th input attribute.
int GArffRelation::GetInputIndex(int n)
{
	if(!m_pInputIndexes)
		CountInputs();
	GAssert(n >= 0 && n < m_nInputCount, "out of range");
	return m_pInputIndexes[n];
}

// Returns the attribute index of the n-th output attribute.
int GArffRelation::GetOutputIndex(int n)
{
	if(!m_pOutputIndexes)
		CountInputs();
	GAssert(n >= 0 && n < m_nOutputCount, "out of range");
	return m_pOutputIndexes[n];
}

// Sums, over all output attributes, the variance of pData (continuous
// attributes) or the entropy of pData (all other attributes).
double GArffRelation::MeasureTotalOutputInfo(GArffData* pData)
{
	double dInfo = 0;
	int nOutputs = GetOutputCount();
	int n, nIndex;
	GArffAttribute* pAttr;
	for(n = 0; n < nOutputs; n++)
	{
		nIndex = GetOutputIndex(n);
		pAttr = GetAttribute(nIndex);
		if(pAttr->IsContinuous())
			dInfo += pData->ComputeVariance(pData->ComputeMean(nIndex), nIndex);
		else
			dInfo += pData->MeasureEntropy(this, nIndex);
	}
	return dInfo;
}

// Returns the squared distance between two rows over the input attributes
// only: squared difference for continuous attributes, 1 for every other
// attribute whose values differ.
double GArffRelation::ComputeInputDistanceSquared(double* pRow1, double* pRow2)
{
	double dSum = 0;
	double d;
	int n, nIndex;
	// Go through GetInputCount() rather than m_nInputCount: the member is
	// -1 until CountInputs has run, which would make this loop a no-op.
	int nInputs = GetInputCount();
	for(n = 0; n < nInputs; n++)
	{
		nIndex = GetInputIndex(n);
		if(GetAttribute(nIndex)->IsContinuous())
		{
			d = pRow2[nIndex] - pRow1[nIndex];
			dSum += (d * d);
		}
		else
		{
			if(pRow2[nIndex] != pRow1[nIndex])
				dSum += 1;
		}
	}
	return dSum;
}

// NOTE(review): the definition below is cut off at the end of the visible
// text. Its tokens are reproduced unchanged (only line breaks restored);
// the remainder of the function is not available here.
double GArffRelation::ComputeScaledInputDistanceSquared(double* pRow1, double* pRow2, double* pInputScales)
{
	double dSum = 0;
	double d;
	int n, nIndex;
	for(n = 0; n < m_nInputCount; n++)
	{
		nIndex = GetInputIndex(n);
		if(GetAttribute(nIndex)->IsContinuous())
		{
			d = pRow2[nIndex] * pInputScales[n] - pRow1[nIndex] * pInputScales[n];
			dSum += (d * d);
		}
		else
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -