?? rna_profile_alignment.cpp
字號:
/* Copyright by Matthias Hoechsmann (C) 2002-2004 ===================================== You may use, copy and distribute this file freely as long as you - do not change the file, - leave this copyright notice in the file, - do not make any profit with the distribution of this file - give credit where credit is due You are not allowed to copy or distribute this file otherwise The commercial usage and distribution of this file is prohibited Please report bugs and suggestions to <mhoechsm@TechFak.Uni-Bielefeld.DE>*/#ifndef WIN32#include "config.h"#endif#include <cmath>#ifdef HAVE_LIBG2#include <g2.h>#include <g2_PS.h>#endif#include <fstream>#include <iostream>#include <iomanip>#include <sstream>#include "matrix.h"#include "rna_profile_alignment.h"#include "rnafuncs.h"#include "utils.h"#ifdef HAVE_LIBRNA // This features require the ViennaRNA library#include "part_func.h"#include "fold_vars.h"extern "C"{#include "fold.h"}#endif#include "ppforest.t.cpp"ostream& operator <<(ostream &s,RNA_Alphabet_Profile p){ s.precision(2); s << p.p[ALPHA_PRO_BASE_A] << "\\n"; s.precision(2); s << p.p[ALPHA_PRO_BASE_C] << "\\n"; s.precision(2); s << p.p[ALPHA_PRO_BASE_G] << "\\n"; s.precision(2); s << p.p[ALPHA_PRO_BASE_U] << "\\n"; s.precision(2); s << p.p[ALPHA_PRO_BASEPAIR] << "\\n"; s.precision(2); s << p.p[ALPHA_PRO_GAP]; s.precision(2);
s << p.p[ALPHA_PRO_BASE]; // for(Uint i=0;i<p.columnStr.length();i++) // s << p.columnStr[i] << "\\n"; return s;}/* ****************************************** *//* Constructor and Destructor functions *//* ****************************************** */RNAProfileAlignment::RNAProfileAlignment(const string &baseStr, const string &viennaStr, const string &name): PPForestAli<RNA_Alphabet_Profile,RNA_Alphabet_Profile>(RNAFuncs::treeSize(viennaStr)), m_name(name),
m_numStructures(1){ buildForest(baseStr,viennaStr); addStrName(name);};#ifdef HAVE_LIBRNA // This features require the ViennaRNA libraryRNAProfileAlignment::RNAProfileAlignment(const string &baseStr, const string &name, const string &constraint, double t) : PPForestAli<RNA_Alphabet_Profile,RNA_Alphabet_Profile>(2*baseStr.length()), m_name(name), m_numStructures(1){ char *viennaStr=NULL; // calculate partition function for the sequence do_backtrack=1; init_pf_fold(baseStr.length()); //if(constraint.length()>0) //pf_fold((char*)baseStr.c_str(),(char*)constraint.c_str()); // expicit conversion to non-const value, but pf_fold does not alter baseStr //else pf_fold((char*)baseStr.c_str(),NULL); // expicit conversion to non-const value, but pf_fold does not alter baseStr viennaStr=new char[baseStr.length()+1]; dangles=2; fold((char*)baseStr.c_str(),viennaStr); setSize(RNAFuncs::treeSize(viennaStr)); buildForest(baseStr,viennaStr,true); free_pf_arrays(); delete[] viennaStr; // hasSequence=true; addStrName(name);}#endifRNAProfileAlignment::RNAProfileAlignment(const string &filename){ // read ppforest from file size_type size; ifstream s; char *colStr; s.open(filename.c_str()); // first of all save the size s.read((char*)&size,sizeof(size_type)); s.read((char*)&m_numStructures,sizeof(Uint)); colStr=new char[m_numStructures+1]; colStr[m_numStructures]=0; // terminate string // initialize structures // ::PPForestBase(m_size); // m_lb=new L[ppf.size()]; initialize(size); // save the arrays for(Uint i=0;i<size;i++) { label_type l; for(int r=0;r<RNA_ALPHABET_SIZE;r++) { double d; s.read(reinterpret_cast<char*>(&d),sizeof(double)); l.p[r]=d; } s.read(colStr,sizeof(char)*m_numStructures); l.columnStr=colStr; m_lb[i]=l; } s.read(reinterpret_cast<char*>(m_rb),sizeof(size_type)*size); s.read(reinterpret_cast<char*>(m_noc),sizeof(size_type)*size); // s.read((char*)m_sumUpCSF,sizeof(size_type)*size); // s.read((char*)m_rmb,sizeof(size_type)*size); for(Uint r=0;r<m_numStructures;r++) { char str[21]; str[20]=0; s.read(str,sizeof(char)*20); m_strNames.push_back(string(str)); } calcSumUpCSF(); calcRMB(); }/*RNAProfileForest::RNAProfileForest(const Profile_RNA_Alignment_Forest &ppf)
: RNAForestBase<RNA_Alphabet_Profile>(ppf){ m_numStructures=ppf.m_numStructuresX+ppf.m_numStructuresY;}*//* ****************************************** *//* Private functions *//* ****************************************** */void RNAProfileAlignment::makeRepLabel(size_type node, RNA_Alphabet_Profile a, RNA_Alphabet_Profile b) {
double p,q;
double m;
p=m_numStructuresX; // weight number of structures in profile
q=m_numStructuresY;
m=p+q;
p/=m;
q/=m;
// profile for(int i=0;i<RNA_ALPHABET_SIZE;i++) m_lb[node].p[i]=p*a.p[i]+q*b.p[i];
// alignment column
m_lb[node].columnStr=a.columnStr + b.columnStr; }; void RNAProfileAlignment::makeDelLabel(size_type node, RNA_Alphabet_Profile a) {
double p,q;
double m;
p=m_numStructuresX; // weight number of structures in profile
q=1;
m=p+q;
p/=m;
q/=m;
// profile
m_lb[node]=a;
//m_lb[node].p[ALPHA_GAP]=(1+m_lb[node].p[ALPHA_GAP])/2.0;
for(int i=0;i<RNA_ALPHABET_SIZE;i++) { m_lb[node].p[i]*=p; } m_lb[node].p[ALPHA_PRO_GAP]+=q; // alignment column
m_lb[node].columnStr=a.columnStr + string(m_numStructuresY,ALPHA_GAP); }; void RNAProfileAlignment::makeInsLabel(size_type node, RNA_Alphabet_Profile b) {
double p,q;
double m;
p=1; // weight number of structures in profile
q=m_numStructuresY;
m=p+q;
p/=m;
q/=m;
// profile
m_lb[node]=b;
//m_lb[node].p[ALPHA_GAP]=(1+m_lb[node].p[ALPHA_GAP])/2.0;
for(int i=0;i<RNA_ALPHABET_SIZE;i++) { m_lb[node].p[i]*=q; } m_lb[node].p[ALPHA_PRO_GAP]+=p; // m_lb[node].p[ALPHA_PRO_GAP]=(p+q*m_lb[node].p[ALPHA_PRO_GAP])/2.0;
// alignment column
m_lb[node].columnStr=string(m_numStructuresX,ALPHA_GAP) + b.columnStr; }; void RNAProfileAlignment::showLabel(ostream &s,RNA_Alphabet_Profile p) const { s << p; };void RNAProfileAlignment::makeLabel(RNA_Alphabet_Profile &p,char c){ int i; // initialize profile entries to zero for(i=0;i<RNA_ALPHABET_SIZE;i++) p.p[i]=0.0; i=alpha2RNA_Alpha(c); p.p[i]=1.0; // if it is acgu it is also a base if(i<=ALPHA_PRO_BASE_U) p.p[ALPHA_PRO_BASE]=1.0;
// set column string
p.columnStr=c;}void RNAProfileAlignment::buildForest(const string &baseStr, const string &viennaStr, bool use_bp_prob){ Ulong basePairCount=0,maxDepth=0,stackPtr; Uint baseStrLen,viennaStrLen,node,*nodeStack, *numChildrenStack, *baseposStack; assert(RNAFuncs::isRNAString(baseStr)); RNAFuncs::isViennaString(viennaStr,basePairCount,maxDepth); baseStrLen=baseStr.length(); viennaStrLen=viennaStr.length(); if(baseStrLen) hasSequence=true; else hasSequence=false; // check if base string and vienna string have the same length // if(baseStr.length() > 0) // if(baseStr.length() != viennaStrLen) // throw RNAForestExceptionInput(RNAForestExceptionInput::Error_BaseStringAndViennaStringIncompatible); nodeStack=new Uint[maxDepth+1]; numChildrenStack=new Uint[maxDepth+1]; baseposStack=new Uint[maxDepth+1]; memset(nodeStack,0,sizeof(Uint)*maxDepth+1); memset(numChildrenStack,0,sizeof(Uint)*maxDepth+1); memset(baseposStack,0,sizeof(Uint)*maxDepth+1); // fill PPForest structure stackPtr=0; node=0; for(Uint i=0;i<viennaStrLen;i++) { switch(viennaStr[i]) { case '.': // set label if(baseStrLen) makeLabel(m_lb[node],baseStr[i]); else makeLabel(m_lb[node],'B'); // set right brother if(node==size()-1) setRightBrotherIndex(node,0); else setRightBrotherIndex(node,node+1); // set num children setNumChildren(node,0); // increase stack values numChildrenStack[stackPtr]++; node++; break; case '(': // set label makeLabel(m_lb[node],'P'); // increase stack values numChildrenStack[stackPtr]++; baseposStack[stackPtr]=i+1; // push stackPtr++; nodeStack[stackPtr]=node; numChildrenStack[stackPtr]=1; node++; // set label if(baseStrLen) makeLabel(m_lb[node],baseStr[i]); else makeLabel(m_lb[node],'B'); // set right brother setRightBrotherIndex(node,node+1); // set num children setNumChildren(node,0); node++; break; case ')': // set label if(baseStrLen) makeLabel(m_lb[node],baseStr[i]); else makeLabel(m_lb[node],'B'); // set right brother setRightBrotherIndex(node,0); // set num children setNumChildren(node,0); // pop if(node==size()-1) setRightBrotherIndex(nodeStack[stackPtr],0); else setRightBrotherIndex(nodeStack[stackPtr],node+1); setNumChildren(nodeStack[stackPtr],numChildrenStack[stackPtr]+1); stackPtr--;#ifdef HAVE_LIBRNA // set basepair probability if(use_bp_prob) { m_lb[nodeStack[stackPtr]].p[ALPHA_PRO_BASEPAIR]=pr[iindx[baseposStack[stackPtr]]-(i+1)]; m_lb[nodeStack[stackPtr]].p[ALPHA_PRO_GAP]=1-m_lb[nodeStack[stackPtr]].p[ALPHA_PRO_BASEPAIR]; } #endif node++; break; } } delete[] nodeStack; delete[] numChildrenStack; delete[] baseposStack; calcSumUpCSF(); calcRMB(); assert (m_noc[m_size-1]==0);}void RNAProfileAlignment::getStructureAlignmentFromCSF(string &s, deque<double> &pairprob, double t,size_type i, size_type j) const{ size_type h; double bestPairScore; // list<Uint> leftPairList; //list<Uint> rightPairList; //Uint bestLeftIndex,bestRightIndex; //Uint lastLeftIndex,lastRightIndex; // QWATCH(i); // QWATCH(j); if(j==0) return; if(isPair(i)) { // TRACE(DBG_GET_PROFILE_STRUCTURE,"Profile_RNA_Alignment_Forest::getStructureAlignmentFromCSF","basepair"); // WATCH(DBG_GET_PROFILE_STRUCTURE,"Profile_RNA_Alignment_Forest::getStructureAlignmentFromCSF",m_lb[i].p[ALPHA_BASEPAIR]); bestPairScore=bestPairs(i); // backtrack best pairs if(bestPairScore == bestPairs(i+1) + bestPairs(getRightmostBrotherIndex(i+1)) || m_lb[i].p[ALPHA_PRO_BASEPAIR] < t) { // i pairs not // cout << "unpaired" << endl; getStructureAlignmentFromCSF(s,pairprob,t,i+1,noc(i)); // cout << "back to:" << endl; // QWATCH(i); // QWATCH(j); } else { // cout << "paired" << endl; // i pairs s += '('; pairprob.push_back(m_lb[i].p[ALPHA_PRO_BASEPAIR]); // left path - righthand best pairs h=i+1; while(h < size() && isPair(h)) { // cout << "left" << endl; // QWATCH(h); assert((int)noc(h)-1>=0); getStructureAlignmentFromCSF(s,pairprob,t,rb(h+1),noc(h)-1); h=h+1; } assert((int)noc(i)-2>=0); getStructureAlignmentFromCSF(s,pairprob,t,rb(i+1),noc(i)-2); // cout << "back to:" << endl; // QWATCH(i); // QWATCH(j); // right path - lefthand best pairs h=getRightmostBrotherIndex(i+1); while(h < size() && isPair(h)) { // cout << "right" << endl; // QWATCH(h); assert((int)noc(h)-1>=0); getStructureAlignmentFromCSF(s,pairprob,t,h+1,noc(h)-1); //h=h+1; h=getRightmostBrotherIndex(h+1); } s += ')'; pairprob.push_back(m_lb[i].p[ALPHA_PRO_BASEPAIR]); } } else { s+= '.'; pairprob.push_back(m_lb[i].p[ALPHA_PRO_BASE]); } // right forest getStructureAlignmentFromCSF(s,pairprob,t,rb(i),j-1);}double RNAProfileAlignment::bestPairs(size_type node) const{ size_type i=node; double d1,d2; WATCH(DBG_GET_PROFILE_STRUCTURE,"RNAProfileForest::getStructureAlignment",node); if(isBase(node)) return 0; else { // node pairs not d1=bestPairs(node+1) + bestPairs(getRightmostBrotherIndex(node+1)); // node pairs d2=label(node).p[ALPHA_PRO_BASEPAIR]; // left path - righthand best pairs i=node+1; while(i < size() && isPair(i)) { d2+=bestPairs(getRightmostBrotherIndex(i+1)); i=i+1; } // right path - lefthand best pairs i=getRightmostBrotherIndex(node+1); while(isPair(i) && i < size()) { d2+=bestPairs(i+1); i=getRightmostBrotherIndex(i+1); } return max(d1,d2); }} #ifdef HAVE_LIBG2 // This features require the g2 libraryvoid RNAProfileAlignment::drawBaseCircles(int device_id,const BaseProbs &bp,double center_x,double center_y) const{ const double box_size=6.0; const double max_radius=3.0; double xpos,ypos; int color; // double dashes=0.5; // draw the base probabilities as circles on the edges of the square and the gap probability // as the center square // upper left corner = a xpos=center_x-box_size/2.0; ypos=center_y+box_size/2.0; color=g2_ink(device_id,1,0,0); g2_pen(device_id,color); g2_filled_circle(device_id,xpos,ypos,max_radius*bp.a); // upper right corner = c xpos=center_x+box_size/2.0; ypos=center_y+box_size/2.0; color=g2_ink(device_id,0,1,0); g2_pen(device_id,color); g2_filled_circle(device_id,xpos,ypos,max_radius*bp.c); // lower right corner = g xpos=center_x+box_size/2.0; ypos=center_y-box_size/2.0; color=g2_ink(device_id,0,0,1); g2_pen(device_id,color); g2_filled_circle(device_id,xpos,ypos,max_radius*bp.g); // lower right corner = u xpos=center_x-box_size/2.0; ypos=center_y-box_size/2.0; color=g2_ink(device_id,1,0,1); g2_pen(device_id,color); g2_filled_circle(device_id,xpos,ypos,max_radius*bp.u); // gap in the center color=g2_ink(device_id,0,0,0); g2_pen(device_id,color); g2_filled_circle(device_id,center_x,center_y,max_radius*bp.gap); // draw rectangle for orientation //g2_set_dash(device_id,1,&dashes);
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -