?? pairalign.c
字號:
/* Change int h to int gh everywhere DES June 1994 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "clustalw.h"#define MIN(a,b) ((a)<(b)?(a):(b))#define MAX(a,b) ((a)>(b)?(a):(b))#define gap(k) ((k) <= 0 ? 0 : g + gh * (k))#define tbgap(k) ((k) <= 0 ? 0 : tb + gh * (k))#define tegap(k) ((k) <= 0 ? 0 : te + gh * (k))/* * Prototypes */static void add(sint v);static sint calc_score(sint iat, sint jat, sint v1, sint v2);static float tracepath(sint tsb1,sint tsb2);static void forward_pass(char *ia, char *ib, sint n, sint m);static void reverse_pass(char *ia, char *ib);static sint diff(sint A, sint B, sint M, sint N, sint tb, sint te);static void del(sint k);/* * Global variables */#ifdef MAC#define pwint short#else#define pwint int#endifstatic sint int_scale;extern double **tmat;extern float pw_go_penalty;extern float pw_ge_penalty;extern float transition_weight;extern sint nseqs;extern sint max_aa;extern sint gap_pos1,gap_pos2;extern sint max_aln_length;extern sint *seqlen_array;extern sint debug;extern sint mat_avscore;extern short blosum30mt[],pam350mt[],idmat[],pw_usermat[],pw_userdnamat[];extern short clustalvdnamt[],swgapdnamt[];extern short gon250mt[];extern short def_dna_xref[],def_aa_xref[],pw_dna_xref[],pw_aa_xref[];extern Boolean dnaflag;extern char **seq_array;extern char *amino_acid_codes;extern char pw_mtrxname[];extern char pw_dnamtrxname[];static float mm_score;static sint print_ptr,last_print;static sint *displ;static pwint *HH, *DD, *RR, *SS;static sint g, gh;static sint seq1, seq2;static sint matrix[NUMRES][NUMRES];static pwint maxscore;static sint sb1, sb2, se1, se2;sint pairalign(sint istart, sint iend, sint jstart, sint jend){ short *mat_xref; static sint si, sj, i; static sint n,m,len1,len2; static sint maxres; static short *matptr; static char c; static float gscale,ghscale; displ = (sint *)ckalloc((2*max_aln_length+1) * sizeof(sint)); HH = (pwint *)ckalloc((max_aln_length) * sizeof(pwint)); DD = (pwint *)ckalloc((max_aln_length) * sizeof(pwint)); RR = (pwint *)ckalloc((max_aln_length) * sizeof(pwint)); SS = (pwint *)ckalloc((max_aln_length) * sizeof(pwint)); #ifdef MAC int_scale = 10;#else int_scale = 100;#endif gscale=ghscale=1.0; if (dnaflag) { if (debug>1) fprintf(stdout,"matrix %s\n",pw_dnamtrxname); if (strcmp(pw_dnamtrxname, "iub") == 0) { matptr = swgapdnamt; mat_xref = def_dna_xref; } else if (strcmp(pw_dnamtrxname, "clustalw") == 0) { matptr = clustalvdnamt; mat_xref = def_dna_xref; gscale=0.6667; ghscale=0.751; } else { matptr = pw_userdnamat; mat_xref = pw_dna_xref; } maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale); if (maxres == 0) return((sint)-1); matrix[0][4]=transition_weight*matrix[0][0]; matrix[4][0]=transition_weight*matrix[0][0]; matrix[2][11]=transition_weight*matrix[0][0]; matrix[11][2]=transition_weight*matrix[0][0]; matrix[2][12]=transition_weight*matrix[0][0]; matrix[12][2]=transition_weight*matrix[0][0]; } else { if (debug>1) fprintf(stdout,"matrix %s\n",pw_mtrxname); if (strcmp(pw_mtrxname, "blosum") == 0) { matptr = blosum30mt; mat_xref = def_aa_xref; } else if (strcmp(pw_mtrxname, "pam") == 0) { matptr = pam350mt; mat_xref = def_aa_xref; } else if (strcmp(pw_mtrxname, "gonnet") == 0) { matptr = gon250mt; int_scale /= 10; mat_xref = def_aa_xref; } else if (strcmp(pw_mtrxname, "id") == 0) { matptr = idmat; mat_xref = def_aa_xref; } else { matptr = pw_usermat; mat_xref = pw_aa_xref; } maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale); if (maxres == 0) return((sint)-1); } for (si=MAX(0,istart);si<nseqs && si<iend;si++) { n = seqlen_array[si+1]; len1 = 0; for (i=1;i<=n;i++) { c = seq_array[si+1][i]; if ((c!=gap_pos1) && (c != gap_pos2)) len1++; } for (sj=MAX(si+1,jstart+1);sj<nseqs && sj<jend;sj++) { m = seqlen_array[sj+1]; if(n==0 || m==0) { tmat[si+1][sj+1]=1.0; tmat[sj+1][si+1]=1.0; continue; } len2 = 0; for (i=1;i<=m;i++) { c = seq_array[sj+1][i]; if ((c!=gap_pos1) && (c != gap_pos2)) len2++; } if (dnaflag) { g = 2 * (float)pw_go_penalty * int_scale*gscale; gh = pw_ge_penalty * int_scale*ghscale; } else { if (mat_avscore <= 0) g = 2 * (float)(pw_go_penalty + log((double)(MIN(n,m))))*int_scale; else g = 2 * mat_avscore * (float)(pw_go_penalty + log((double)(MIN(n,m))))*gscale; gh = pw_ge_penalty * int_scale; } if (debug>1) fprintf(stdout,"go %d ge %d\n",(pint)g,(pint)gh); /* align the sequences */ seq1 = si+1; seq2 = sj+1; forward_pass(&seq_array[seq1][0], &seq_array[seq2][0], n, m); reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]); last_print = 0; print_ptr = 1;/* sb1 = sb2 = 1; se1 = n-1; se2 = m-1;*//* use Myers and Miller to align two sequences */ maxscore = diff(sb1-1, sb2-1, se1-sb1+1, se2-sb2+1, (sint)0, (sint)0); /* calculate percentage residue identity */ mm_score = tracepath(sb1,sb2); if(len1==0 || len2==0) mm_score=0; else mm_score /= (float)MIN(len1,len2); tmat[si+1][sj+1] = ((float)100.0 - mm_score)/(float)100.0; tmat[sj+1][si+1] = ((float)100.0 - mm_score)/(float)100.0;if (debug>1){ fprintf(stdout,"Sequences (%d:%d) Aligned. Score: %d CompScore: %d\n", (pint)si+1,(pint)sj+1, (pint)mm_score, (pint)maxscore/(MIN(len1,len2)*100));}else{ info("Sequences (%d:%d) Aligned. Score: %d", (pint)si+1,(pint)sj+1, (pint)mm_score);} } } displ=ckfree((void *)displ); HH=ckfree((void *)HH); DD=ckfree((void *)DD); RR=ckfree((void *)RR); SS=ckfree((void *)SS); return((sint)1);}static void add(sint v){ if(last_print<0) { displ[print_ptr-1] = v; displ[print_ptr++] = last_print; } else last_print = displ[print_ptr++] = v;}static sint calc_score(sint iat,sint jat,sint v1,sint v2){ sint ipos,jpos; sint ret; ipos = v1 + iat; jpos = v2 + jat; ret=matrix[(int)seq_array[seq1][ipos]][(int)seq_array[seq2][jpos]]; return(ret);}static float tracepath(sint tsb1,sint tsb2){ char c1,c2; sint i1,i2,r; sint i,k,pos,to_do; sint count; float score; char s1[600], s2[600]; to_do=print_ptr-1; i1 = tsb1; i2 = tsb2; pos = 0; count = 0; for(i=1;i<=to_do;++i) { if (debug>1) fprintf(stdout,"%d ",(pint)displ[i]); if(displ[i]==0) { c1 = seq_array[seq1][i1]; c2 = seq_array[seq2][i2]; if (debug>0) { if (c1>max_aa) s1[pos] = '-'; else s1[pos]=amino_acid_codes[c1]; if (c2>max_aa) s2[pos] = '-'; else s2[pos]=amino_acid_codes[c2]; } if ((c1!=gap_pos1) && (c1 != gap_pos2) && (c1 == c2)) count++; ++i1; ++i2; ++pos; } else { if((k=displ[i])>0) { if (debug>0) for (r=0;r<k;r++) { s1[pos+r]='-'; if (seq_array[seq2][i2+r]>max_aa) s2[pos+r] = '-'; else s2[pos+r]=amino_acid_codes[seq_array[seq2][i2+r]]; } i2 += k; pos += k; } else { if (debug>0) for (r=0;r<(-k);r++) { s2[pos+r]='-'; if (seq_array[seq1][i1+r]>max_aa) s1[pos+r] = '-'; else s1[pos+r]=amino_acid_codes[seq_array[seq1][i1+r]]; } i1 -= k; pos -= k; } } } if (debug>0) fprintf(stdout,"\n"); if (debug>0) { for (i=0;i<pos;i++) fprintf(stdout,"%c",s1[i]); fprintf(stdout,"\n"); for (i=0;i<pos;i++) fprintf(stdout,"%c",s2[i]); fprintf(stdout,"\n"); } /* if (count <= 0) count = 1; */ score = 100.0 * (float)count; return(score);}static void forward_pass(char *ia, char *ib, sint n, sint m){ sint i,j; pwint f,hh,p,t; maxscore = 0; se1 = se2 = 0; for (i=0;i<=m;i++) { HH[i] = 0; DD[i] = -g; } for (i=1;i<=n;i++) { hh = p = 0; f = -g; for (j=1;j<=m;j++) { f -= gh; t = hh - g - gh; if (f<t) f = t; DD[j] -= gh; t = HH[j] - g - gh; if (DD[j]<t) DD[j] = t; hh = p + matrix[(int)ia[i]][(int)ib[j]]; if (hh<f) hh = f; if (hh<DD[j]) hh = DD[j]; if (hh<0) hh = 0; p = HH[j]; HH[j] = hh; if (hh > maxscore) { maxscore = hh; se1 = i; se2 = j; } } }}static void reverse_pass(char *ia, char *ib){ sint i,j; pwint f,hh,p,t; pwint cost; cost = 0; sb1 = sb2 = 1; for (i=se2;i>0;i--) { HH[i] = -1; DD[i] = -1; } for (i=se1;i>0;i--) { hh = f = -1; if (i == se1) p = 0; else p = -1; for (j=se2;j>0;j--) { f -= gh; t = hh - g - gh; if (f<t) f = t; DD[j] -= gh; t = HH[j] - g - gh; if (DD[j]<t) DD[j] = t; hh = p + matrix[(int)ia[i]][(int)ib[j]]; if (hh<f) hh = f; if (hh<DD[j]) hh = DD[j]; p = HH[j]; HH[j] = hh; if (hh > cost) { cost = hh; sb1 = i; sb2 = j; if (cost >= maxscore) break; } } if (cost >= maxscore) break; }}static int diff(sint A,sint B,sint M,sint N,sint tb,sint te){ sint type; sint midi,midj,i,j; int midh; static pwint f, hh, e, s, t; if(N<=0) { if(M>0) { del(M); } return(-(int)tbgap(M)); } if(M<=1) { if(M<=0) { add(N); return(-(int)tbgap(N)); } midh = -(tb+gh) - tegap(N); hh = -(te+gh) - tbgap(N); if (hh>midh) midh = hh; midj = 0; for(j=1;j<=N;j++) { hh = calc_score(1,j,A,B) - tegap(N-j) - tbgap(j-1); if(hh>midh) { midh = hh; midj = j; } } if(midj==0) { del(1); add(N); } else { if(midj>1) add(midj-1); displ[print_ptr++] = last_print = 0; if(midj<N) add(N-midj); } return midh; } /* Divide: Find optimum midpoint (midi,midj) of cost midh */ midi = M / 2; HH[0] = 0.0; t = -tb; for(j=1;j<=N;j++) { HH[j] = t = t-gh; DD[j] = t-g; } t = -tb; for(i=1;i<=midi;i++) { s=HH[0]; HH[0] = hh = t = t-gh; f = t-g; for(j=1;j<=N;j++) { if ((hh=hh-g-gh) > (f=f-gh)) f=hh; if ((hh=HH[j]-g-gh) > (e=DD[j]-gh)) e=hh; hh = s + calc_score(i,j,A,B); if (f>hh) hh = f; if (e>hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0]=HH[0]; RR[N]=0; t = -te; for(j=N-1;j>=0;j--) { RR[j] = t = t-gh; SS[j] = t-g; } t = -te; for(i=M-1;i>=midi;i--) { s = RR[N]; RR[N] = hh = t = t-gh; f = t-g; for(j=N-1;j>=0;j--) { if ((hh=hh-g-gh) > (f=f-gh)) f=hh; if ((hh=RR[j]-g-gh) > (e=SS[j]-gh)) e=hh; hh = s + calc_score(i+1,j+1,A,B); if (f>hh) hh = f; if (e>hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N]=RR[N]; midh=HH[0]+RR[0]; midj=0; type=1; for(j=0;j<=N;j++) { hh = HH[j] + RR[j]; if(hh>=midh) if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) { midh=hh; midj=j; } } for(j=N;j>=0;j--) { hh = DD[j] + SS[j] + g; if(hh>midh) { midh=hh; midj=j; type=2; } } /* Conquer recursively around midpoint */ if(type==1) { /* Type 1 gaps */ diff(A,B,midi,midj,tb,g); diff(A+midi,B+midj,M-midi,N-midj,g,te); } else { diff(A,B,midi-1,midj,tb,0.0); del(2); diff(A+midi+1,B+midj,M-midi-1,N-midj,0.0,te); } return midh; /* Return the score of the best alignment */}static void del(sint k){ if(last_print<0) last_print = displ[print_ptr-1] -= k; else last_print = displ[print_ptr++] = -(k);}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -