?? prfalign.c
字號:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "clustalw.h"#define ENDALN 127#define MAX(a,b) ((a)>(b)?(a):(b))#define MIN(a,b) ((a)<(b)?(a):(b))/* * Prototypes */static lint pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2);static lint prfscore(sint n, sint m);static sint gap_penalty1(sint i, sint j,sint k);static sint open_penalty1(sint i, sint j);static sint ext_penalty1(sint i, sint j);static sint gap_penalty2(sint i, sint j,sint k);static sint open_penalty2(sint i, sint j);static sint ext_penalty2(sint i, sint j);static void padd(sint k);static void pdel(sint k);static void palign(void);static void ptracepath(sint *alen);static void add_ggaps(void);static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2);/* * Global variables */extern double **tmat;extern float gap_open, gap_extend;extern float transition_weight;extern sint gap_pos1, gap_pos2;extern sint max_aa;extern sint nseqs;extern sint *seqlen_array;extern sint *seq_weight;extern sint debug;extern Boolean neg_matrix;extern sint mat_avscore;extern short blosum30mt[], blosum40mt[], blosum45mt[];extern short blosum62mt2[], blosum80mt[];extern short pam20mt[], pam60mt[];extern short pam120mt[], pam160mt[], pam350mt[];extern short gon40mt[], gon80mt[];extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[];extern short clustalvdnamt[],swgapdnamt[];extern short idmat[];extern short usermat[];extern short userdnamat[];extern Boolean user_series;extern UserMatSeries matseries;extern short def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];extern sint max_aln_length;extern Boolean distance_tree;extern Boolean dnaflag;extern char mtrxname[];extern char dnamtrxname[];extern char **seq_array;extern char *amino_acid_codes;extern char *gap_penalty_mask1,*gap_penalty_mask2;extern char *sec_struct_mask1,*sec_struct_mask2;extern sint struct_penalties1, struct_penalties2;extern Boolean use_ss1, use_ss2;extern Boolean endgappenalties;static sint print_ptr,last_print;static sint *displ;static char **alignment;static sint *aln_len;static sint *aln_weight;static char *aln_path1, *aln_path2;static sint alignment_len;static sint **profile1, **profile2;static lint *HH, *DD, *RR, *SS;static lint *gS;static sint matrix[NUMRES][NUMRES];static sint nseqs1, nseqs2;static sint prf_length1, prf_length2;static sint *gaps;static sint gapcoef1,gapcoef2;static sint lencoef1,lencoef2;static Boolean switch_profiles;lint prfalign(sint *group, sint *aligned){ static Boolean found; static Boolean negative; static Boolean error_given=FALSE; static sint i, j, count = 0; static sint NumSeq; static sint len, len1, len2, is, minlen; static sint se1, se2, sb1, sb2; static sint maxres; static sint int_scale; static short *matptr; static short *mat_xref; static char c; static lint score; static float scale; static double logmin,logdiff; static double pcid; alignment = (char **) ckalloc( nseqs * sizeof (char *) ); aln_len = (sint *) ckalloc( nseqs * sizeof (sint) ); aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) ); for (i=0;i<nseqs;i++) if (aligned[i+1] == 0) group[i+1] = 0; nseqs1 = nseqs2 = 0; for (i=0;i<nseqs;i++) { if (group[i+1] == 1) nseqs1++; else if (group[i+1] == 2) nseqs2++; } if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0); if (nseqs2 > nseqs1) { switch_profiles = TRUE; for (i=0;i<nseqs;i++) { if (group[i+1] == 1) group[i+1] = 2; else if (group[i+1] == 2) group[i+1] = 1; } } else switch_profiles = FALSE; int_scale = 100;/* calculate the mean of the sequence pc identities between the two groups*/ count = 0; pcid = 0.0; negative=neg_matrix; for (i=0;i<nseqs;i++) { if (group[i+1] == 1) for (j=0;j<nseqs;j++) if (group[j+1] == 2) { count++; pcid += tmat[i+1][j+1]; } } pcid = pcid/(float)count;if (debug > 0) fprintf(stdout,"mean tmat %3.1f\n", pcid);/* Make the first profile.*/ prf_length1 = 0; for (i=0;i<nseqs;i++) if (group[i+1] == 1) if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1]; nseqs1 = 0;if (debug>0) fprintf(stdout,"sequences profile 1:\n"); for (i=0;i<nseqs;i++) { {if (debug>0) {extern char **names;fprintf(stdout,"%s\n",names[i+1]);} len = seqlen_array[i+1]; alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) ); for (j=0;j<len;j++) alignment[nseqs1][j] = seq_array[i+1][j+1]; for(j=len;j<prf_length1;j++) alignment[nseqs1][j]=gap_pos1; alignment[nseqs1][j] = ENDALN; aln_len[nseqs1] = prf_length1; aln_weight[nseqs1] = seq_weight[i]; nseqs1++; } }/* Make the second profile.*/ prf_length2 = 0; for (i=0;i<nseqs;i++) if (group[i+1] == 2) if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1]; nseqs2 = 0;if (debug>0) fprintf(stdout,"sequences profile 2:\n"); for (i=0;i<nseqs;i++) { if (group[i+1] == 2) {if (debug>0) {extern char **names;fprintf(stdout,"%s\n",names[i+1]);} len = seqlen_array[i+1]; alignment[nseqs1+nseqs2] = (char *) ckalloc( (prf_length2+2) * sizeof (char) ); for (j=0;j<len;j++) alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1]; for(j=len;j<prf_length2;j++) alignment[nseqs1+nseqs2][j]=gap_pos1; alignment[nseqs1+nseqs2][j] = ENDALN; aln_len[nseqs1+nseqs2] = prf_length2; aln_weight[nseqs1+nseqs2] = seq_weight[i]; nseqs2++; } } max_aln_length = prf_length1 + prf_length2+2; /* calculate real length of profiles - removing gaps!*/ len1=0; for (i=0;i<nseqs1;i++) { is=0; for (j=0; j<MIN(aln_len[i],prf_length1); j++) { c = alignment[i][j]; if ((c !=gap_pos1) && (c != gap_pos2)) is++; } len1+=is; } len1/=(float)nseqs1; len2=0; for (i=nseqs1;i<nseqs2+nseqs1;i++) { is=0; for (j=0; j<MIN(aln_len[i],prf_length2); j++) { c = alignment[i][j]; if ((c !=gap_pos1) && (c != gap_pos2)) is++; } len2+=is; } len2/=(float)nseqs2; if (dnaflag) { scale=1.0; if (strcmp(dnamtrxname, "iub") == 0) { matptr = swgapdnamt; mat_xref = def_dna_xref; } else if (strcmp(dnamtrxname, "clustalw") == 0) { matptr = clustalvdnamt; mat_xref = def_dna_xref; scale=0.66; } else { matptr = userdnamat; mat_xref = dna_xref; } maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale); if (maxres == 0) return((sint)-1);/* matrix[0][4]=transition_weight*matrix[0][0]; matrix[4][0]=transition_weight*matrix[0][0]; matrix[2][11]=transition_weight*matrix[0][0]; matrix[11][2]=transition_weight*matrix[0][0]; matrix[2][12]=transition_weight*matrix[0][0]; matrix[12][2]=transition_weight*matrix[0][0];*//* fix suggested by Chanan Rubin at Compugen */ matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0]; matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0]; matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0]; matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0]; matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0]; matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0]; gapcoef1 = gapcoef2 = 100.0 * gap_open *scale; lencoef1 = lencoef2 = 100.0 * gap_extend *scale; } else { if(len1==0 || len2==0) { logmin=1.0; logdiff=1.0; } else { minlen = MIN(len1,len2); logmin = 1.0/log10((double)minlen); if (len2<len1) logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1)); else if (len1<len2) logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2)); else logdiff=1.0; if(logdiff<0.9) logdiff=0.9; }if(debug>0) fprintf(stdout,"%d %d logmin %f logdiff %f\n",(pint)len1,(pint)len2, logmin,logdiff); scale=0.75; if (strcmp(mtrxname, "blosum") == 0) { scale=0.75; if (negative || distance_tree == FALSE) matptr = blosum40mt; else if (pcid > 80.0) { matptr = blosum80mt; } else if (pcid > 60.0) { matptr = blosum62mt2; } else if (pcid > 40.0) { matptr = blosum45mt; } else if (pcid > 30.0) { scale=0.5; matptr = blosum45mt; } else if (pcid > 20.0) { scale=0.6; matptr = blosum45mt; } else { scale=0.6; matptr = blosum30mt; } mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "pam") == 0) { scale=0.75; if (negative || distance_tree == FALSE) matptr = pam120mt; else if (pcid > 80.0) matptr = pam20mt; else if (pcid > 60.0) matptr = pam60mt; else if (pcid > 40.0) matptr = pam120mt; else matptr = pam350mt; mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "gonnet") == 0) { scale/=2.0; if (negative || distance_tree == FALSE) matptr = gon250mt; else if (pcid > 35.0) { matptr = gon80mt; scale/=2.0; } else if (pcid > 25.0) { if(minlen<100) matptr = gon250mt; else matptr = gon120mt; } else { if(minlen<100) matptr = gon350mt; else matptr = gon160mt; } mat_xref = def_aa_xref;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -