/* sequence.c */
/********* Sequence input routines for CLUSTAL W *******************/
/* DES was here. FEB. 1994 */
/* Now reads PILEUP/MSF and CLUSTAL alignment files */
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include "clustalw.h"
/* Smaller of a and b. Each argument may be evaluated more than once, so
 * do not pass expressions with side effects. */
#define MIN(a,b) ((a)<(b)?(a):(b))
/*
* Prototypes
*
* Forward declarations of the helpers private to this file (all static).
*/
/* Sequence readers. get_clustal_seq() and get_msf_seq() take
 * (name out, length out, title, 1-based sequence number) and return the
 * sequence buffer; get_seq() is defined outside the part of the file
 * reviewed here. */
static char * get_seq(char *,sint *,char *);
static char * get_clustal_seq(char *,sint *,char *,sint);
static char * get_msf_seq(char *,sint *,char *,sint);
/* The following helpers are defined outside the part of the file reviewed
 * here; see their definitions for the exact contracts. */
static void check_infile(sint *);
static void p_encode(char *, char *, sint);
static void n_encode(char *, char *, sint);
static sint res_index(char *,char);
static Boolean check_dnaflag(char *, sint);
static sint count_clustal_seqs(void);
static sint count_pir_seqs(void);
static sint count_msf_seqs(void);
static sint count_rsf_seqs(void);
static void get_swiss_feature(char *line,sint len);
static void get_rsf_feature(char *line,sint len);
static void get_swiss_mask(char *line,sint len);
/* Secondary-structure / gap-penalty-mask readers, one per input format;
 * the argument is the number of mask positions to fill. */
static void get_clustal_ss(sint length);
static void get_embl_ss(sint length);
static void get_rsf_ss(sint length);
static void get_gde_ss(sint length);
/* TRUE for lines of a CLUSTAL file that carry no sequence data. */
static Boolean cl_blankline(char *line);
/*
* Global variables
*
* The extern objects are defined in other translation units; fin,
* amino_acid_codes, seqFormat, chartab and formatNames are defined here.
*/
extern sint max_names;
FILE *fin; /* input stream that the readers in this file rewind and read */
extern Boolean usemenu, dnaflag, explicit_dnaflag;
extern Boolean interactive; /* when set, the *_ss readers ask before using a mask */
extern char seqname[];
extern sint nseqs;
extern sint *seqlen_array;
extern sint *output_index;
extern char **names,**titles;
extern char **seq_array;
extern Boolean profile1_empty, profile2_empty;
extern sint gap_pos2;
extern sint max_aln_length;
extern char *gap_penalty_mask, *sec_struct_mask; /* filled by the *_ss readers */
extern sint struct_penalties; /* set to SECST or GMASK once a mask is accepted */
extern char *ss_name; /* receives the name of the entry the mask came from */
extern sint profile_no;
extern sint debug;
/* Characters accepted as residue codes; fill_chartab() builds chartab from it. */
char *amino_acid_codes = "ABCDEFGHIKLMNPQRSTUVWXYZ-"; /* DES */
static sint seqFormat;
/* Lookup table built by fill_chartab(): each character in amino_acid_codes,
 * and its tolower() form, maps to that code; every other entry is 0.
 * Only indices 0..127 exist. */
static char chartab[128];
/* Printable names of the input formats; presumably indexed by the format
 * code held in seqFormat -- TODO confirm against the users of this table. */
static char *formatNames[] = {"unknown","EMBL/Swiss-Prot","PIR",
"Pearson","GDE","Clustal","Pileup/MSF","RSF","USER","PHYLIP","NEXUS"};
void fill_chartab(void) /* Create translation and check table */
{
register sint i;
register char c;
for(i=0;i<128;chartab[i++]=0);
for(i=0;(c=amino_acid_codes[i]);i++)
chartab[(int)c]=chartab[tolower(c)]=c;
}
/*
 * Read the seqno-th sequence from a PILEUP/MSF multiple alignment file.
 *
 * The global stream fin is rewound, the header is skipped up to and
 * including the line recognised by linetype(line,"//"), and then every
 * block of non-blank lines is visited.  In each block the seqno-th line
 * (1-based) is taken: its first space-delimited token becomes the name and
 * the rest of the line is scanned for residues.
 *
 *   sname  out: sequence name, at most MAXNAMES characters plus terminator
 *   len    out: number of residues stored
 *   tit    not used by this reader
 *   seqno  1-based position of the wanted sequence within each block
 *
 * Returns a buffer obtained from ckalloc/ckrealloc with the residues stored
 * at indices 1..*len (index 0 is not written and no terminator is added),
 * or NULL when no "//" line is found or nothing follows it.
 *
 * '.' and '~' are stored as '-', '*' as 'X'; characters that chartab does
 * not accept are dropped.
 */
static char * get_msf_seq(char *sname,sint *len,char *tit,sint seqno)
{
	static char line[MAXLINE+1];
	char *seq = NULL;
	sint i,j,k;
	sint linelen,namelen;
	unsigned char c;

	(void)tit;

	fseek(fin,0,0); 		/* start at the beginning */
	*len=0;				/* initialise length to zero */

	/* skip the header lines up to and including the "//" divider */
	for(;;) {
		if(fgets(line,MAXLINE+1,fin)==NULL) return NULL;
		if(linetype(line,"//") ) break;
	}

	while (fgets(line,MAXLINE+1,fin) != NULL) {
		if(blankline(line)) continue;

		/* line holds sequence 1 of this block; step down to sequence seqno.
		 * Stop at end of file instead of re-parsing a stale line. */
		for(i=1;i<seqno;i++)
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;

		/* name = first run of non-space characters; never scan past the
		 * terminator, so the residue loop below cannot see stale bytes */
		linelen=(sint)strlen(line);
		for(j=0;j<linelen;j++) if(line[j] != ' ') break;
		for(k=j;k<linelen;k++) if(line[k] == ' ') break;
		namelen=MIN(MAXNAMES,k-j);
		strncpy(sname,line+j,namelen);
		sname[namelen]=EOS;
		rtrim(sname);
		blank_to_(sname);

		/* make room for at most one more line of residues */
		if(seq==NULL)
			seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
		else
			seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));

		for(i=k;i<linelen;i++) {
			c=line[i];
			if(c == '\n') break;			/* EOL */
			if(c == '.' || c == '~' ) c = '-';
			if(c == '*') c = 'X';
			/* chartab only has entries for 0..127 */
			if(c >= sizeof chartab) continue;
			c=chartab[c];
			if(c) seq[++(*len)]=c;
		}

		/* skip the remaining lines of this block */
		for(;;) {
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
			if(blankline(line)) break;
		}
	}
	return seq;
}
/*
 * Decide whether a line of a CLUSTAL alignment file carries no sequence.
 *
 * Returns TRUE when the line starts with '!' or when every character up to
 * the newline/terminator is a digit, white space, '*', ':' or '.';
 * returns FALSE as soon as any other character is seen.
 *
 * The line is examined through unsigned char because passing a negative
 * char value to isdigit()/isspace() is undefined behaviour.
 */
static Boolean cl_blankline(char *line)
{
	const unsigned char *p;

	if (line[0] == '!') return TRUE;

	for (p = (const unsigned char *)line; *p != '\n' && *p != EOS; p++) {
		if (isdigit(*p) || isspace(*p) ||
		    *p == '*' || *p == ':' || *p == '.')
			continue;
		return FALSE;
	}
	return TRUE;
}
/*
 * Read the seqno-th sequence from a CLUSTAL multiple alignment file.
 *
 * The global stream fin is rewound and the first (title) line is skipped.
 * Each block starts at the first line for which cl_blankline() is false;
 * the seqno-th line of the block (1-based) is split into a name token and
 * a sequence token, and the accepted residues are appended to the result.
 *
 *   sname  out: sequence name, truncated to MAXNAMES characters; the buffer
 *          must hold at least MAXNAMES+1 bytes
 *   len    out: number of residues stored
 *   tit    not used by this reader
 *   seqno  1-based position of the wanted sequence within each block
 *
 * Returns a buffer obtained from ckalloc/ckrealloc with the residues stored
 * at indices 1..*len (index 0 is not written and no terminator is added),
 * or NULL when the file holds no sequence lines.
 */
static char * get_clustal_seq(char *sname,sint *len,char *tit,sint seqno)
{
	static char line[MAXLINE+1];
	static char tname[MAXLINE+1];
	static char tseq[MAXLINE+1];
	char *seq = NULL;
	sint i;
	unsigned char c;

	(void)tit;

	fseek(fin,0,0); 		/* start at the beginning */
	*len=0;				/* initialise length to zero */

	/* read the title line...ignore it */
	if(fgets(line,MAXLINE+1,fin)==NULL) return NULL;

	while (fgets(line,MAXLINE+1,fin) != NULL) {
		if(cl_blankline(line)) continue;

		/* line holds sequence 1 of this block; step down to sequence seqno.
		 * Stop at end of file instead of re-parsing a stale line. */
		for(i=1;i<seqno;i++)
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;

		/* Both tokens go into MAXLINE-sized scratch buffers (a token can
		 * never be longer than the line), and are cleared first so that a
		 * line with fewer than two tokens does not reuse old data. */
		tname[0]=tseq[0]=EOS;
		sscanf(line,"%s%s",tname,tseq);
		strncpy(sname,tname,MAXNAMES);
		sname[MAXNAMES]=EOS;
		rtrim(sname);
		blank_to_(sname);

		/* make room for at most one more line of residues */
		if(seq==NULL)
			seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
		else
			seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));

		for(i=0;i<=MAXLINE;i++) {
			c=tseq[i];
			if(isspace(c) || c == EOS) break;	/* EOL */
			/* chartab only has entries for 0..127 */
			if(c >= sizeof chartab) continue;
			c=chartab[c];
			if(c) seq[++(*len)]=c;
		}

		/* skip the remaining lines of this block */
		for(;;) {
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
			if(cl_blankline(line)) break;
		}
	}
	return seq;
}
static void get_clustal_ss(sint length)
/* read the structure data from a clustal multiple alignment file */
{
static char title[MAXLINE+1];
static char line[MAXLINE+1];
static char lin2[MAXLINE+1];
static char tseq[MAXLINE+1];
static char sname[MAXNAMES+1];
sint i,j,len,ix,struct_index=0;
char c;
fseek(fin,0,0); /* start at the beginning */
len=0; /* initialise length to zero */
if (fgets(line,MAXLINE+1,fin) == NULL) return; /* read the title line...ignore it */
if (fgets(line,MAXLINE+1,fin) == NULL) return; /* read the next line... */
/* skip any blank lines */
for (;;) {
if(fgets(line,MAXLINE+1,fin)==NULL) return;
if(!blankline(line)) break;
}
/* look for structure table lines */
ix = -1;
for(;;) {
if(line[0] != '!') break;
if(strncmp(line,"!SS",3) == 0) {
ix++;
sscanf(line+4,"%s%s",sname,tseq);
for(j=0;j<MAXNAMES;j++) if(sname[j] == ' ') break;
sname[j]=EOS;
rtrim(sname);
blank_to_(sname);
if (interactive) {
strcpy(title,"Found secondary structure in alignment file: ");
strcat(title,sname);
(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");
}
else (*lin2) = 'y';
if ((*lin2 != 'n') && (*lin2 != 'N')) {
struct_penalties = SECST;
struct_index = ix;
for (i=0;i<length;i++)
{
sec_struct_mask[i] = '.';
gap_penalty_mask[i] = '.';
}
strcpy(ss_name,sname);
for(i=0;len < length;i++) {
c = tseq[i];
if(c == '\n' || c == EOS) break; /* EOL */
if (!isspace(c)) sec_struct_mask[len++] = c;
}
}
}
else if(strncmp(line,"!GM",3) == 0) {
ix++;
sscanf(line+4,"%s%s",sname,tseq);
for(j=0;j<MAXNAMES;j++) if(sname[j] == ' ') break;
sname[j]=EOS;
rtrim(sname);
blank_to_(sname);
if (interactive) {
strcpy(title,"Found gap penalty mask in alignment file: ");
strcat(title,sname);
(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");
}
else (*lin2) = 'y';
if ((*lin2 != 'n') && (*lin2 != 'N')) {
struct_penalties = GMASK;
struct_index = ix;
for (i=0;i<length;i++)
gap_penalty_mask[i] = '1';
strcpy(ss_name,sname);
for(i=0;len < length;i++) {
c = tseq[i];
if(c == '\n' || c == EOS) break; /* EOL */
if (!isspace(c)) gap_penalty_mask[len++] = c;
}
}
}
if (struct_penalties != NONE) break;
if(fgets(line,MAXLINE+1,fin)==NULL) return;
}
if (struct_penalties == NONE) return;
/* skip any more comment lines */
while (line[0] == '!') {
if(fgets(line,MAXLINE+1,fin)==NULL) return;
}
/* skip the sequence lines and any comments after the alignment */
for (;;) {
if(isspace(line[0])) break;
if(fgets(line,MAXLINE+1,fin)==NULL) return;
}
/* read the rest of the alignment */
for (;;) {
/* skip any blank lines */
for (;;) {
if(!blankline(line)) break;
if(fgets(line,MAXLINE+1,fin)==NULL) return;
}
/* get structure table line */
for(ix=0;ix<struct_index;ix++) {
if (line[0] != '!') {
if(struct_penalties == SECST)
error("bad secondary structure format");
else
error("bad gap penalty mask format");
struct_penalties = NONE;
return;
}
if(fgets(line,MAXLINE+1,fin)==NULL) return;
}
if(struct_penalties == SECST) {
if (strncmp(line,"!SS",3) != 0) {
error("bad secondary structure format");
struct_penalties = NONE;
return;
}
sscanf(line+4,"%s%s",sname,tseq);
for(i=0;len < length;i++) {
c = tseq[i];
if(c == '\n' || c == EOS) break; /* EOL */
if (!isspace(c)) sec_struct_mask[len++] = c;
}
}
else if (struct_penalties == GMASK) {
if (strncmp(line,"!GM",3) != 0) {
error("bad gap penalty mask format");
struct_penalties = NONE;
return;
}
sscanf(line+4,"%s%s",sname,tseq);
for(i=0;len < length;i++) {
c = tseq[i];
if(c == '\n' || c == EOS) break; /* EOL */
if (!isspace(c)) gap_penalty_mask[len++] = c;
}
}
/* skip any more comment lines */
while (line[0] == '!') {
if(fgets(line,MAXLINE+1,fin)==NULL) return;
}
/* skip the sequence lines */
for (;;) {
if(isspace(line[0])) break;
if(fgets(line,MAXLINE+1,fin)==NULL) return;
}
}
}
/*
 * Read secondary structure / gap penalty mask data from an EMBL/Swiss-Prot
 * style file on the global stream fin.
 *
 * For every entry (a line recognised by linetype(line,"ID")) the entry name
 * is remembered and the following lines are scanned up to the "SQ" line:
 *   - a run of "FT" lines whose first feature is HELIX or STRAND is passed
 *     line by line to get_swiss_feature() and struct_penalties becomes SECST;
 *   - a run of "GM" lines is passed line by line to get_swiss_mask() and
 *     struct_penalties becomes GMASK.
 * When interactive, the user is asked first and a declined run is skipped.
 * In both cases the entry name is copied to ss_name.
 *
 *   length  number of mask positions to initialise and fill
 *
 * Returns when the end of the file is reached.  Every read is checked: at
 * end of file fgets() leaves the buffer untouched, so testing the old
 * contents again would never terminate.
 */
static void get_embl_ss(sint length)
{
	static char title[MAXLINE+1];
	static char line[MAXLINE+1];
	static char lin2[MAXLINE+1];
	static char sname[MAXNAMES+1];
	char feature[MAXLINE+1];
	char *sp;
	sint i,linelen;
	Boolean at_eof;

	for (;;) {
		/* find the start of the sequence entry */
		while( !linetype(line,"ID") )
			if (fgets(line,MAXLINE+1,fin) == NULL) return;

		/* the entry name is the first word after the blanks that start at
		 * column 5; never start beyond the end of a short line */
		linelen=(sint)strlen(line);
		for(i=5;i<linelen;i++)	/* DES */
			if(line[i] != ' ') break;
		if (i > linelen) i = linelen;
		strncpy(sname,line+i,MAXNAMES);	/* remember entryname */
		sname[MAXNAMES]=EOS;
		sp = strchr(sname,' ');
		if (sp != NULL) *sp = EOS;
		rtrim(sname);
		blank_to_(sname);

		/* look for secondary structure feature table / gap penalty mask */
		for (;;) {
			if (fgets(line,MAXLINE+1,fin) == NULL) return;
			at_eof = FALSE;

			if (linetype(line,"FT")) {
				feature[0] = EOS;	/* sscanf stores nothing on an empty FT line */
				sscanf(line+2,"%s",feature);
				if (strcmp(feature,"HELIX") == 0 ||
				    strcmp(feature,"STRAND") == 0)
				{
					if (interactive) {
						strcpy(title,"Found secondary structure in alignment file: ");
						strcat(title,sname);
						(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");
					}
					else (*lin2) = 'y';
					if ((*lin2 != 'n') && (*lin2 != 'N')) {
						struct_penalties = SECST;
						for (i=0;i<length;i++)
							sec_struct_mask[i] = '.';
						do {
							get_swiss_feature(&line[2],length);
							if (fgets(line,MAXLINE+1,fin) == NULL) {
								at_eof = TRUE;
								break;
							}
						} while( linetype(line,"FT") );
					}
					else {
						/* declined: skip the rest of the feature table */
						do {
							if (fgets(line,MAXLINE+1,fin) == NULL) {
								at_eof = TRUE;
								break;
							}
						} while( linetype(line,"FT") );
					}
					strcpy(ss_name,sname);
					if (at_eof) return;
				}
			}
			else if (linetype(line,"GM")) {
				if (interactive) {
					strcpy(title,"Found gap penalty mask in alignment file: ");
					strcat(title,sname);
					(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");
				}
				else (*lin2) = 'y';
				if ((*lin2 != 'n') && (*lin2 != 'N')) {
					struct_penalties = GMASK;
					for (i=0;i<length;i++)
						gap_penalty_mask[i] = '1';
					do {
						get_swiss_mask(&line[2],length);
						if (fgets(line,MAXLINE+1,fin) == NULL) {
							at_eof = TRUE;
							break;
						}
					} while( linetype(line,"GM") );
				}
				else {
					/* declined: skip the rest of the mask lines */
					do {
						if (fgets(line,MAXLINE+1,fin) == NULL) {
							at_eof = TRUE;
							break;
						}
					} while( linetype(line,"GM") );
				}
				strcpy(ss_name,sname);
				if (at_eof) return;
			}

			/* line now holds the first line after any FT/GM run */
			if (linetype(line,"SQ"))
				break;
			if (struct_penalties != NONE) break;
		}
	}
}
static void get_rsf_ss(sint length)
{
static char title[MAXLINE+1];
static char line[MAXLINE+1];
static char lin2[MAXLINE+1];
static char sname[MAXNAMES+1];
sint i;
/* skip the comments */
while (fgets(line,MAXLINE+1,fin) != NULL) {
if(line[strlen(line)-2]=='.' &&
line[strlen(line)-3]=='.')
break;
}
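/*
 * NOTE: this copy of the file is truncated here; the rest of get_rsf_ss()
 * and everything after it is missing.
 *
 * The text that followed was keyboard-shortcut help from the web code
 * viewer, not part of the program:
 *   Copy code: Ctrl + C        Search code: Ctrl + F
 *   Full-screen mode: F11      Toggle theme: Ctrl + Shift + D
 *   Show shortcuts: ?          Larger font: Ctrl + =
 *   Smaller font: Ctrl + -
 */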