?? sortd.src
字號:
** If the inputs are null ("" or 0) the procedure
** will ask for them.
**
** Globals: None
*/
proc 0 = mergeby(infile1,infile2,outfile,keytyp);
local err,f1,f2,fout,name1,name2,b1,b2,invar1,invar2,varnames,ws;
clear f1,f2,fout,err;
get1:
if infile1 $== "";
Print "Name of input file 1: ";;
infile1 = cons;
print;
endif;
get2:
if infile2 $== "";
Print "Name of input file 2: ";;
infile2 = cons;
print;
endif;
open f1 = ^infile1;
if f1==-1;
print "Input file 1: " infile1 " not found";
infile1 = "";
goto get1;
endif;
if iscplxf(f1);
errorlog "ERROR: Not implemented for complex data sets.";
end;
endif;
open f2 = ^infile2;
if f2==-1;
print "Input file 2: " infile2 " not found";
infile1 = "";
goto get2;
endif;
if iscplxf(f2);
errorlog "ERROR: Not implemented for complex data sets.";
end;
endif;
invar1 = seqa(1,1,colsf(f1)); /* File 1 variable indices */
invar2 = seqa(1,1,colsf(f2)); /* File 2 variable indices */
if outfile $== "";
Print "Name of output file: ";;
outfile = cons;
print;
endif;
name1 = getname(infile1);
name2 = getname(infile2);
name1 = name1[invar1,1];
name2 = name2[invar2,1];
if indcv(name1[1,1],name2[1,1]) /= 1;
errorlog "Key variables must have the same name";
goto errout;
endif;
varnames = name1|name2[2:rows(name2),1];
/* The assumption being made below is that all the variables from
:: both input files will be included in the output file and the type
:: of the data will follow the file with the largest type. The key
:: variable will be output from only one file.
*/
if not keytyp;
print "Key variable type:";
print " 1 numeric key, ascending order";
print " 2 character key, ascending order";
print;
print " ? ";;
keytyp = stof(cons);
print;
endif;
if keytyp==2 and (typef(f1) /= 8 or typef(f2) /= 8);
errorlog "WARNING - Sort is character type, dataset not double precis"\
"ion.";
endif;
create fout = ^outfile with ^varnames,0,maxc(typef(f1)|typef(f2));
if fout==-1;
"Can't open output file.";
goto errout;
endif;
b1 = submat(readr(f1,1),1,invar1);
b2 = submat(readr(f2,1),1,invar2);
ws = 1; /* is set to 1 inside loop if write is successful */
/* ------------------ merge loop ------------------ */
if keytyp==2;
gosub charkey;
else;
gosub numkey;
endif;
finish:
if err;
goto writerr;
endif;
call close(f1);
call close(f2);
call close(fout);
retp;
writerr:
errorlog "Error writing output file";
errout:
call close(f1);
call close(f2);
call close(fout);
end;
/* ======================================================================== */
/* -------------------------- Subroutines Follow -------------------------- */
/* ======================================================================== */
/* =============================== */
/* ===== Numeric Key Merge ======= */
/* =============================== */
numkey:
do while ws; /* do while write successful */
if b1[1,1] == b2[1,1];
/* key column from file 2 is dropped here */
ws = writer(fout,b1~b2[1,2:cols(b2)]);
if eof(f1) or eof(f2);
return;
endif;
b1 = submat(readr(f1,1),1,invar1);
b2 = submat(readr(f2,1),1,invar2);
elseif b1[1,1] > b2[1,1];
if eof(f2);
return;
endif;
b2 = submat(readr(f2,1),1,invar2);
else;
if eof(f1);
return;
endif;
b1 = submat(readr(f1,1),1,invar1);
endif;
endo;
err = 1;
return;
/* =============================== */
/* ==== Character Key Merge ====== */
/* =============================== */
charkey:
do while ws; /* do while write successful */
if b1[1,1] $== b2[1,1];
/* NOTE - key column from file 2 is dropped here */
ws = writer(fout,b1~b2[1,2:cols(b2)]);
if eof(f1) or eof(f2);
return;
endif;
b1 = submat(readr(f1,1),1,invar1);
b2 = submat(readr(f2,1),1,invar2);
elseif b1[1,1] $> b2[1,1];
if eof(f2);
return;
endif;
b2 = submat(readr(f2,1),1,invar2);
else;
if eof(f1);
return;
endif;
b1 = submat(readr(f1,1),1,invar1);
endif;
endo;
err = 1;
return;
/* ======================= End of Subroutines =========================== */
endp;
/*
**> sortd
**
** Purpose: To sort data file on disk with respect to a
** specified variable.
**
** Format: sortd(infile,outfile,keyvar,keytyp);
**
** Inputs: infile string, name of input file.
**
** outfile string, name of output file, must be different.
**
** keyvar string, name of key variable.
**
** keytyp scalar, type of key variable.
**
** 1 numeric key, ascending order
** 2 character key, ascending order
** -1 numeric key, descending order
** -2 character key, descending order
**
** Remarks: The data set INFILE will be sorted on the variable
** KEYVAR, and will be placed in OUTFILE.
**
** INFILE can have up to 4095 rows, with up to about
** 8100 variables. Putting this file on a ram disk
** can speed up the program considerably.
**
** If the inputs are null ("" or 0) the procedure
** will ask for them.
**
** Globals: None
*/
proc 0 = sortd(infile,outfile,keyvar,keytyp);
local ord,fin,fout,inx,x,srtmat,inrow,off,nr,rs,r,mix,varnames;
clear fin,fout;
get1:
if infile $== "";
Print "Name of input file: ";;
infile = cons;
print;
endif;
if outfile $== "";
Print "Name of output file: ";;
outfile = cons;
print;
endif;
open fin = ^infile;
if fin==-1;
print "Can't open " infile;
infile = "";
goto get1;
endif;
#ifUNIX
if infile $== outfile;
#else
if upper(infile) $== upper(outfile);
#endif
errorlog "Names must be different";
goto errout;
endif;
varnames = getname(infile);
if keyvar $/= "";
goto havit;
endif;
format 8,8;
retry1:
print "Variables are:";
print $varnames';
print "Name of key variable: ";;
keyvar = cons;
print;
havit:
inx = indcv(keyvar,varnames);
if scalmiss(inx);
print "Variable " keyvar " not found";
goto retry1;
endif;
if not keytyp;
print "Sort order:";
print " 1 numeric key, ascending order";
print " 2 character key, ascending order";
print " -1 numeric key, descending order";
print " -2 character key, descending order";
print;
print " ? ";;
keytyp = stof(cons);
print;
endif;
if keytyp < 0;
ord = 0;
keytyp = -keytyp;
else;
ord = 1;
endif;
if iscplxf( fin );
create complex fout = ^outfile with ^varnames,0,typef(fin);
else;
create fout = ^outfile with ^varnames,0,typef(fin);
endif;
if fout==-1;
errorlog "Can't open output file";
goto errout;
endif;
/* no. of rows to read per iter. change if there's memory constraints */
nr = 3000/colsf(fin);
inrow = rowsf(fin);
#ifLIGHT
if inrow > floor(maxvec/2);
errorlog "Maximum rows " $+ ftos(floor(maxvec/2),"%lf",1,0) $+
", this file has " $+ ftos(rowsf(fin),"%lf",1,0);
goto errout;
endif;
#endif
x = readr(fin,1);
srtmat = 1~x[1,inx];
off = 1;
/* reading input file and keeping only key variable */
do until eof(fin);
x = readr(fin,nr);
srtmat = srtmat|(seqa(off+1,1,rows(x))~x[.,inx]);
off = off+rows(x);
endo;
mix = seqa(1,1,rows(srtmat))~rndu(rows(srtmat),1);
mix = sortc(mix,2);
/* make sure its not already sorted, a quicksort would be real slow */
srtmat = srtmat[mix[.,1],.];
clear mix;
if keytyp==2;
srtmat = sortcc(srtmat,2);
else;
srtmat = sortc(srtmat,2);
endif;
if ord == 0;
srtmat = rev(srtmat);
endif;
r = 1;
rs = 1;
do while rs and r <= inrow; /* now we write output using index */
call seekr( fin, real(srtmat[r,1]) );
rs = writer(fout,readr(fin,1));
r = r+1;
endo;
if not rs;
errorlog "\nDisk Full - Sort Incomplete";
goto errout;
endif;
out:
call close(fin);
call close(fout);
retp;
errout:
call close(fin);
call close(fout);
end;
endp;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -