?? sqljmac.sas
字號:
/****************************************************************/ /* S A S S A M P L E L I B R A R Y */ /* */ /* NAME: SQLJMAC */ /* TITLE: improves SQL join performance with macros */ /* PRODUCT: BASE */ /* SYSTEM: ALL */ /* KEYS: SQL DATMAN MACRO %UPCASE %SCAN %EVAL SELECT WHERE */ /* ALIAS */ /* PROCS: SQL */ /* DATA: */ /* */ /* SUPPORT: pmk UPDATE: */ /* REF: */ /* MISC: */ /* */ /****************************************************************/ title1 'SAS SAMPLE LIBRARY, MEMBER(SQLJMAC)'; /* * * Users have noticed that joining a small SAS dataset with a Large * DBMS table using PROC SQL can take a very long time, even if the * "joining variables" are indexed in the DBMS. * * The reson for this is that DBMS Software does not make the * indexes visible to applications software thru the SQL interface, * so the PROC SQL query optimiser has no choice but to make a * complete pass over all the DBMS rows to see if they match with * the rows in the smaller SAS dataset. (This problem does not * arise when the larger dataset is a SAS dataset with indexes * as SAS is able to perform direct access operations on only * those rows that it needs from the larger table) * * This performace gap can be remedied by constructing a where * clause that requests only the needed rows from the larger file. * PROC SQL will send any parts of a where clause that relate * to a single table only off to the engine that is responsible * for that table -- this gives the DBMS a chance to put the index * to work. * * If the number of unique keys is small relative to the size * of the DBMS table, this approach may give you very good * performance gains. * * This technique of calculating the keys required for one * side of the join, and then restricting the rows retrieved * from the other side is a relatively new optimisation made * by DBMS Software. (DB2 2.3 is the first db2 release that * implements this scheme). A future Version of PROC SQL may * perform this kind of optimisation transparently. * * * If the number of unique keys in the small dataset gets large * it may prove inefficient to create macro variables for all the * values. The datastep in the prejoin macro could be modified * to write the needed where clause to a temporary file.. * The drawback to this approach is that the other parts of the * query must be written out to temporary files too, and then * all the parts included together like this: * * %inc(pre where post); * * * If you know something about the range of keys in the smaller * file, it may be more efficient to construct a BETWEEN x AND y * to restrict the rows accessed from the larger table. This * generates a smaller where clause and requires fewer macro variables, * but is not as general in its application... it would be useful if * (for example) you knew that the smaller dataset contained dates * from just one month in a history file. * */ /* * * The prejoin macro constructs the list of unique key values * that are needed to build a where clause to apply to the * larger dataset. The pfx argument allows you to choose the * "root" of the macro names created by the macro... * * Assuming the default prefix "TJ" * * TJKN is the number of key variables. * TJNN is the number of unique ocurrences of the key. * TJK1 .. TJKn are the key variable names in the smaller table. * TJT1 .. TJTn are the datatypes of those key variables. * TJ1X1 .. TJnXm are the values of the keys. * */%macro prejoin( ds= /*-- the smaller dataset name --*/ , key= /*-- the names of the join variables --*/ , keytype= /*-- the types of the keys (C or N) --*/ , pfx=TJ /*-- work datasets and macros prefix --*/ ); /*---------------------------------------------------------------*/ /*-- load up the individual keynames into their own macro --*/ /*-- variables. --*/ /*---------------------------------------------------------------*/ %local i j var; %let i = 1; %let var = %scan(&key, &i); %do %while ( &var ne ); %global &pfx.K&i; %let &pfx.K&i = &var; %global &pfx.T&i; %let &pfx.T&i = %upcase(%scan(&keytype, &i)); %let i = %eval(&i+1); %let var = %scan(&key, &i); %end; %global &pfx.KN &pfx.NN; %let &pfx.KN = %eval(&i - 1); /*---------------------------------------------------------------*/ /*-- create a table with the unique values of the key from --*/ /*-- the smaller dataset. these values will be used to build --*/ /*-- a where clause to apply to the larger dataset. --*/ /*---------------------------------------------------------------*/ proc sql; create table work.&pfx as select distinct %do i = 1 %to &&&pfx.KN; %if (&i > 1) %then %str(,); &&&pfx.K&i %end; from &ds; %global &pfx.NN; %let &pfx.NN = &sqlobs; %put NOTE: There are &sqlobs unique keys in the smaller table.; quit; %do i = 1 %to &&&pfx.NN; %do j = 1 %to &&&pfx.KN; %global &pfx.&j.X&i; %end; %end; /*---------------------------------------------------------------*/ /*-- place these unique values into macro variables, so that --*/ /*-- we can use them later to build a where clause to apply to --*/ /*-- the larger table. --*/ /*-- --*/ /*-- the quote() function is new with sas6.07, and takes care --*/ /*-- of the messy details of character strings with embedded --*/ /*-- quotes.. If you know your keys dont have quotes, then --*/ /*-- you could omit the quote() call. --*/ /*---------------------------------------------------------------*/ data _null_; set work.&pfx; length cn $6 mvar $8; cn = 'X' || left(put(_n_, 5.)); %do i = 1 %to &&&pfx.KN; mvar = "&pfx.&i" || cn; * put mvar= &&&pfx.K&i=; %if ( N = &&&pfx.T&i ) %then %do; call symput( mvar, put(&&&pfx.K&i, best12.) ); %end; %else %do; call symput( mvar, quote(&&&pfx.K&i) ); * call symput( mvar, &&&pfx.K&i ); /* if no quotes */ %end; %end; run;%mend; /* * * The keyjoin macro constructs the where clause from the * values saved away by the prejoin macro. * */%macro keyjoin( alias= /*-- the alias of the big dataset --*/ , key= /*-- the names of the join variables --*/ , pfx=TJ /*-- work datasets and macros prefix --*/ ); %local i j; /*---------------------------------------------------------------*/ /*-- if they supply a key value, then the "names" of the keys --*/ /*-- are not the same in the BIG dataset. so remember these --*/ /*-- new key names. (They better be in the same order with --*/ /*-- repect to the order of the names given to prejoin.) --*/ /*---------------------------------------------------------------*/ %if ( %str(X&key) ne X ) %then %do; %let i = 1; %let var = %scan(&key, &i); %do %while ( &var ne ); %local &pfx.K&i; %let &pfx.K&i = &var; %let i = %eval(&i+1); %let var = %scan(&key, &i); %end; %end; /*---------------------------------------------------------------*/ /*-- go emit the key variables and their values that were --*/ /*-- found in the "smaller" dataset. --*/ /*---------------------------------------------------------------*/( %do i = 1 %to &&&pfx.NN; %if (&i > 1) %then %str(OR); ( %do j = 1 %to &&&pfx.KN; %if (&j > 1) %then %str(AND); &alias..&&&pfx.K&j = &&&pfx.&j.X&i %end; ) %end;) %mend; /*---------------------------------------------------------------*/ /*-- lets make some data to play with. --*/ /*-- --*/ /*-- doing this exercise when joining SAS datasets helps, --*/ /*-- as it sometimes makes the size of the data we must --*/ /*-- sort smaller. but in 607 we often do joins without --*/ /*-- any sorting, so you should test it out on YOUR data. --*/ /*-- --*/ /*-- --*/ /*-- doing this exercise when joining SAS datasets with --*/ /*-- datasets that come form an external source via --*/ /*-- SAS/ACCESS software helps, as we dont have to --*/ /*-- transfer as many records for the DBMS into SAS to --*/ /*-- begin with, let alone sort them all.. --*/ /*-- --*/ /*---------------------------------------------------------------*/data big; length k $20; do i = 1 to 100; do j = 1 to 5; k = trim(put(i, words13.)) || '*' || put(j, words6.); output; end; end; run;data small; length kk $20; do i = 7 to 270 by 4; j = 3; kk = trim(put(i, words13.)) || '*' || put(j, words6.); output; end; run; /*---------------------------------------------------------------*/ /*-- this example joins the variables on one numeric key -- I --*/ /*-- --*/ /*-- the %prejoin step computes the unique key values of I --*/ /*-- in the "small" dataset. --*/ /*-- --*/ /*-- then the %keyjoin macro emits a where expression that --*/ /*-- selects only those values computed by %prejoin --*/ /*-- from the big table. --*/ /*-- --*/ /*---------------------------------------------------------------*/%prejoin(ds=small, key=i, keytype=n);proc sql feedback; select * from big,small where big.i=small.i and big.j=2 and %keyjoin(alias=big); /*---------------------------------------------------------------*/ /*-- this example joins the variables on two keys -- I and J --*/ /*---------------------------------------------------------------*/%prejoin(ds=small, key=i j, keytype=n n);proc sql feedback; select * from big,small where big.i=small.i and big.j=small.j and %keyjoin(alias=big) ; /*---------------------------------------------------------------*/ /*-- join on one character key --*/ /*-- just to make it interesting, its called K in one dataset --*/ /*-- (the bigger one) and "KK" in the smaller one. --*/ /*---------------------------------------------------------------*/%prejoin(ds=small, key=kk, keytype=c);proc sql feedback; select * from big,small where big.k=small.kk and %keyjoin(alias=big,key=k) ;quit;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -