Download presentation
Presentation is loading. Please wait.
Published by Allen Boyd. Modified over 9 years ago.
1
PROC SQL: Tips and Translations for Data Step Users By: Gail Jorgensen Susan Marcella
2
AGENDA SQL Syntax Review Joins Translated SQL Strengths & Uses
3
PROC SQL: Tips and Translations for Data Step Users Syntax Proc SQL; create table/view newdsname as select var1, var2, … varN from dsname where condition ; Quit;
4
PROC SQL: Tips and Translations for Data Step Users JOIN vs MERGE Types of JOINs Inner Join – selects only matching records (same as: if ina and inb) Outer Join – selects some non-matching records – Left Join – selects all records from first table, only matching records from second (same as: if ina) – Right Join – selects all records from second table, only matching records from first (same as: if inb) – Full Join – selects all records from both tables (same as having no if statement)
5
PROC SQL: Tips and Translations for Data Step Users Inner Join proc sql; create table ds_c as select ds_a1.*, ds_b.* from ds_a1, ds_b where ds_a1.idfld = ds_b.idno; quit; data c; merge ds_a1(in=ina) ds_b(in=inb rename=(idno=idfld)); by idfld; if ina and inb; run; Dataset DS_A1Dataset DS_BB idfldcol2 1M 1N 2O 3P 4Q 5R idnocol3col4 1XC 2XD 2YF 4Z 5Z 7Z idfldcol2idnocol3col4 1M1XC 1N1XC 2O2XD 2O2YF 4Q4Z 5R5Z idfldcol2col3col4 1MXC 1NXC 2OXD 2OYF 4QZ 5RZ
6
PROC SQL: Tips and Translations for Data Step Users Left Join idfldcol2col5 1MA 1ND 2O 3PJ 4QK 5RN proc sql; create table sql_left as select a.*, b.* from inf_a as a left join inf_b as b on a.idfld = b.idfld; quit; data ds_left; merge inf_a(in=ina) inf_b(in=inb); by idfld; if ina ; run; idfldcol3col4 1XC 2XD 2YF 4Z 5Z 7Z Dataset inf_aDataset inf_b idfldcol2col5col3col4 1NDXC 1MAXC 2OYF 2OXD 3PJ 4QKZ 5RNZ idfldcol2col5col3col4 1NDXC 1MAXC 2OYF 2OXD 3PJ 4QKZ 5RNZ Dataset sql_leftDataset ds_left
7
PROC SQL: Tips and Translations for Data Step Users Right Join proc sql; create table sql_right as select a.*, b.* from inf_a as a right join inf_b as b on a.idfld = b.idfld; quit; data ds_right; merge inf_a(in=ina) inf_b(in=inb); by idfld; if inb; run; idfldcol2col5col3col4 1NDXC 1MAXC 2OYF 2OXD 4QKZ 5RNZ.Z idfldcol2col5col3col4 1MAXC 1NDXC 2OXD 2OYF 4QKZ 5RNZ 7Z
8
PROC SQL: Tips and Translations for Data Step Users Full Join Obsnamerecdsent 1 Amandayesno 2 Gabiyes 3 Janyes 4 Jimnoyes 5 Pamno Obsnamerecdsent 1 Alisonyes 2 Janyes 3 Pamno 4 Tomyes CList07 CList08 proc sql; create table sql_clist as select c7.name, c7.recd as recd07, c8.recd as recd08, c7.sent as sent07, c8.sent as sent08 from clist07 as c7 full join clist08 as c8 on c7.name=c8.name; quit; proc sort data=clist07; by name; run; proc sort data=clist08; by name; run; data data_clist; merge clist07 clist08 (rename=(recd=recd08 sent=sent08)); by name; run;
9
PROC SQL: Tips and Translations for Data Step Users Full Join (Cont'd) Obs Namerecd07recd08sent07sent08 1 yes 2 Amandayesno 3 Gabiyes 4 Janyes 5 Jimnoyes 6 Pamno 7 yes Obs namerecdsentrecd08sent08 1 Alisonyes 2 Amandayesno 3 Gabiyes 4 Janyes 5 Jimnoyes 6 Pamno 7 Tomyes Sql_CListData_CList
10
PROC SQL: Tips and Translations for Data Step Users Handling Duplicate Variable Names To always select the variable from one dataset: – Drop unwanted version of variable (PROC SQL permits all SAS dataset options) – Select variable from specific table To keep variable from both tables: – Rename the variable in one dataset To select variable based on value: – Use CASE statement
11
PROC SQL: Tips and Translations for Data Step Users CASE Statement proc sql; create table NewCList as select case when missing(c7.name) then c8.name else c7.name end as name, c7.recd as recd07, c8.recd as recd08, c7.sent as sent07, c8.sent as sent08 from clist07 as c7 full join clist08 as c8 on c7.name=c8.name; quit; proc sort data=clist07; by name; run; proc sort data=clist08; by name; run; data data_clist; merge clist07 clist08 (rename=(recd=recd08 sent=sent08)); by name; run;
12
PROC SQL: Tips and Translations for Data Step Users CASE Statement - Results Obs namerecd07recd08sent07sent08 1 Alisonyes 2 Amandayesno 3 Gabiyes 4 Janyes 5 Jimnoyes 6 Pamno 7 Tomyes
13
PROC SQL: Tips and Translations for Data Step Users Down Calculations PROC SORT data=shs.exposure; by subject_id; run; DATA counters(KEEP=TableName MAXOBS TOTOBS); SET shs.exposure END=LAST; BY subject_id; length TableName $ 50; RETAIN MAXOBS OBSCNTR TOTOBS 0; TableName="exposure"; TOTOBS+1; OBSCNTR+1; IF LAST.subject_id THEN DO; IF MAXOBS < OBSCNTR THEN MAXOBS=OBSCNTR; OBSCNTR=0; END; IF LAST THEN OUTPUT counters; label maxobs='Maximum number of obs per person' totobs='Total Number obs in table'; run; proc print data=counters; run; Obs TableNameMAXOBSTOTOBS 1 exposure142124
14
PROC SQL: Tips and Translations for Data Step Users Down Calculations proc sql; create table sqlcounter as select distinct subject_id, count(*) as subjcnt from fshs.exposure group by subject_id; select "exposure" as TableName, max(subjcnt) as MaxObs, sum(subjcnt) as TotObs from sqlcounter; quit; TableNameMaxObsTotObs exposure142124 Obssubject_idsubjcnt 11787 23061 33071 43081 53263 63301 73311 83321 sqlcounter
15
PROC SQL: Tips and Translations for Data Step Users Counts and Nesting Queries proc sql; select distinct genre, count(*) from itunes group by genre; quit; proc sql outobs=1; select (select count(*) from itunes) as TotalSongs, (select count(distinct genre) from itunes) as GenreCnt, (select count(distinct artist) from itunes) as ArtistCnt, (select count(distinct album) from itunes) as AlbumCnt from itunes; quit; Genre Alternative9 Bluegrass43 Blues14 Children's Music62 Christian & Gospel88 Classical74 Country77 Easy Listening31 Electronic1 Folk16 General Folk18 Gospel & Religious40 Hip Hop/Rap2 Holiday13 Inspirational70 TotalSongsGenreCntArtistCntAlbumCnt 80223160100
16
PROC SQL: Tips and Translations for Data Step User Dictionaries proc sql; create view detail as select * from dictionary.columns ; create view extern as select * from dictionary.members ; create view tbl as select * from dictionary.tables ; create view gotem as select trim(libname) as LibName, trim(memname) as TableName, trim(name) as ColName, label as ColLabel from sashelp.vcolumn ; quit; SAS
17
PROC SQL: Tips and Translations for Data Step Users Dictionaries – Getting variable names proc sql; /* get names of all variables you want */ select name into :drinkvars separated by ', ' from dictionary.columns where libname='AUG' and memname='DEMOG' and lowcase(name) contains 'ndrk'; /* use your newly created macro variable in your select statement */ create table drinks as select &drinkvars from aug.demog; quit;
18
PROC SQL: Tips and Translations for Data Step User Dictionaries – Getting variable names proc sql; /* add the table alias to the front of each variable name as you create your macro variable */ select 'd.'||name into :aliasvars separated by ', ' from dictionary.columns where libname='AUG' and memname='DEMOG' and lowcase(name) contains ('ndrk'); /* do your merge or whatever using the macro variable you just created */ create table newtable as select &aliasvars, c.expcategory from aug.demog as d left join aug.exposure as c on d.jcml_id=c.jcml_id; quit;
19
PROC SQL: Tips and Translations for Data Step User Views Views are ‘virtual tables’ Created with CREATE VIEW statement Can be used as if they are normal physical tables Enhance security – can construct a view of only fields and rows that user is allowed to view Enhance ease-of-use – Can combine rows and columns from multiple tables into a single view Facilitate data integrity – Can have several views on the same table, but only have to update the base table – Users always see up-to-date data proc sql; create view aug.testview as select d.subject_id, d.case_id, d.age, e.job_num, e.exposure_element from aug.demog as d, aug.exposure as e where d.subject_id=e.subject_id; quit;
20
PROC SQL: Tips and Translations for Data Step Users Creating Data Source Indicators Obsfamidnameinc 1 2Art22000 2 1Bill30000 3 3Paul25000 4 4Karl95000 dads Obsfamidfaminc96faminc97faminc98 1 3750007600077000 2 1400004050041000 3 2450004540045800 4 5550006500070000 5 6220002400028000 faminc proc sql; create table sql_fj as select *, (dads.famid=faminc.famid) as indic, (dads.famid ~=.) as dadind, (faminc.famid ~=.) as famind, coalesce(dads.famid, faminc.famid) as fid from dads full join faminc on dads.famid=faminc.famid; quit; proc sort data=dads out=sorted_dads; by famid; run; proc sort data=faminc out=sorted_faminc; by famid; run; data ds_fj; merge sorted_dads(in=in1) sorted_faminc(in=in2); by famid; if in1 and in2 then indic=1; else indic=0; dadind=in1; famind=in2; fid=famid; run;
21
PROC SQL: Tips and Translations for Data Step Users Full Join - cont Obsfamidnameincfaminc96faminc97faminc98indicdadindfamindfid 1 1Bill300004000040500410001111 2 2Art220004500045400458001112 3 3Paul250007500076000770001113 4 4Karl95000...0104 5..5500065000700000015 6..2200024000280000016 Obsfamidnameincfaminc96faminc97faminc98indicdadindfamindfid 1 1Bill300004000040500410001111 2 2Art220004500045400458001112 3 3Paul250007500076000770001113 4 4Karl95000...0104 5 5.5500065000700000015 6 6.2200024000280000016 Sql_fj Ds_fj
22
PROC SQL: Tips and Translations for Data Step Users Additional Uses proc sql; title 'Bad Control Matches'; select c.subject_id, c.casenum, c.gender, age as CntlAge label='CntlAge', (select age from cases where subject_id=c.casenum) as CaseAge, abs(cntlage-calculated caseage) as AgeDiff from controls as c left join demog as d on c.subject_id=d.subject_id where (not within5 and not within10); quit; Scenario: For a case/control study, verify that all controls have age within 5 to 10 years of the related case age.
23
PROC SQL: Tips and Translations for Data Step Users Merging Multiple Tables proc sql; create table sql_c3 as select a.name, b.class, case when missing(c.grade1) then "missing 1" when missing(c.grade2) then "missing 2" when missing(c.grade3) then "missing 3" when missing(c.grade4) then "missing 4" else "none missing" end as miss_grade from indat_a as a, indat_b as b, indat_c as c where c.classid=b.classid and c.perid=a.perid; quit; Obsnameclassmiss_grade 1 MaryArtnone missing 2 OliveArtnone missing 3 QuincyArtnone missing 4 NatArtnone missing 5 PatArtnone missing 6 QuincyMusicmissing 4 7 RichardMusicnone missing 8 MaryMathnone missing 9 NatMathnone missing 10 OliveMathnone missing 11 PatMathnone missing 12 QuincyMathmissing 2 13 RichardMathnone missing 14 MaryEnglishnone missing 15 NatEnglishnone missing 16 OliveEnglishnone missing 17 PatEnglishnone missing 18 QuincyEnglishnone missing 19 RichardEnglishnone missing
24
SAS and all other SAS Institute Inc. product or service names are registered trademarks or trademarks of SAS Institute Inc. in the USA and other countries. ® indicates USA registration.
Similar presentations
© 2025 SlidePlayer.com. Inc.
All rights reserved.