Home

SAS Notes

Tutorials

Abbreviations

DS = Dataset, similar to a table in a database SAS uses DataSets to organize, retain, and describe Data

Handy Tips

Data step

DATA output_DS outputDS_options;
    <statements 1 ... n>
RUN;

Where outputDS_options incl:
    (keep= x y z ) - to keep only the columns x y & z
    (drop= x y z ) - to drop x,y, & z but keep all others

NULL step

set output dataset name = NULL to run code rather than produce an output data. This is often used to set or manipulate variables.

DATA _NULL_;

Merge

    DATA  Output_datasets;
    MERGE input_dataset1(in=A)
          ...
          input_datasetn(in=N)
    ;
    BY column;
    if A=1 or B=1; /*This is equiv to a full outer join    */
    if A=1 and B=1; /*This is equiv to an inner join        */
    if A=1;         /*This is equiv to a left join          */
    if B=1;         /*This is equiv to a right join         */
    RUN;

PROCS

Import-Export

# Importing from an excel 
PROC IMPORT OUT= sas_Dataset
        DATAFILE= "path_to_sourceFile_inclFileName" 
        DBMS=EXCEL REPLACE;
    SHEET=Subs_BL;
    GETNAMES=YES;
    MIXED=YES;
    SCANTEXT=YES;
    USEDATE=YES;
    SCANTIME=YES;
RUN;

proc export 
    data=sas_Dataset
    outfile="/sasdata/d2d/Monzer/myresults.xlsx"
    DBMS=xlsx replace;  /* Export to Excel      */
    DBMS=csv replace;   /* Export to a CSV      */
    sheet="aa1_test1";  /* Keep for Excel only  */
run;

Upload - Download

Proc Upload
    infile='input_Path_To_readFile'
    outfile='output_Path_To_DestinationFile' ;
    run;

proc download 
    infile="input_Path_To_readFile"
    outfile='output_Path_To_DestinationFile' 
    binary;
   run;

Transpose

proc transpose
    data=input_DS
    OUT =output_DS;
    By  grouping_col;
    Id  row_col;
    var pivot_col;
    RUN;

Frequency

PROC FREQ options;
    DATA=[sas_dataset]
    BY variable-list;
    TABLES requests / options;
    WEIGHT variable;
    OUTPUT [OUT= sas_dataset][output-statistic-list];
    FORMAT ;
    EXACT statistic-keywords [/computation-option];
    TEST options;
RUN;
EXAMPLE:
proc freq 
    data=Attrition(where=(YearMth in ("201801","201712","201711","201710","201709"))); 
    tables type*YearMth /missing nocum nocol;
    title 'attrition by type*YearMth';
run;

Univariate

proc univariate data=Accounts;
    class type;
    var Previous_Amount;
    output out=Means    mean=Type_Mean;
    output out=Stats    mean=Type_Mean std=Type_SD 
                        min=Type_Min  max=Type_Max;
    output out=PTiles   pctlpts = 1 5 25 50 75 95 99
                        pctpre  = type
                        pctlname = pct1 pct5 pct25 pct50 pct75 pct95 pct99;
run;

Fastclus

The FASTCLUS procedure performs a disjoint cluster analysis on the basis of distances computed from one or more quantitative variables. By default, the FASTCLUS uses Euclidean distances centered on least-squares estimation. This kind of clustering method is often called a k-means model

PROC FASTCLUS DATA=attr_num
    MAXCLUSTERS=5
    OUT=a1_RESULT 
    OUTSEED=a1_CENTRES
    OUTSTAT=a1_CLUSTER_STAT 
    REPLACE=RANDOM
    RANDOM=187413849 
    CONVERGE=0.001 
    MAXITER=350;
    VAR _ALL_;
RUN;

GLMMOD

Replaces a categorical variable with binary indicator dummy columns

proc glmmod 
    data=test1 
    outdesign=test2 
    outparm=GLMParms NoPrint;
    class Tenure Age_Grp;
    model NAccts = Tenure Age_Grp;
    run;
Your final result set should have a column for each possible category in Tenure and Age_Grp each row should still have the NAccts value from the original table

Append

proc append base=base_data data=new_data;
run;

Contents

proc contents data=libname.dataset   # replace dataset with _all_ to get a listing of all tables and columns
              out =results 
        noprint;
        run;

Functions

Symput

#places variable on program stack, so you can use it outside the _null_ step
data _null_;
    lastday  = intnx('dtmonth', input("&thismth",yymmn6.), 0, 'E');
    call symput('firstday', firstday);  
output;
stop;
run;

Intnx & Intck

INTNX function increments dates by intervals. INTNX computes the date or datetime of the start of the interval a specified number of intervals from the interval that contains a given date or datetime value  

    SYNTAX: INTNX(interval, from, n, [,alignment] )  

INTCK function counts the number of interval boundaries between two date values or between two datetime values.
    SYNTAX: INTCK ( interval, from, to ) 

data _null_;
    firstday = intnx('dtmonth', input("&thismth",yymmn6.), 0, 'B');
    lastday  = intnx('dtmonth', input("&thismth",yymmn6.), 0, 'E');
    call symput('firstday', firstday);  
    call symput('lastday', lastday); 
output;
stop;
run;

Put-Input

data x; /* Convert a number to text with leading zeros */
   a=374747830939;
   b=put(a,z15.);
   format a z15.;
run;

Examples

Timing Code

/* Start your timer */
%let Start_Timer = %sysfunc(datetime());

... do some stuff ...
/* Get end time, compute duration */
data _null_;
    dur = datetime() - &Start_Timer;
    put 30*'-' / ' TOTAL DURATION (ScriptName):' dur time13.2 / 30*'-';
run;
# searching a library for a column
proc sql; 
   select name,memname
   from dictionary.columns
   where libname eq 'SOWSTAR' 
      and upcase(name) like '%CUST_ID%' 
      /* and memtype eq 'DATA' */
;quit; 
run;

Removing Duplicates

proc sort data=Accounts;    by account_number; run;

data account_clean accounts_duplicates;
set  accounts
     ;  
By account_number;
if not first.account_number and last.account_number
    THEN OUTPUT accounts_duplicates ;
    ELSE OUTPUT account_clean;
run;

Asynchronous submission

%let mynode=sas_server_ip port;
     options comamid=tcp remote=mynode;
     signon user= XXXX  password='YYYY';

/* sas lib setup goes here */

/* We're going to try to run multiple sessions in single program */
options sascmd='path_to_sas.exe -nosyntaxcheck'
autosignon;

rsubmit remote1 wait=no remote=mynode;
proc sql; create table Jan2018 as select count(*) from DS_1;    ;quit;
endrsubmit;

rsubmit remote2 wait=no remote=mynode;
proc sql; create table Dec2017 as select count(*) from DS_2;    ;quit;
endrsubmit;

waitfor _all_ remote1 remote2;                          /* 5 */

rsubmit mynode;
    proc sql;
    create table results as
          select * from Jan2018
    union select * from Dec2017;
    ;quit;
endrsubmit;

File Permissions (UNIX)

%macro chmod(libname,dataset,permis=744);
    proc sql noprint;   /* get the absolute path of the libname               */
        select path into: extpath from dictionary.members
            where libname="%UPCASE(&LIBNAME)"
    ;quit;

    proc sql noprint;   /* find out if there are any datasets in the library  */
        select count(path) into: cnt from dictionary.members
            where libname="%UPCASE(&LIBNAME)";
    ;quit;

    /* if there are datasets...then issue the below command to change the
       permissions of the dataset called in this macro  to 777                */
     %if &cnt > 0 %then %do;
        filename chmod pipe "chmod &PERMIS %trim(&extpath)/&dataset..sas7bdat";
        data _null_;
          file chmod;
        run;
     %end;
%mend;

N+ Rows to csv

data inTable;
input letter $ number $;
datalines;
a 1
a 2
a 3
b 4
b 5
b 6
;

data want;
length cat $20.;
   do until (last.letter);
      set intable;
        by letter notsorted;
      cat=catx(',',cat,number);
   end;
   drop number;
run;

Sample dataset

data example1;
input x y $ z;
cards;
201701 A 60
201702 A 70
201703 A 100
201704 B 10
201705 B 67
201706 C 81
201707 C 63
201708 C 55
201709 C 55
201710 C 55
201711 C 55
201712 C 55
201612 C 55
201611 C 55
201610 C 55
201609 C 55
;
run;