DS = Dataset, similar to a table in a database SAS uses DataSets to organize, retain, and describe Data
DATA output_DS outputDS_options;
<statements 1 ... n>
RUN;
Where outputDS_options incl:
(keep= x y z ) - to keep only the columns x y & z
(drop= x y z ) - to drop x,y, & z but keep all others
set output dataset name = NULL to run code rather than produce an output data. This is often used to set or manipulate variables.
DATA _NULL_;
DATA Output_datasets;
MERGE input_dataset1(in=A)
...
input_datasetn(in=N)
;
BY column;
if A=1 or B=1; /*This is equiv to a full outer join */
if A=1 and B=1; /*This is equiv to an inner join */
if A=1; /*This is equiv to a left join */
if B=1; /*This is equiv to a right join */
RUN;
# Importing from an excel
PROC IMPORT OUT= sas_Dataset
DATAFILE= "path_to_sourceFile_inclFileName"
DBMS=EXCEL REPLACE;
SHEET=Subs_BL;
GETNAMES=YES;
MIXED=YES;
SCANTEXT=YES;
USEDATE=YES;
SCANTIME=YES;
RUN;
proc export
data=sas_Dataset
outfile="/sasdata/d2d/Monzer/myresults.xlsx"
DBMS=xlsx replace; /* Export to Excel */
DBMS=csv replace; /* Export to a CSV */
sheet="aa1_test1"; /* Keep for Excel only */
run;
Proc Upload
infile='input_Path_To_readFile'
outfile='output_Path_To_DestinationFile' ;
run;
proc download
infile="input_Path_To_readFile"
outfile='output_Path_To_DestinationFile'
binary;
run;
proc transpose
data=input_DS
OUT =output_DS;
By grouping_col;
Id row_col;
var pivot_col;
RUN;
PROC FREQ options;
DATA=[sas_dataset]
BY variable-list;
TABLES requests / options;
WEIGHT variable;
OUTPUT [OUT= sas_dataset][output-statistic-list];
FORMAT ;
EXACT statistic-keywords [/computation-option];
TEST options;
RUN;
EXAMPLE:
proc freq
data=Attrition(where=(YearMth in ("201801","201712","201711","201710","201709")));
tables type*YearMth /missing nocum nocol;
title 'attrition by type*YearMth';
run;
proc univariate data=Accounts;
class type;
var Previous_Amount;
output out=Means mean=Type_Mean;
output out=Stats mean=Type_Mean std=Type_SD
min=Type_Min max=Type_Max;
output out=PTiles pctlpts = 1 5 25 50 75 95 99
pctpre = type
pctlname = pct1 pct5 pct25 pct50 pct75 pct95 pct99;
run;
The FASTCLUS procedure performs a disjoint cluster analysis on the basis of distances computed from one or more quantitative variables. By default, the FASTCLUS uses Euclidean distances centered on least-squares estimation. This kind of clustering method is often called a k-means model
PROC FASTCLUS DATA=attr_num
MAXCLUSTERS=5
OUT=a1_RESULT
OUTSEED=a1_CENTRES
OUTSTAT=a1_CLUSTER_STAT
REPLACE=RANDOM
RANDOM=187413849
CONVERGE=0.001
MAXITER=350;
VAR _ALL_;
RUN;
Replaces a categorical variable with binary indicator dummy columns
proc glmmod
data=test1
outdesign=test2
outparm=GLMParms NoPrint;
class Tenure Age_Grp;
model NAccts = Tenure Age_Grp;
run;
Your final result set should have a column for each possible category in Tenure and Age_Grp each row should still have the NAccts value from the original table
proc append base=base_data data=new_data;
run;
proc contents data=libname.dataset # replace dataset with _all_ to get a listing of all tables and columns
out =results
noprint;
run;
#places variable on program stack, so you can use it outside the _null_ step
data _null_;
lastday = intnx('dtmonth', input("&thismth",yymmn6.), 0, 'E');
call symput('firstday', firstday);
output;
stop;
run;
INTNX function increments dates by intervals. INTNX computes the date or datetime of the start of the interval a specified number of intervals from the interval that contains a given date or datetime value
SYNTAX: INTNX(interval, from, n, [,alignment] )
INTCK function counts the number of interval boundaries between two date values or between two datetime values.
SYNTAX: INTCK ( interval, from, to )
data _null_;
firstday = intnx('dtmonth', input("&thismth",yymmn6.), 0, 'B');
lastday = intnx('dtmonth', input("&thismth",yymmn6.), 0, 'E');
call symput('firstday', firstday);
call symput('lastday', lastday);
output;
stop;
run;
data x; /* Convert a number to text with leading zeros */
a=374747830939;
b=put(a,z15.);
format a z15.;
run;
/* Start your timer */
%let Start_Timer = %sysfunc(datetime());
... do some stuff ...
/* Get end time, compute duration */
data _null_;
dur = datetime() - &Start_Timer;
put 30*'-' / ' TOTAL DURATION (ScriptName):' dur time13.2 / 30*'-';
run;
# searching a library for a column
proc sql;
select name,memname
from dictionary.columns
where libname eq 'SOWSTAR'
and upcase(name) like '%CUST_ID%'
/* and memtype eq 'DATA' */
;quit;
run;
proc sort data=Accounts; by account_number; run;
data account_clean accounts_duplicates;
set accounts
;
By account_number;
if not first.account_number and last.account_number
THEN OUTPUT accounts_duplicates ;
ELSE OUTPUT account_clean;
run;
%let mynode=sas_server_ip port;
options comamid=tcp remote=mynode;
signon user= XXXX password='YYYY';
/* sas lib setup goes here */
/* We're going to try to run multiple sessions in single program */
options sascmd='path_to_sas.exe -nosyntaxcheck'
autosignon;
rsubmit remote1 wait=no remote=mynode;
proc sql; create table Jan2018 as select count(*) from DS_1; ;quit;
endrsubmit;
rsubmit remote2 wait=no remote=mynode;
proc sql; create table Dec2017 as select count(*) from DS_2; ;quit;
endrsubmit;
waitfor _all_ remote1 remote2; /* 5 */
rsubmit mynode;
proc sql;
create table results as
select * from Jan2018
union select * from Dec2017;
;quit;
endrsubmit;
%macro chmod(libname,dataset,permis=744);
proc sql noprint; /* get the absolute path of the libname */
select path into: extpath from dictionary.members
where libname="%UPCASE(&LIBNAME)"
;quit;
proc sql noprint; /* find out if there are any datasets in the library */
select count(path) into: cnt from dictionary.members
where libname="%UPCASE(&LIBNAME)";
;quit;
/* if there are datasets...then issue the below command to change the
permissions of the dataset called in this macro to 777 */
%if &cnt > 0 %then %do;
filename chmod pipe "chmod &PERMIS %trim(&extpath)/&dataset..sas7bdat";
data _null_;
file chmod;
run;
%end;
%mend;
data inTable;
input letter $ number $;
datalines;
a 1
a 2
a 3
b 4
b 5
b 6
;
data want;
length cat $20.;
do until (last.letter);
set intable;
by letter notsorted;
cat=catx(',',cat,number);
end;
drop number;
run;
data example1;
input x y $ z;
cards;
201701 A 60
201702 A 70
201703 A 100
201704 B 10
201705 B 67
201706 C 81
201707 C 63
201708 C 55
201709 C 55
201710 C 55
201711 C 55
201712 C 55
201612 C 55
201611 C 55
201610 C 55
201609 C 55
;
run;