/************************************************************************/
/* multi_stat_example.sas -- SAS program to read data from HW4 data     */
/*                           cart and illustrate different multivariate */
/*                           procedures                                 */
/*                                                                      */
/* Flow:                                                                */
/*   1. DATA t0       - read the fixed-column extract, keep respondent  */
/*                      heads with valid age/education, derive          */
/*                      regressors and earnings outcomes                */
/*   2. PROC MEANS    - document the analysis data set                  */
/*   3. PROC REG      - two income regressions with joint tests         */
/*   4. PROC LOGISTIC - binary and ordered logits of head earnings      */
/*   5. PROC QLIM     - logit, heteroskedastic probit, tobit and        */
/*                      doubly-censored tobit                           */
/************************************************************************/

FILENAME cartdata "C:\Data\PSID\ECO 725\hw4.txt" ;

TITLE1 "Multivariate statistics example";

/* Read HW4 data from cart; select respondent heads with valid info. */
DATA t0;

   /* Read (input) PSID data for HW4 using PSID-generated statements */
   ATTRIB
      ER30001   FORMAT=F4.    LABEL="1968 INTERVIEW NUMBER"
      ER30002   FORMAT=F3.    LABEL="PERSON NUMBER 68"
      ER32000   FORMAT=F1.    LABEL="SEX OF INDIVIDUAL"
      ER25104   FORMAT=F2.    LABEL="BC1 EMPLOYMENT STATUS-1ST MENTION"
      ER25105   FORMAT=F2.    LABEL="BC1 EMPLOYMENT STATUS-2ND MENTION"
      ER25106   FORMAT=F2.    LABEL="BC1 EMPLOYMENT STATUS-3RD MENTION"
      ER25362   FORMAT=F2.    LABEL="DE1 EMPLOYMENT STATUS-1ST MENTION"
      ER25363   FORMAT=F2.    LABEL="DE1 EMPLOYMENT STATUS-2ND MENTION"
      ER25364   FORMAT=F2.    LABEL="DE1 EMPLOYMENT STATUS-3RD MENTION"
      ER27931   FORMAT=F7.    LABEL="LABOR INCOME OF HEAD-2004"
      ER27943   FORMAT=F7.    LABEL="LABOR INCOME OF WIFE-2004"
      ER28037   FORMAT=F7.    LABEL="TOTAL FAMILY INCOME-2004"
      ER28078   FORMAT=F7.3   LABEL="2005 CORE/IMMIGRANT FAM WEIGHT NUMBER 1"
      ER33801   FORMAT=F5.    LABEL="2005 INTERVIEW NUMBER"
      ER33802   FORMAT=F2.    LABEL="SEQUENCE NUMBER 05"
      ER33803   FORMAT=F2.    LABEL="RELATION TO HEAD 05"
      ER33804   FORMAT=F3.    LABEL="AGE OF INDIVIDUAL 05"
      ER33813   FORMAT=F1.    LABEL="EMPLOYMENT STATUS 05"
      ER33817   FORMAT=F2.    LABEL="YEARS COMPLETED EDUCATION 05"
      ER33847   FORMAT=F2.    LABEL="WHY NONRESPONSE 05"
      ER33848   FORMAT=F7.3   LABEL="CORE/IMM INDIVIDUAL LONGITUDINAL WT 05"
   ;

   INFILE cartdata LRECL = 72 ;

   /* Fixed-column layout: each variable occupies the listed columns */
   INPUT
      ER30001    1 -  4
      ER30002    5 -  7
      ER32000    8 -  8
      ER25104    9 - 10
      ER25105   11 - 12
      ER25106   13 - 14
      ER25362   15 - 16
      ER25363   17 - 18
      ER25364   19 - 20
      ER27931   21 - 27
      ER27943   28 - 34
      ER28037   35 - 41
      ER28078   42 - 48
      ER33801   49 - 53
      ER33802   54 - 55
      ER33803   56 - 57
      ER33804   58 - 60
      ER33813   61 - 61
      ER33817   62 - 63
      ER33847   64 - 65
      ER33848   66 - 72
   ;

   /* Restrict analysis data set to head respondents with valid age and */
   /* education information (subsetting IFs drop non-matching records)  */
   IF ER33847 EQ 0;                      /* respondents     */
   IF ER33803 EQ 10;                     /* heads           */
   IF ER33817 GT 0 AND ER33817 LT 98;    /* valid education */
   IF ER33804 GT 0 AND ER33804 LT 999;   /* valid age       */

   /* Create female variable (1 when ER32000 equals 2, else 0) */
   female = ER32000 EQ 2;
   LABEL female  = 'Female';
   LABEL ER33817 = 'Education';
   LABEL ER33804 = 'Age';

   /* Create quadratics of age and education, scaled down by 100 */
   age_sqr  = (ER33804**2) / 100;
   educ_sqr = (ER33817**2) / 100;
   LABEL age_sqr  = 'Age squared (/100)';
   LABEL educ_sqr = 'Education squared (/100)';

   /* Earnings outcomes derived from head labor income (ER27931).       */
   /* A missing ER27931 must not be recoded: SAS sorts missing below    */
   /* every number (so "LT 25000" would be true) and MIN() ignores      */
   /* missing arguments (so the capped value would become 100000).      */
   /* Leaving the derived variables missing lets the modelling          */
   /* procedures drop such observations instead of using made-up values.*/
   IF MISSING(ER27931) THEN DO;
      anyearn  = .;
      earn_cat = .;
      upcnearn = .;
   END;
   ELSE DO;
      /* Indicator for strictly positive earnings */
      anyearn = ER27931 GT 0;

      /* Three-level category: 0 = none, 1 = below 25000, 2 = 25000+ */
      IF ER27931 EQ 0 THEN earn_cat = 0;
      ELSE IF ER27931 LT 25000 THEN earn_cat = 1;
      ELSE earn_cat = 2;

      /* Top-code earnings at 100000 (matches UB= in the last PROC QLIM) */
      upcnearn = MIN(ER27931, 100000);
   END;
   LABEL anyearn  = 'Head had earnings last year';
   LABEL earn_cat = 'Categorical earnings variable';
   LABEL upcnearn = 'Earnings censored from above at $100K';

   KEEP ER30001 ER30002 ER27931 ER28037 ER33817 ER33804
        female age_sqr educ_sqr anyearn earn_cat upcnearn;
RUN;

/* Calculate statistics to document data set */
PROC MEANS DATA=t0;
   TITLE2 'Means for 2005 resp. heads with valid age and educ. info.';
RUN;

/* Basic regressions and tests                 */
/*   - includes tests of coefficients          */
/*   - outputs predictions and residuals       */
/*   - outputs coefficients                    */
/* Each OUTPUT and TEST statement follows the  */
/* MODEL statement it belongs to.              */
PROC REG DATA=t0 OUTEST=tcoeff;
   TITLE2 'Regressions with joint coefficient hypothesis tests';

   ModA:  MODEL ER28037 = female ER33817 ER33804;
          OUTPUT OUT=tmoda P=predinca R=res_inca;
   TstA1: TEST ER33817, ER33804;

   ModB:  MODEL ER28037 = female ER33817 educ_sqr ER33804 age_sqr;
          OUTPUT OUT=tmodb P=predincb R=res_incb;
   TstB1: TEST ER33817, ER33804;
   TstB2: TEST educ_sqr, age_sqr;
RUN;
QUIT;   /* PROC REG is interactive; end it explicitly */

/* Display different data sets created by PROC REG */
PROC MEANS DATA=tmoda;
   TITLE2 'Output from ModA Regression';
RUN;

PROC MEANS DATA=tmodb;
   TITLE2 'Output from ModB Regression';
RUN;

PROC PRINT DATA=tcoeff;
   TITLE2 'Coefficients from Regression';
RUN;

/* Basic logit                                                      */
/*   - outputs predictions of probabilities and latent variables    */
/*   - outputs coefficients                                         */
PROC LOGISTIC DATA=t0 OUTEST=tlcoeff;
   TITLE2 'Head earnings logit';
   MODEL anyearn(DESCENDING) = female ER33817 educ_sqr ER33804 age_sqr;
   OUTPUT OUT=tlogmod P=prprearn XBETA=prxbearn;
RUN;

/* Display different data sets created by PROC LOGISTIC */
/* (no new TITLE2 here, so the logit title stays in effect) */
PROC MEANS DATA=tlogmod;
RUN;

PROC PRINT DATA=tlcoeff;
RUN;

/* Ordered logit */
PROC LOGISTIC DATA=t0;
   TITLE2 'Head earnings ordered-logit';
   MODEL earn_cat(descending) = female ER33817 educ_sqr ER33804 age_sqr;
RUN;

/* Logit using PROC QLIM */
PROC QLIM DATA=t0;
   TITLE2 'Head earnings logit (using PROC QLIM)';
   MODEL anyearn = female ER33817 educ_sqr ER33804 age_sqr
         / DISCRETE(D=LOGIT);
RUN;

/* Heteroskedastic probit using PROC QLIM */
PROC QLIM DATA=t0;
   TITLE2 'Head earnings probit accounting for heteroskedasticity';
   MODEL anyearn = female ER33817 educ_sqr ER33804 age_sqr
         / DISCRETE;
   HETERO anyearn ~ female ER33817 ER33804;
RUN;

/* Tobit using PROC QLIM */
PROC QLIM DATA=t0;
   TITLE2 'Head earnings tobit using PROC QLIM';
   MODEL ER27931 = female ER33817 educ_sqr ER33804 age_sqr;
   ENDOGENOUS ER27931 ~ CENSORED(LB=0);
RUN;

/* Doubly-censored tobit using PROC QLIM */
PROC QLIM DATA=t0;
   TITLE2 'Head earnings double tobit using PROC QLIM';
   MODEL upcnearn = female ER33817 educ_sqr ER33804 age_sqr;
   ENDOGENOUS upcnearn ~ CENSORED(LB=0 UB=100000);
RUN;