/************************************************************************/
/* by_example.sas -- SAS program to demonstrate different uses of BY    */
/*                   statements by calculating family statistics        */
/*                                                                      */
/* Steps:                                                               */
/*   1. Read the fixed-column extract into t1, keeping only records     */
/*      that pass the response / age / education filters                */
/*   2. Sort t1 by the 2005 interview number (ER33801)                  */
/*   3. Compute per-family max age, max education and min age with      */
/*      PROC SUMMARY (output data set fmsum1)                           */
/*   4. Compute the same three statistics with a BY-group DATA step     */
/*      (output data set fmsum2)                                        */
/*   5. Run PROC MEANS on each result so the two can be compared        */
/************************************************************************/

FILENAME indata "c:\data\PSID\ECO 725\hw2.txt" ;

TITLE1 "BY Example Program";

/*----------------------------------------------------------------------*/
/* Step 1: read (input) PSID data for HW2 using PSID-generated          */
/*         statements                                                   */
/*----------------------------------------------------------------------*/
DATA t1;
   ATTRIB
      ER30001   FORMAT=F4.     LABEL="1968 INTERVIEW NUMBER"
      ER30002   FORMAT=F3.     LABEL="PERSON NUMBER 68"
      ER32000   FORMAT=F1.     LABEL="SEX OF INDIVIDUAL"
      ER25104   FORMAT=F2.     LABEL="BC1 EMPLOYMENT STATUS-1ST MENTION"
      ER25362   FORMAT=F2.     LABEL="DE1 EMPLOYMENT STATUS-1ST MENTION"
      ER28037   FORMAT=F7.     LABEL="TOTAL FAMILY INCOME-2004"
      ER33801   FORMAT=F5.     LABEL="2005 INTERVIEW NUMBER"
      ER33802   FORMAT=F2.     LABEL="SEQUENCE NUMBER 05"
      ER33803   FORMAT=F2.     LABEL="RELATION TO HEAD 05"
      ER33804   FORMAT=F3.     LABEL="AGE OF INDIVIDUAL 05"
      ER33813   FORMAT=F1.     LABEL="EMPLOYMENT STATUS 05"
      ER33817   FORMAT=F2.     LABEL="YEARS COMPLETED EDUCATION 05"
      ER33847   FORMAT=F2.     LABEL="WHY NONRESPONSE 05"
      ER33848   FORMAT=F7.3    LABEL="CORE/IMM INDIVIDUAL LONGITUDINAL WT 05"
   ;

   /* Fixed-width records, 43 columns each */
   INFILE indata LRECL = 43 ;

   INPUT
      ER30001    1 -  4
      ER30002    5 -  7
      ER32000    8 -  8
      ER25104    9 - 10
      ER25362   11 - 12
      ER28037   13 - 19
      ER33801   20 - 24
      ER33802   25 - 26
      ER33803   27 - 28
      ER33804   29 - 31
      ER33813   32 - 32
      ER33817   33 - 34
      ER33847   35 - 36
      ER33848   37 - 43
   ;

   /* Restrict input to respondents with valid ages and education values. */
   /* Each subsetting IF drops the observation when its condition fails.  */
   IF ER33847 EQ 0;                            /* respondents             */
   IF ER33804 GT 0 AND ER33804 LT 999;         /* with valid ages         */
   IF (ER33817 GT 0) AND (ER33817 LT 98);      /* with valid educ. values */

   /* Only these variables are written to t1 */
   KEEP ER30001 ER30002 ER28037 ER33801 ER33804 ER33817;
RUN;

/*----------------------------------------------------------------------*/
/* Step 2: sort by 2005 Family ID (required by the BY statements below) */
/*         DATA= is given explicitly rather than relying on _LAST_.     */
/*----------------------------------------------------------------------*/
PROC SORT DATA=t1;
   BY ER33801;
RUN;

/*----------------------------------------------------------------------*/
/* Step 3: calculate max ages and education and min age for each family */
/*----------------------------------------------------------------------*/
PROC SUMMARY DATA=t1;
   BY ER33801;
   VAR ER33804 ER33817;
   LABEL fmoldest = 'Oldest resp. in family 05'
         fmmosted = 'Highest educ. in family 05'
         fmyngest = 'Youngest resp. in family 05';
   /* MAX= names pair positionally with the VAR list:                   */
   /*   fmoldest <- MAX(ER33804), fmmosted <- MAX(ER33817).             */
   /* MIN(ER33804)= requests the minimum for age only.                  */
   OUTPUT OUT=fmsum1
          MAX=fmoldest fmmosted
          MIN(ER33804)=fmyngest;
RUN;

/* Calculate and display means.                                         */
/* No VAR statement is given, so every numeric variable in fmsum1 is    */
/* summarized, not only the three family statistics.                    */
PROC MEANS DATA=fmsum1;
   TITLE2 "Family stats from SUMMARY procedure";
RUN;

/*----------------------------------------------------------------------*/
/* Step 4: calculate same stats using DATA step                         */
/*----------------------------------------------------------------------*/
DATA fmsum2;
   SET t1;
   BY ER33801;

   /* Carry the running family statistics across observations;          */
   /* without RETAIN they would be reset to missing on every iteration. */
   RETAIN fmoldest fmmosted fmyngest;

   LABEL fmoldest = 'Oldest resp. in family 05'
         fmmosted = 'Highest educ. in family 05'
         fmyngest = 'Youngest resp. in family 05';

   IF FIRST.ER33801 THEN DO;
      /* First member of a family: start the running values */
      fmoldest = ER33804;
      fmmosted = ER33817;
      fmyngest = ER33804;
   END;
   ELSE DO;
      /* Later members: update the running max / min */
      fmoldest = MAX(ER33804, fmoldest);
      fmmosted = MAX(ER33817, fmmosted);
      fmyngest = MIN(ER33804, fmyngest);
   END;

   /* Output one observation per family, after its last member */
   IF LAST.ER33801;

   KEEP ER33801 fmoldest fmmosted fmyngest;
RUN;

/* Calculate and display means (input named explicitly, not via _LAST_) */
PROC MEANS DATA=fmsum2;
   TITLE2 "Family stats from DATA step";
RUN;