/*****************************************************************************/
/* This is a hypothetical example using the WHI Specimen Results Data Files  */
/*                                                                           */
/* Setting: analyze cohort of participants with vitamin D (25(OH)D) results; */
/* based on QC information (see example 4 above), want to exclude            */
/* results from study W15 - pull #6.                                         */
/* Note: This example assumes that all four data files have been read        */
/* into SAS using the provided SAS code.                                     */
/*****************************************************************************/

** Step 1: use the TESTS data file to get information on the vitamin D tests **;
/* List every abbreviated test name so the vitamin D code can be identified. */
PROC FREQ DATA=spec_tests_inv ;
  TABLE TESTABBR ;
RUN;
/* note that the abbreviated test name for vitamin D is VITD */

/* Build a lookup of vitamin D test versions only.                          */
/* The subsetting IF drops every row whose TESTABBR is not "VITD"; KEEP     */
/* retains just the test descriptors plus TESTVERID, which is the BY key    */
/* used when this lookup is merged with the results file.                   */
DATA spec_tests_vitd ;
  SET spec_tests_inv ;
  IF TESTABBR = "VITD" ;
  KEEP TESTABBR TESTNAME TESTVER TESTVERID SPECTYPE ;
RUN;

** Step 2: merge the information on the vitamin D tests with the specimen results **;
** data file and keep just the results for vitamin D. **;
/* Both inputs must be sorted by the BY variable before a match-merge. */
PROC SORT DATA=spec_tests_vitd ;
  BY TESTVERID ;
RUN;
PROC SORT DATA=spec_results_ctos_inv ;
  BY TESTVERID ;
RUN;

/* Match-merge results with the vitamin D test lookup on TESTVERID.         */
/* inkeep is 1 only when the current BY group has a row in spec_tests_vitd, */
/* so the subsetting IF keeps vitamin D results and drops all other tests.  */
DATA spec_results_vitd ;
  MERGE spec_results_ctos_inv
        spec_tests_vitd (in=inkeep) ;
  BY TESTVERID ;
  IF inkeep ;
  /* Based on QC information, remove results from pull #6 in W15 (PULLID=W15-6) */
  IF PULLID = "W15-6" THEN DELETE ;
RUN;

/* check that results from W15 - pull #6 are not in file */
PROC FREQ DATA=spec_results_vitd ;
  TABLE PULLID ;
RUN;

** Step 3: find out from what study visits blood samples were taken by **
** merging in information from DRAWS data file. **;
/* Sort both files by the merge key (participant ID + draw identifier). */
PROC SORT DATA=spec_results_vitd ;
  BY ID PPTDRW ;
RUN;
PROC SORT DATA=spec_draws_ctos_inv ;
  BY ID PPTDRW ;
RUN;

/* Match-merge draw information onto the vitamin D results.                 */
/* The BY statement is required: without it MERGE pairs rows by position    */
/* (one-to-one) instead of by ID/PPTDRW. Only the draw variables listed in  */
/* KEEP= are brought in. inkeep restricts the output to rows present in the */
/* results file, so draws with no vitamin D result are dropped.             */
DATA spec_results_vitd ;
  MERGE spec_results_vitd   (in=inkeep)
        spec_draws_ctos_inv (KEEP=ID PPTDRW DRAWDAYS DRAWVTYP DRAWVY) ;
  BY ID PPTDRW ;
  IF inkeep ;
RUN;

/* Cross-tabulate study by draw visit type and visit year (missing included). */
PROC FREQ DATA=spec_results_vitd ;
  TABLE studyid*drawvtyp*drawvy / missing ;
RUN;
/* Note that studies 105, 181 and BA9 used screening, and W15 used year 1 samples */

** Step 4: add information about case-control type for each study **;
/* Sort both files by the merge key (participant ID + study). */
PROC SORT DATA=spec_results_vitd ;
  BY ID STUDYID ;
RUN;
PROC SORT DATA=spec_case_control_types_ctos_inv ;
  BY ID STUDYID ;
RUN;

/* Match-merge the case-control information onto the vitamin D results.     */
/* The BY statement is required so rows are paired on ID/STUDYID rather     */
/* than by position. inkeep keeps only rows that exist in the results file. */
DATA spec_results_vitd ;
  MERGE spec_results_vitd (in=inkeep)
        spec_case_control_types_ctos_inv ;
  BY ID STUDYID ;
  IF inkeep ;
RUN;

/* Cross-tabulate study by case flag (missing values included). */
PROC FREQ DATA=spec_results_vitd ;
  TABLE studyid*caseflg / missing ;
RUN;
/* Note that study 105 was a cohort only, and the others were case-control studies */

/* decide to use only the samples from Study 105 and the controls from Studies 181 */
/* and BA9 since their samples all came from the screening visit */
/* This must be a subsetting IF: without the IF keyword the line is an      */
/* assignment statement and every row would be written to the output.       */
/* NOTE(review): CASEFLG="N" is taken to identify controls - confirm        */
/* against the data dictionary.                                             */
DATA final_results_vitd ;
  SET spec_results_vitd ;
  IF STUDYID = "105" or (STUDYID in ("181","BA9") and CASEFLG="N") ;
RUN;

/* final checks */
/* Confirm which study / case-flag combinations remain after subsetting. */
PROC FREQ DATA=final_results_vitd ;
  TABLE studyid*caseflg / missing ;
RUN;
/* Summary statistics of the test value, grouped by pull. */
PROC MEANS DATA=final_results_vitd ;
  CLASS pullid ;
  VAR testval ;
RUN;