/* ex6_12.do example 6.12 radiology data from Muller et al (1989) 5 point ordinal assessment 2 modalities 3 readers 50 plates (2 images per plate: 1 with "lesion", 1 w/o) 2 readings (12,000 obs) */ version 7 set more off cap semt_profile cap log close clear set mem 10m ************************************* * program to fit an ROC-GLM model on a single bootstrap sample: capture program drop glmfit program define glmfit version 7 if "`1'"=="?" { global S_1 "alpha_1 beta_1 beta_2 beta_3 alpha_0" exit } drop image *********** /* STEPS 1 & 2 : calculate the covariate specific reference distn, t, (the survivor fxn S_db_z(Y)), using ordered logit estimation of outcome as a function of reader for non-diseased obs. Estimated separately for the 2 test modalities This is, equivalently, the fpr, corresponding to the non-dis Y_db_i, and yields the placement values for the diseased Y_d_i, */ /* trap to deal with -predict- cmd error in case of bs sample without category 4 outcome among non-dis obs */ ologit y x2 x3 if d==0 & xtest==0 if e(k_cat) == 4 { predict p1 p2 p3 p4 if xtest==0 } else { assert e(k_cat) == 3 count if d==0 & y==4 & xtest==0 assert r(N)==0 predict p1 p2 p3 if xtest==0 gen p4 = 0 if xtest==0 } ologit y x2 x3 if d==0 & xtest==1 if e(k_cat) == 4 { predict p1_1 p2_1 p3_1 p4_1 if xtest==1 } else { assert e(k_cat) == 3 count if d==0 & y==4 & xtest==1 assert r(N)==0 predict p1_1 p2_1 p3_1 if xtest==1 gen p4_1 = 0 if xtest==1 } forvalues i = 1/4 { replace p`i' = p`i'_1 if xtest==1 } drop p*_1 gen t = p4 + p3*(y<=3) + p2*(y<=2) + p1*(y==1) gen plval = t if d==1 *********** /* STEP 3a : restructure data to include nt (distinct t (fpr) of T_z) obs for each diseased obs. (nd x nt obs), for each covariate level */ bys d reader xtest t: drop if d==0 & _n > 1 /* restrict non-diseased obs to distinct fpr's */ drop if t==1 & d==0 egen int nt = sum(d==0), by(reader xtest) gen uid = _n /* unique id for each record */ quietly bys d reader xtest (y): gen id_t =_n if d==0 /* id_t: unique id each t within covariate levels */ expand nt if d==1 sort uid quietly by uid: replace id_t = _n if d==1 /* assigns a set of t's to each disease obs */ bys reader xtest id_t (d) : replace t = t[1] drop if d==0 *********** /* STEP 3b : calculate the binary placement value indicators */ gen byte u_it = (plval <= t) *********** /* STEP 4 : calculate h_s(t) */ gen logitt = log(t/(1.0 - t)) ********* /* STEP 5 : fit the GLM model */ logit u_it logitt xtest x2 x3 post `1' (_b[logitt]) (_b[xtest]) (_b[x2]) (_b[x3]) (_b[_cons]) end ************************************************************* /* SET UP DATA */ use ${semt_data}mlt1,clear gen byte x2 = reader == 2 gen byte x3 = reader == 3 gen byte xtest = mode==2 recode y 5=4 keep x* y d reader image compress _all log using ${semt_log}ex6_12, replace set rmsg on set seed 156385 bstrap glmfit, cluster(image) reps(5000) saving(${semt_log}ex6_12) replace qui log close set rmsg off