data prostata; infile "http://staff.pubhealth.ku.dk/~sr/BasicStatistics/datasets/prostate.txt" URL firstobs=2; input gennemtraengning01 involvering knude $ psa alder65 $ gleason; run; /* *********************************************************** */ /* *********************************************************** */ /* Video 2 */ * -- slide 7-8 -- tabel med andele + chi-square test; proc freq data=prostata; tables involvering*gennemtraengning01 / nopercent nocol chisq; run; * -- slide 9 -- CI for risikodifferens; proc freq data=prostata; tables involvering*gennemtraengning01 / nopercent nocol riskdiffc; run; * CI aflaeses i raekken med titel 'Difference' - * (ligegyldigt om det er Col 1 eller Col 2 Risk Estimates, det er * et spoergsmaal om fortegn alene); * -- slide 10-11 -- RR og OR; proc freq data=prostata; tables involvering*gennemtraengning01 / nopercent nocol relrisk; run; * OR vender rigtigt, men RR vender desvaerre forkert; * Vi kan vende den rundt ved at sortere efter aftagende gennemtraengning01 og involvering * og derefter kalde proc freq og fortaelle at den skal lave tabellen efter * den nye orden (order); proc sort data=prostata; by descending gennemtraengning01 descending involvering; run; proc freq data=prostata order=data; tables involvering*gennemtraengning01 / nopercent nocol relrisk; run; * RR aflaeses nu i Relative Risk (Column 1); /* *********************************************************** */ /* *********************************************************** */ /* Video 3 */ * -- slide 17 - logistisk regression (involvering); * Med involvering som dummy-variabel (kvantitativ); proc logistic data=prostata; model gennemtraengning01(event='1') = involvering; run; * Med involvering som class-variabel; proc logistic data=prostata; class involvering (ref='0') / param=glm; model gennemtraengning01(event='1') = involvering; run; /* *********************************************************** */ /* *********************************************************** */ /* Video 4 */ * -- slide 19 - scatterplot med udglatning; proc sgplot data=prostata; loess x=psa y=gennemtraengning01; run; * Den kurve bliver meget 'bumpy' - vi kan styre glatheden med ekstra * option smooth (proev dig frem med forskellige vaerdier); proc sgplot data=prostata; loess x=psa y=gennemtraengning01 / smooth=.5; run; * med linje oveni; proc sgplot data=prostata; loess x=psa y=gennemtraengning01 / smooth=.5; reg x=psa y=gennemtraengning01; run; * -- slide 20 - logistisk regression (PSA); proc logistic data=prostata; model gennemtraengning01(event='1') = psa; estimate '10 enheder' psa 10 / exp cl; run; * Bemaerk at vi i estimate linjen efter / har tilfoejet exp, saa vi faar * beregnet OR; * -- slide 21-22 -; * se e-bogen kapitel om praediktioner under logistisk regression for forklaring; proc logistic data=prostata; model gennemtraengning01(event='1') = psa; output out=predictions predictions=pred xbeta=log_odds; run; proc print data=predictions (obs=15); run; * -- slide 21; proc sgplot data=predictions; scatter x=psa y=log_odds; reg x=psa y=log_odds; run; * -- slide 22; proc sgplot data=predictions; scatter x=psa y=gennemtraengning01; scatter x=psa y=pred; run; * Hvis vi vil have det som en linje maa vi foerst sortere efter psa, ellers * kommer det til at se mystisk ud (proev!); proc sort data=predictions; by psa; run; proc sgplot data=predictions; scatter x=psa y=gennemtraengning01; series x=psa y=pred; run; /* *********************************************************** */ /* *********************************************************** */ /* Video 5 */ * -- 27-28 - Hosmer-Lemeshow; proc logistic data=prostata; model gennemtraengning01(event='1') = psa / lackfit; * model gennemtraengning01(event='0') = psa / lackfit; *hvis test paa gennemtraengning01=0; run; * -- slide 30 - residualplot med udglatning (bemaerk at udglatningen * ser lidt anderledes ud i SAS, som benytter en anden vaegtning end * R som vist i slides); * Henter Pearson residualerne; proc logistic data=prostata; model gennemtraengning01(event='1') = psa; output out=residuals stdreschi=pearson_res; run; proc sgplot data=residuals; scatter x=psa y=pearson_res / group=gennemtraengning01; loess x=psa y=pearson_res ;*/ smooth=.5; lineparm x=0 y=0 slope=0 / lineattrs=(color=gray); run; * -- 31 - diagnostics; proc logistic data=prostata; model gennemtraengning01(event='1') = psa; output out=diagnostics dfbeta=_all_; run; * NB Cooks distance virker ikke i nyeste version af SAS (i hvert * fald ikke hos mig ..) tidligere kunne man i output linjen * ogsaa angive cooksd. Uden Cook maa vi noejes med at se paa dfbetas; proc sgplot data=diagnostics; scatter x=psa y=dfbeta_psa / group=gennemtraengning01; run; /* *********************************************************** */ /* *********************************************************** */ /* Video 6 */ * -- slide 34 -; * Se evt e-bogen om splines i den lineaere model; * Kvartiler af PSA blandt cases; proc means data=prostata q1 median q3; var psa; where gennemtraengning01=1; run; * definerer splinefunktioner; data prostata; set prostata; psa_spline1=(psa-7.4)*(psa>7.4); psa_spline2=(psa-13.2)*(psa>13.2); psa_spline3=(psa-26)*(psa>26); run; * -- slide 35-38 -; * fitter modellen, beregner OR i hvert interval, beregner test for * linearitet, henter praediktioner; proc logistic data=prostata; model gennemtraengning01(event='1') = psa psa_spline1 psa_spline2 psa_spline3 / lackfit; estimate 'OR interval 2' psa 1 psa_spline1 1 / exp cl; estimate 'OR interval 3' psa 1 psa_spline1 1 psa_spline2 1 / exp cl; estimate 'OR interval 4' psa 1 psa_spline1 1 psa_spline2 1 psa_spline3 1/ exp cl; contrast 'test spline1=spline2=spline3=0' psa_spline1 1, psa_spline2 1, psa_spline3 1; output out=predictions xbeta=log_odds pred=pred; run; proc sort data=predictions; by psa; run; * plot af logitfunktionen (slide 36); proc sgplot data=predictions; series x=psa y=log_odds; run; * plot af praediktionerne (slide 38); proc sgplot data=predictions; scatter x=psa y=gennemtraengning01; series x=psa y=pred; run; * -- slide 39 - test af lineaer effekt af log2PSA; data prostata; set prostata; log2psa=log2(psa); run; proc logistic data=prostata; model gennemtraengning01(event='1') = psa log2psa; run; * -- slide 40 - model med log2PSA; proc logistic data=prostata; model gennemtraengning01(event='1') = log2psa; run; * lav evt selv plots slide 41-42 (log_odds og pred); /* *********************************************************** */ /* *********************************************************** */ /* OBS nedenstaaende er endnu ikke gennemgaaet, kommer loebende med videoerne dvs sidenumre passer endnu ikke og der mangler lidt ... */ /* *********************************************************** */ /* *********************************************************** */ /* Video 7 */ * -- slide 43-44 en kval + en kvant kovariat; proc logistic data=prostata; class involvering (ref='0') / param=glm; model gennemtraengning01(event='1') = involvering log2psa; run; /* *********************************************************** */ /* *********************************************************** */ /* Video 9 */ * -- slide 56 -; proc freq data=prostata; tables gleason*gennemtraengning01 / nopercent nocol; run; proc logistic data=prostata; class alder65 (ref='Over') / param=glm; model gennemtraengning01(event='1') = alder65 gleason alder65*gleason; run; * -- slide 57 -; proc logistic data=prostata; class alder65 (ref='Over') / param=glm; model gennemtraengning01(event='1') = alder65 gleason alder65*gleason; output out=preds xbeta=log_odds pred=pred; run; proc sort data=preds; by gleason; run; * plot paa log-odds skala; proc sgplot data=preds; scatter x=gleason y=log_odds / group=alder65; series x=gleason y=log_odds / group=alder65; run; * plot paa ssh skala; proc sgplot data=preds; scatter x=gleason y=pred / group=alder65; series x=gleason y=pred / group=alder65; run; * -- slide 58 -; proc logistic data=prostata; class alder65 (ref='Under') / param=glm; model gennemtraengning01(event='1') = alder65 gleason alder65*gleason; oddsratio gleason / at ( alder65="Under" "Over"); run; * -- slide 59 -; data prostata; set prostata; if psa >= 20 then psa01=">= 20"; if psa < 20 then psa01="< 20"; run; * tal i tabeller tv. : 3-vejstabel (1. tabel er tal i naevner, 2. i taeller); proc freq data=prostata; tables gennemtraengning01*psa01*involvering / nopercent nocol norow; run; proc logistic data=prostata; class involvering (ref='0') psa01 (ref='< 20') / param=glm; model gennemtraengning01(event='1') = involvering psa01 psa01*involvering; run; * -- slide 61; proc logistic data=prostata; class involvering (ref='0') psa01 (ref='< 20') / param=glm; model gennemtraengning01(event='1') = involvering psa01 psa01*involvering; oddsratio involvering / at (psa01='< 20' '>= 20'); oddsratio psa01 / at (involvering='0' '1'); run; * -- slide 62; proc logistic data=prostata; class involvering (ref='0') psa01 (ref='< 20') / param=glm; model gennemtraengning01(event='1') = involvering psa01; run; /* *********************************************************** */ /* *********************************************************** */ /* Video 10 */ * -- slide 64 -; proc logistic data=prostata; class involvering (ref='0') alder65(ref='Over') knude (ref='Ingen') / param=glm; model gennemtraengning01(event='1') = involvering log2psa alder65 gleason alder65*gleason knude / lackfit; output out=predictions pred=pred; run; * -- slide 65 -; data predictions; set predictions; pred_case=(pred>0.5); *her praedikteres at vi har en case; run; * tabel + sens + spec for taerskel paa .5; proc freq data=predictions; tables gennemtraengning01*pred_case / nopercent nocol; run; * -- slide 66-; proc logistic data=prostata plots(only)=roc; class involvering (ref='0') alder65(ref='Over') knude (ref='Ingen') / param=glm; model gennemtraengning01(event='1') = involvering log2psa alder65 gleason alder65*gleason knude / outroc=roc; roc; run; * -- slide 67 -; * bemaerk outroc-statement i modellinjen ovenfor; * formler for hvordan vi finder Youden og Closest-Top-Left, se; * https://www.hindawi.com/journals/cmmm/2017/3762651/ ; data roc; set roc; youden=_sensit_-_1mspec_; ctl = sqrt( (1-_sensit_)**2 + (_1mspec_)**2); run; * Vi skal finde den vaerdi af praediktion (_prob_) hvor vi har max Youden; * sorterer efter aftagende youden og aflaeser i 1. linje; proc sort data=roc; by decending youden; run; proc print data=roc; run; * Vi skal finde den vaerdi af praediktion (_prob_) hvor vi har min ctl; * sorterer efter ctl og aflaeser i 1. linje; proc sort data=roc; by ctl; run; proc print data=roc; run; * -- slide 69 -; * Se evt hvordan man goer hvis man skal have flere ROC-kurver i en: * https://blogs.sas.com/content/iml/2018/11/14/compare-roc-curves-sas.html; proc logistic data=prostata plots(only)=roc; model gennemtraengning01(event='1') = log2psa; run;