/* ---------------------------------------------------------------------------- */
/* Week 1: Basic statistical concepts                                           */
/* ---------------------------------------------------------------------------- */

/* ---------------------------------------------------------------------------- */
/* Vitamin D                                                                    */
/* ---------------------------------------------------------------------------- */

/* Read the vitamin D data straight from the course web server.
   firstobs=2 skips the first line of the file (presumably a header row). */
data vit;
  infile "http://staff.pubhealth.ku.dk/~sr/BasicStatistics/datasets/vitamin.csv"
         URL firstobs=2 dlm=",";
  input country vitd age bmi sunexp vitdintake;
run;

/* Define display formats for sun exposure and country codes. */
proc format;
  value sunf
    1 = "Avoid sun"
    2 = "Sometimes in sun"
    3 = "Prefer sun";
  value countryf
    1 = "DK"
    2 = "SF"
    4 = "EI"
    6 = "PL";
run;

/* Attach the formats to the variables in the data set. */
data vit;
  set vit;
  format sunexp sunf. country countryf.;
run;

/* ---------------------------------------------------------------------------- */
/* Plots                                                                        */
/* ---------------------------------------------------------------------------- */

/* Subset with country code 4 (formatted as "EI"; the Irish women). */
data vitEI;
  set vit;
  where country = 4;
run;

/* Histogram of vitamin D with a fitted normal curve and a kernel density. */
proc sgplot data=vitEI;
  histogram vitd;
  density vitd;
  density vitd / type=kernel;
run;

/* Boxplot of vitamin D, one box per country. */
proc sgplot data=vit;
  vbox vitd / category=country;
  label vitd='Vitamin D' country='Land';
run;

/* ---------------------------------------------------------------------------- */
/* Summary statistics                                                           */
/* ---------------------------------------------------------------------------- */

/* Descriptive statistics for vitamin D within each country. */
proc means data=vit n mean median std q1 q3 min max;
  class country;
  var vitd;
run;
/* ---------------------------------------------------------------------------- */
/* Reference ranges                                                             */
/* ---------------------------------------------------------------------------- */

/* Percentile-based reference range: 2.5th and 97.5th percentiles of vitd. */
proc univariate data=vitEI;
  var vitd;
  output out=regn pctlpre=P_ pctlpts=2.5,97.5;
run;

proc print data=regn;
run;

/* Normal-distribution-based reference range. */

/* 97.5% quantile of the t distribution with 40 degrees of freedom. */
data qt;
  qt=quantile('T',0.975,40);
run;

proc print data=qt;
run;

/* NOTE(review): mean, sd, n=41 and the t quantile 2.02108 are hard-coded;
   presumably copied from the PROC MEANS output for vitEI and from the qt
   step above - confirm they still match the data. */
data normalomr;
  mean=48.0073171;
  sd=20.2221214;

  /* Rule-of-thumb range: mean +/- 2 SD. */
  lower1 = mean - 2*sd;
  upper1 = mean + 2*sd;

  /* Exact range using the proper t quantile. The prediction-interval
     factor for one new observation is sqrt(1 + 1/n); the square root was
     missing, which made the interval slightly too wide. */
  lower2 = mean - sqrt(1+1/41)*2.02108*sd;
  upper2 = mean + sqrt(1+1/41)*2.02108*sd;
run;

proc print data=normalomr;
run;

/* Number of women with vitamin D below 25: lav is 1 when vitd < 25, else 0. */
data vitEI;
  set vitEI;
  lav = (vitd < 25);
run;

proc freq data=vitEI;
  tables lav;
run;

/* ---------------------------------------------------------------------------- */
/* Assessing normality                                                          */
/* ---------------------------------------------------------------------------- */

/* Normal QQ-plot of vitd with reference line from estimated mean and SD. */
proc univariate data=vitEI noprint;
  qqplot vitd / normal (mu=est sigma=est);
run;

/* ---------------------------------------------------------------------------- */
/* Right-skewed distributions                                                   */
/* ---------------------------------------------------------------------------- */

/* Read the imm data; firstobs=2 skips the first line of the file. */
data imm;
  infile "http://staff.pubhealth.ku.dk/~sr/BasicStatistics/datasets/imm.csv"
         URL firstobs=2 delimiter=",";
  input img;
run;

/* Descriptive statistics. */
proc means data=imm n mean median stddev q1 q3 min max;
  var img;
run;

/* Boxplot. */
proc sgplot data=imm;
  vbox img;
run;

/* Histogram with normal and kernel density overlays. */
proc sgplot data=imm;
  histogram img;
  density img;
  density img / type=kernel;
run;

/* QQ-plot. */
proc univariate data=imm noprint;
  qqplot img / normal (mu=est sigma=est);
run;

/* Add the log2-transformed img. */
data imm;
  set imm;
  log2img=log2(img);
run;

/* Histogram of the transformed variable. */
proc sgplot data=imm;
  histogram log2img;
  density log2img;
  density log2img / type=kernel;
run;

/* QQ-plot of the transformed variable. */
proc univariate data=imm noprint;
  qqplot log2img / normal (mu=est sigma=est);
run;

/* Reference range, percentile-based. */
proc univariate data=imm noprint;
  var img;
  output out=regn2 pctlpre=P_ pctlpts=2.5,97.5;
run;

proc print data=regn2;
run;

/* Reference ranges, normal-based (for img and log2img respectively). */
proc means data=imm mean stddev;
  var img log2img;
run;

/* NOTE(review): the means and SDs below are hard-coded; presumably copied
   from the PROC MEANS output just above - confirm. */
data normalomr2;
  mean_img=0.8030201;
  sd_img=0.4694982;
  mean_log2=-0.5239339;
  sd_log2=0.7890028;

  lower_img = mean_img - 2*sd_img;
  upper_img = mean_img + 2*sd_img;

  lower_log2 = mean_log2 - 2*sd_log2;
  upper_log2 = mean_log2 + 2*sd_log2;

  /* Back-transform the log2 limits to the original scale right away. */
  lower=2**lower_log2;
  upper=2**upper_log2;
run;

proc print data=normalomr2;
run;

/* ---------------------------------------------------------------------------- */
/* Long jump                                                                    */
/* ---------------------------------------------------------------------------- */

/* NOTE(review): name and country are read with list input and no LENGTH
   statement, so they get the default character length of 8 and longer
   values are truncated. Neither variable is used below. */
data l;
  infile "http://staff.pubhealth.ku.dk/~sr/BasicStatistics/datasets/longjump.csv"
         URL firstobs=2 delimiter=",";
  input rank name $ country $ distance;
  expdist=exp(distance);
run;

proc sgplot data=l;
  vbox distance;
run;

proc univariate data=l noprint;
  qqplot distance / normal (mu=est sigma=est);
run;

proc univariate data=l noprint;
  qqplot expdist / normal (mu=est sigma=est);
run;

/* ---------------------------------------------------------------------------- */
/* Paired comparisons                                                           */
/* ---------------------------------------------------------------------------- */

/* Wide-format data: one row per subject with measurements mf and sv. */
data ms;
  infile "http://staff.pubhealth.ku.dk/~sr/BasicStatistics/datasets/mf_sv.txt"
         URL firstobs=2 delimiter=" ";
  input mf sv;
run;

/* Scatter plot of mf against sv with the identity line (slope 1 through (40,40)). */
proc sgplot data=ms;
  scatter X=sv Y=mf / markerattrs=(color=blue);
  lineparm x=40 y=40 slope=1 / lineattrs=(color=red);
run;

/* Long-format version of the data: one row per (id, metode). */
data lang;
  infile "http://staff.pubhealth.ku.dk/~sr/BasicStatistics/datasets/mf_sv_lang.csv"
         URL firstobs=2 delimiter=",";
  input vol metode $ id;
run;

/* Boxplot - the wrong picture for paired data (ignores the pairing). */
proc sgplot data=lang;
  vbox vol / group=metode;
run;

/* Spaghetti plot - the right picture (one line per id). */
proc sgplot data=lang;
  series X=metode Y=vol / group=id;
run;

/* Differences and assessment of their normality. */
data ms;
  set ms;
  dif=mf-sv;
run;

proc sgplot data=ms;
  histogram dif;
  density dif;
  density dif / type=kernel;
run;

proc univariate data=ms noprint;
  qqplot dif / normal (mu=est sigma=est);
run;

/* Estimation including a confidence interval. This could be done by hand
   or in a data step as above; here PROC MEANS supplies the CI via clm. */
proc means data=ms n mean stddev stderr clm;
  var dif;
run;

/* Alternatively, everything comes out automatically from a one-sample
   t-test on the differences. */
proc ttest data=ms;
  var dif;
run;

/* ... which is exactly the same as a paired t-test. */
proc ttest data=ms;
  paired mf*sv;
run;

/* ---------------------------------------------------------------------------- */
/* Bland-Altman plot                                                            */
/* ---------------------------------------------------------------------------- */

/* Average of the two measurements for each subject. */
data ms;
  set ms;
  gns=(mf+sv)/2;
run;

/* Bland-Altman plot: difference against average, with horizontal
   reference lines drawn as slope-0 lineparm statements.
   NOTE(review): 0.238, -13.68 and 14.16 are hard-coded; presumably the mean
   difference and the limits of agreement taken from the output above -
   confirm against the PROC MEANS / PROC TTEST results. */
proc sgplot data=ms;
  scatter X=gns Y=dif / markerattrs=(color=blue);
  lineparm x=40 y=0 slope=0 / lineattrs=(color=gray pattern=dot);
  lineparm x=40 y=0.238 slope=0 / lineattrs=(color=red);
  lineparm x=40 y=-13.68 slope=0 / lineattrs=(color=blue);
  lineparm x=40 y=14.16 slope=0 / lineattrs=(color=blue);
run;