# Print the complete esoph data frame; the recorded output below shows 88 rows
# with the columns agegp, alcgp, tobgp, ncases and ncontrols.
# NOTE(review): the numbers below come from whichever R release produced this
# transcript; the bundled data may differ between R versions -- confirm before
# comparing against a fresh run.
esoph
## agegp alcgp tobgp ncases ncontrols
## 1 25-34 0-39g/day 0-9g/day 0 40
## 2 25-34 0-39g/day 10-19 0 10
## 3 25-34 0-39g/day 20-29 0 6
## 4 25-34 0-39g/day 30+ 0 5
## 5 25-34 40-79 0-9g/day 0 27
## 6 25-34 40-79 10-19 0 7
## 7 25-34 40-79 20-29 0 4
## 8 25-34 40-79 30+ 0 7
## 9 25-34 80-119 0-9g/day 0 2
## 10 25-34 80-119 10-19 0 1
## 11 25-34 80-119 30+ 0 2
## 12 25-34 120+ 0-9g/day 0 1
## 13 25-34 120+ 10-19 1 1
## 14 25-34 120+ 20-29 0 1
## 15 25-34 120+ 30+ 0 2
## 16 35-44 0-39g/day 0-9g/day 0 60
## 17 35-44 0-39g/day 10-19 1 14
## 18 35-44 0-39g/day 20-29 0 7
## 19 35-44 0-39g/day 30+ 0 8
## 20 35-44 40-79 0-9g/day 0 35
## 21 35-44 40-79 10-19 3 23
## 22 35-44 40-79 20-29 1 14
## 23 35-44 40-79 30+ 0 8
## 24 35-44 80-119 0-9g/day 0 11
## 25 35-44 80-119 10-19 0 6
## 26 35-44 80-119 20-29 0 2
## 27 35-44 80-119 30+ 0 1
## 28 35-44 120+ 0-9g/day 2 3
## 29 35-44 120+ 10-19 0 3
## 30 35-44 120+ 20-29 2 4
## 31 45-54 0-39g/day 0-9g/day 1 46
## 32 45-54 0-39g/day 10-19 0 18
## 33 45-54 0-39g/day 20-29 0 10
## 34 45-54 0-39g/day 30+ 0 4
## 35 45-54 40-79 0-9g/day 6 38
## 36 45-54 40-79 10-19 4 21
## 37 45-54 40-79 20-29 5 15
## 38 45-54 40-79 30+ 5 7
## 39 45-54 80-119 0-9g/day 3 16
## 40 45-54 80-119 10-19 6 14
## 41 45-54 80-119 20-29 1 5
## 42 45-54 80-119 30+ 2 4
## 43 45-54 120+ 0-9g/day 4 4
## 44 45-54 120+ 10-19 3 4
## 45 45-54 120+ 20-29 2 3
## 46 45-54 120+ 30+ 4 4
## 47 55-64 0-39g/day 0-9g/day 2 49
## 48 55-64 0-39g/day 10-19 3 22
## 49 55-64 0-39g/day 20-29 3 12
## 50 55-64 0-39g/day 30+ 4 6
## 51 55-64 40-79 0-9g/day 9 40
## 52 55-64 40-79 10-19 6 21
## 53 55-64 40-79 20-29 4 17
## 54 55-64 40-79 30+ 3 6
## 55 55-64 80-119 0-9g/day 9 18
## 56 55-64 80-119 10-19 8 15
## 57 55-64 80-119 20-29 3 6
## 58 55-64 80-119 30+ 4 4
## 59 55-64 120+ 0-9g/day 5 10
## 60 55-64 120+ 10-19 6 7
## 61 55-64 120+ 20-29 2 3
## 62 55-64 120+ 30+ 5 6
## 63 65-74 0-39g/day 0-9g/day 5 48
## 64 65-74 0-39g/day 10-19 4 14
## 65 65-74 0-39g/day 20-29 2 7
## 66 65-74 0-39g/day 30+ 0 2
## 67 65-74 40-79 0-9g/day 17 34
## 68 65-74 40-79 10-19 3 10
## 69 65-74 40-79 20-29 5 9
## 70 65-74 80-119 0-9g/day 6 13
## 71 65-74 80-119 10-19 4 12
## 72 65-74 80-119 20-29 2 3
## 73 65-74 80-119 30+ 1 1
## 74 65-74 120+ 0-9g/day 3 4
## 75 65-74 120+ 10-19 1 2
## 76 65-74 120+ 20-29 1 1
## 77 65-74 120+ 30+ 1 1
## 78 75+ 0-39g/day 0-9g/day 1 18
## 79 75+ 0-39g/day 10-19 2 6
## 80 75+ 0-39g/day 30+ 1 3
## 81 75+ 40-79 0-9g/day 2 5
## 82 75+ 40-79 10-19 1 3
## 83 75+ 40-79 20-29 0 3
## 84 75+ 40-79 30+ 1 1
## 85 75+ 80-119 0-9g/day 1 1
## 86 75+ 80-119 10-19 1 1
## 87 75+ 120+ 0-9g/day 2 2
## 88 75+ 120+ 10-19 1 1
# Per-column summary: level counts for agegp, alcgp and tobgp, and
# min / quartiles / mean / max for ncases and ncontrols.
summary(esoph)
## agegp alcgp tobgp ncases ncontrols
## 25-34:15 0-39g/day:23 0-9g/day:24 Min. : 0.000 Min. : 1.00
## 35-44:15 40-79 :23 10-19 :24 1st Qu.: 0.000 1st Qu.: 3.00
## 45-54:16 80-119 :21 20-29 :20 Median : 1.000 Median : 6.00
## 55-64:16 120+ :21 30+ :20 Mean : 2.273 Mean :11.08
## 65-74:15 3rd Qu.: 4.000 3rd Qu.:14.00
## 75+ :11 Max. :17.000 Max. :60.00
# Preview the first rows of esoph (six rows in the recorded output below).
head(esoph)
## agegp alcgp tobgp ncases ncontrols
## 1 25-34 0-39g/day 0-9g/day 0 40
## 2 25-34 0-39g/day 10-19 0 10
## 3 25-34 0-39g/day 20-29 0 6
## 4 25-34 0-39g/day 30+ 0 5
## 5 25-34 40-79 0-9g/day 0 27
## 6 25-34 40-79 10-19 0 7
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(ggplot2)
# Distribution of the per-row case counts (ncases) within each alcohol group
# and within each tobacco group. Columns are looked up through `data = esoph`
# rather than `esoph$...` inside the formula, and the axes are labelled
# explicitly so the plots do not show raw `esoph$col` expressions (or nothing).
boxplot(ncases ~ alcgp, data = esoph,
        xlab = "Alcohol group (alcgp)", ylab = "Cases per row (ncases)")
boxplot(ncases ~ tobgp, data = esoph,
        xlab = "Tobacco group (tobgp)", ylab = "Cases per row (ncases)")
## effects of alcohol, tobacco and interaction *
# Binomial GLM on the (ncases, ncontrols) count pairs: agegp enters as a main
# effect, tobgp and alcgp enter with their full interaction. In the recorded
# output below the factor terms appear as polynomial contrasts (.L, .Q, .C, ^4).
model1 <- glm(
  formula = cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp,
  family = binomial(),
  data = esoph
)
summary(model1)
##
## Call:
## glm(formula = cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp,
## family = binomial(), data = esoph)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8895 -0.5317 -0.2304 0.2704 2.0724
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.75985 0.19822 -8.878 < 2e-16 ***
## agegp.L 2.99646 0.65386 4.583 4.59e-06 ***
## agegp.Q -1.35008 0.59197 -2.281 0.0226 *
## agegp.C 0.13436 0.45056 0.298 0.7655
## agegp^4 0.07098 0.30974 0.229 0.8187
## agegp^5 -0.21347 0.19627 -1.088 0.2768
## tobgp.L 0.63846 0.19710 3.239 0.0012 **
## tobgp.Q 0.02922 0.19617 0.149 0.8816
## tobgp.C 0.15607 0.19796 0.788 0.4304
## alcgp.L 1.37077 0.21136 6.485 8.85e-11 ***
## alcgp.Q -0.14913 0.19645 -0.759 0.4478
## alcgp.C 0.22823 0.18203 1.254 0.2099
## tobgp.L:alcgp.L -0.70426 0.41128 -1.712 0.0868 .
## tobgp.Q:alcgp.L 0.12225 0.42044 0.291 0.7712
## tobgp.C:alcgp.L -0.29187 0.42939 -0.680 0.4967
## tobgp.L:alcgp.Q 0.12948 0.38889 0.333 0.7392
## tobgp.Q:alcgp.Q -0.44527 0.39224 -1.135 0.2563
## tobgp.C:alcgp.Q -0.05205 0.39538 -0.132 0.8953
## tobgp.L:alcgp.C -0.16118 0.36697 -0.439 0.6605
## tobgp.Q:alcgp.C 0.04843 0.36211 0.134 0.8936
## tobgp.C:alcgp.C -0.13905 0.35754 -0.389 0.6973
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.241 on 87 degrees of freedom
## Residual deviance: 47.484 on 67 degrees of freedom
## AIC: 236.96
##
## Number of Fisher Scoring iterations: 6
## linear effect of alcohol and tobacco *
# Same binomial GLM, but tobgp and alcgp are passed through unclass(), so each
# contributes a single numeric slope (one coefficient apiece in the recorded
# output below) instead of a set of contrast terms. No interaction is fitted.
model2 <- glm(
  formula = cbind(ncases, ncontrols) ~
    agegp + unclass(tobgp) + unclass(alcgp),
  family = binomial(),
  data = esoph
)
summary(model2)
##
## Call:
## glm(formula = cbind(ncases, ncontrols) ~ agegp + unclass(tobgp) +
## unclass(alcgp), family = binomial(), data = esoph)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7628 -0.6426 -0.2709 0.3043 2.0421
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.01097 0.31224 -12.846 < 2e-16 ***
## agegp.L 2.96113 0.65092 4.549 5.39e-06 ***
## agegp.Q -1.33735 0.58918 -2.270 0.02322 *
## agegp.C 0.15292 0.44792 0.341 0.73281
## agegp^4 0.06668 0.30776 0.217 0.82848
## agegp^5 -0.20288 0.19523 -1.039 0.29872
## unclass(tobgp) 0.26162 0.08198 3.191 0.00142 **
## unclass(alcgp) 0.65308 0.08452 7.727 1.10e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.241 on 87 degrees of freedom
## Residual deviance: 59.277 on 80 degrees of freedom
## AIC: 222.76
##
## Number of Fisher Scoring iterations: 6
# * Models are taken from the esoph help page (esoph.Rd):
# http://www.imsbio.co.jp/RGM/R_rdfile?f=datasets/man/esoph.Rd&d=R_rel