esoph
##    agegp     alcgp    tobgp ncases ncontrols
## 1  25-34 0-39g/day 0-9g/day      0        40
## 2  25-34 0-39g/day    10-19      0        10
## 3  25-34 0-39g/day    20-29      0         6
## 4  25-34 0-39g/day      30+      0         5
## 5  25-34     40-79 0-9g/day      0        27
## 6  25-34     40-79    10-19      0         7
## 7  25-34     40-79    20-29      0         4
## 8  25-34     40-79      30+      0         7
## 9  25-34    80-119 0-9g/day      0         2
## 10 25-34    80-119    10-19      0         1
## 11 25-34    80-119      30+      0         2
## 12 25-34      120+ 0-9g/day      0         1
## 13 25-34      120+    10-19      1         1
## 14 25-34      120+    20-29      0         1
## 15 25-34      120+      30+      0         2
## 16 35-44 0-39g/day 0-9g/day      0        60
## 17 35-44 0-39g/day    10-19      1        14
## 18 35-44 0-39g/day    20-29      0         7
## 19 35-44 0-39g/day      30+      0         8
## 20 35-44     40-79 0-9g/day      0        35
## 21 35-44     40-79    10-19      3        23
## 22 35-44     40-79    20-29      1        14
## 23 35-44     40-79      30+      0         8
## 24 35-44    80-119 0-9g/day      0        11
## 25 35-44    80-119    10-19      0         6
## 26 35-44    80-119    20-29      0         2
## 27 35-44    80-119      30+      0         1
## 28 35-44      120+ 0-9g/day      2         3
## 29 35-44      120+    10-19      0         3
## 30 35-44      120+    20-29      2         4
## 31 45-54 0-39g/day 0-9g/day      1        46
## 32 45-54 0-39g/day    10-19      0        18
## 33 45-54 0-39g/day    20-29      0        10
## 34 45-54 0-39g/day      30+      0         4
## 35 45-54     40-79 0-9g/day      6        38
## 36 45-54     40-79    10-19      4        21
## 37 45-54     40-79    20-29      5        15
## 38 45-54     40-79      30+      5         7
## 39 45-54    80-119 0-9g/day      3        16
## 40 45-54    80-119    10-19      6        14
## 41 45-54    80-119    20-29      1         5
## 42 45-54    80-119      30+      2         4
## 43 45-54      120+ 0-9g/day      4         4
## 44 45-54      120+    10-19      3         4
## 45 45-54      120+    20-29      2         3
## 46 45-54      120+      30+      4         4
## 47 55-64 0-39g/day 0-9g/day      2        49
## 48 55-64 0-39g/day    10-19      3        22
## 49 55-64 0-39g/day    20-29      3        12
## 50 55-64 0-39g/day      30+      4         6
## 51 55-64     40-79 0-9g/day      9        40
## 52 55-64     40-79    10-19      6        21
## 53 55-64     40-79    20-29      4        17
## 54 55-64     40-79      30+      3         6
## 55 55-64    80-119 0-9g/day      9        18
## 56 55-64    80-119    10-19      8        15
## 57 55-64    80-119    20-29      3         6
## 58 55-64    80-119      30+      4         4
## 59 55-64      120+ 0-9g/day      5        10
## 60 55-64      120+    10-19      6         7
## 61 55-64      120+    20-29      2         3
## 62 55-64      120+      30+      5         6
## 63 65-74 0-39g/day 0-9g/day      5        48
## 64 65-74 0-39g/day    10-19      4        14
## 65 65-74 0-39g/day    20-29      2         7
## 66 65-74 0-39g/day      30+      0         2
## 67 65-74     40-79 0-9g/day     17        34
## 68 65-74     40-79    10-19      3        10
## 69 65-74     40-79    20-29      5         9
## 70 65-74    80-119 0-9g/day      6        13
## 71 65-74    80-119    10-19      4        12
## 72 65-74    80-119    20-29      2         3
## 73 65-74    80-119      30+      1         1
## 74 65-74      120+ 0-9g/day      3         4
## 75 65-74      120+    10-19      1         2
## 76 65-74      120+    20-29      1         1
## 77 65-74      120+      30+      1         1
## 78   75+ 0-39g/day 0-9g/day      1        18
## 79   75+ 0-39g/day    10-19      2         6
## 80   75+ 0-39g/day      30+      1         3
## 81   75+     40-79 0-9g/day      2         5
## 82   75+     40-79    10-19      1         3
## 83   75+     40-79    20-29      0         3
## 84   75+     40-79      30+      1         1
## 85   75+    80-119 0-9g/day      1         1
## 86   75+    80-119    10-19      1         1
## 87   75+      120+ 0-9g/day      2         2
## 88   75+      120+    10-19      1         1
summary(esoph)
##    agegp          alcgp         tobgp        ncases         ncontrols    
##  25-34:15   0-39g/day:23   0-9g/day:24   Min.   : 0.000   Min.   : 1.00  
##  35-44:15   40-79    :23   10-19   :24   1st Qu.: 0.000   1st Qu.: 3.00  
##  45-54:16   80-119   :21   20-29   :20   Median : 1.000   Median : 6.00  
##  55-64:16   120+     :21   30+     :20   Mean   : 2.273   Mean   :11.08  
##  65-74:15                                3rd Qu.: 4.000   3rd Qu.:14.00  
##  75+  :11                                Max.   :17.000   Max.   :60.00
head(esoph)
##   agegp     alcgp    tobgp ncases ncontrols
## 1 25-34 0-39g/day 0-9g/day      0        40
## 2 25-34 0-39g/day    10-19      0        10
## 3 25-34 0-39g/day    20-29      0         6
## 4 25-34 0-39g/day      30+      0         5
## 5 25-34     40-79 0-9g/day      0        27
## 6 25-34     40-79    10-19      0         7
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(ggplot2)
boxplot(esoph$ncases ~ esoph$alcgp)

boxplot(esoph$ncases ~ esoph$tobgp)

## effects of alcohol, tobacco and interaction *
model1 <- glm(cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp,
              data = esoph, family = binomial())
summary(model1)
## 
## Call:
## glm(formula = cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp, 
##     family = binomial(), data = esoph)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8895  -0.5317  -0.2304   0.2704   2.0724  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.75985    0.19822  -8.878  < 2e-16 ***
## agegp.L          2.99646    0.65386   4.583 4.59e-06 ***
## agegp.Q         -1.35008    0.59197  -2.281   0.0226 *  
## agegp.C          0.13436    0.45056   0.298   0.7655    
## agegp^4          0.07098    0.30974   0.229   0.8187    
## agegp^5         -0.21347    0.19627  -1.088   0.2768    
## tobgp.L          0.63846    0.19710   3.239   0.0012 ** 
## tobgp.Q          0.02922    0.19617   0.149   0.8816    
## tobgp.C          0.15607    0.19796   0.788   0.4304    
## alcgp.L          1.37077    0.21136   6.485 8.85e-11 ***
## alcgp.Q         -0.14913    0.19645  -0.759   0.4478    
## alcgp.C          0.22823    0.18203   1.254   0.2099    
## tobgp.L:alcgp.L -0.70426    0.41128  -1.712   0.0868 .  
## tobgp.Q:alcgp.L  0.12225    0.42044   0.291   0.7712    
## tobgp.C:alcgp.L -0.29187    0.42939  -0.680   0.4967    
## tobgp.L:alcgp.Q  0.12948    0.38889   0.333   0.7392    
## tobgp.Q:alcgp.Q -0.44527    0.39224  -1.135   0.2563    
## tobgp.C:alcgp.Q -0.05205    0.39538  -0.132   0.8953    
## tobgp.L:alcgp.C -0.16118    0.36697  -0.439   0.6605    
## tobgp.Q:alcgp.C  0.04843    0.36211   0.134   0.8936    
## tobgp.C:alcgp.C -0.13905    0.35754  -0.389   0.6973    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 227.241  on 87  degrees of freedom
## Residual deviance:  47.484  on 67  degrees of freedom
## AIC: 236.96
## 
## Number of Fisher Scoring iterations: 6
## linear effect of alcohol and tobacco *
model2 <- glm(cbind(ncases, ncontrols) ~ agegp + unclass(tobgp) + unclass(alcgp),
              data = esoph, family = binomial())
summary(model2)
## 
## Call:
## glm(formula = cbind(ncases, ncontrols) ~ agegp + unclass(tobgp) + 
##     unclass(alcgp), family = binomial(), data = esoph)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7628  -0.6426  -0.2709   0.3043   2.0421  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -4.01097    0.31224 -12.846  < 2e-16 ***
## agegp.L         2.96113    0.65092   4.549 5.39e-06 ***
## agegp.Q        -1.33735    0.58918  -2.270  0.02322 *  
## agegp.C         0.15292    0.44792   0.341  0.73281    
## agegp^4         0.06668    0.30776   0.217  0.82848    
## agegp^5        -0.20288    0.19523  -1.039  0.29872    
## unclass(tobgp)  0.26162    0.08198   3.191  0.00142 ** 
## unclass(alcgp)  0.65308    0.08452   7.727 1.10e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 227.241  on 87  degrees of freedom
## Residual deviance:  59.277  on 80  degrees of freedom
## AIC: 222.76
## 
## Number of Fisher Scoring iterations: 6
# * models are taken from:
# http://www.imsbio.co.jp/RGM/R_rdfile?f=datasets/man/esoph.Rd&d=R_rel)