# Per-column overview of `marvel`: level counts for factor columns and
# quantiles / NA counts for the integer columns.
# The unlabelled first entry under each factor is the empty-string level ""
# (e.g. 3813 rows for ID, 2856 for ALIGN), not a real category.
# NOTE(review): these look like missing values imported as "" -- confirm at
# the import step (e.g. na.strings = "").
summary(marvel)
##     page_id                                        name      
##  100012 :    1                                       :   51  
##  100013 :    1   'Spinner (Earth-616)                :    1  
##  100099 :    1   \\u00c4kr\\u00e4s (Earth-616)       :    1  
##  100160 :    1   107 (Earth-616)                     :    1  
##  100209 :    1   11-Ball (Earth-616)                 :    1  
##  100395 :    1   115 (Legion Personality) (Earth-616):    1  
##  (Other):16370   (Other)                             :16320  
##                                     urlslug     
##                                         :   51  
##  \\/%27Spinner_(Earth-616)              :    1  
##  \\/%C3%84kr%C3%A4s_(Earth-616)         :    1  
##  \\/107_(Earth-616)                     :    1  
##  \\/11-Ball_(Earth-616)                 :    1  
##  \\/115_(Legion_Personality)_(Earth-616):    1  
##  (Other)                                :16320  
##                              ID                      ALIGN     
##                               :3813                     :2856  
##  Known to Authorities Identity:  15   Bad Characters    :6703  
##  No Dual Identity             :1781   Good Characters   :4617  
##  Public Identity              :4516   Neutral Characters:2200  
##  Secret Identity              :6251                            
##                                                                
##                                                                
##          EYE               HAIR                          SEX       
##            :9804             :4308                         :  904  
##  Blue Eyes :1949   Black Hair:3738   Agender Characters    :   45  
##  Brown Eyes:1911   Brown Hair:2335   Female Characters     : 3830  
##  Green Eyes: 605   Blond Hair:1574   Genderfluid Characters:    2  
##  Black Eyes: 553   No Hair   :1171   Male Characters       :11595  
##  Red Eyes  : 508   Bald      : 835                                 
##  (Other)   :1046   (Other)   :2415                                 
##                      GSM                        ALIVE      
##                        :16286                      :   54  
##  Bisexual Characters   :   19   Deceased Characters: 3746  
##  Genderfluid Characters:    1   Living Characters  :12576  
##  Homosexual Characters :   66                              
##  Pansexual Characters  :    1                              
##  Transgender Characters:    2                              
##  Transvestites         :    1                              
##   APPEARANCES      FIRST.APPEARANCE      Year     
##  Min.   :   1.00          :  863    Min.   :1939  
##  1st Qu.:   1.00   Jun-92 :   71    1st Qu.:1974  
##  Median :   3.00   May-93 :   69    Median :1990  
##  Mean   :  16.66   Sep-06 :   67    Mean   :1985  
##  3rd Qu.:   8.00   Jan-94 :   66    3rd Qu.:2000  
##  Max.   :4043.00   Oct-93 :   58    Max.   :2013  
##  NA's   :1143      (Other):15182    NA's   :863
# Column types and leading values of `marvel` (16376 rows, 13 columns).
# Every text column -- including page_id, name and FIRST.APPEARANCE -- is
# stored as a factor; only APPEARANCES and Year are integers.
# NOTE(review): presumably a side effect of stringsAsFactors at import --
# confirm whether identifiers and dates are meant to be factors.
str(marvel)
## 'data.frame':    16376 obs. of  13 variables:
##  $ page_id         : Factor w/ 16376 levels "100012","100013",..: 2054 14839 13418 2937 4366 4362 3738 2636 5911 2662 ...
##  $ name            : Factor w/ 16326 levels "","'Spinner (Earth-616)",..: 13910 2322 1 1 14664 1572 12308 6538 13168 7658 ...
##  $ urlslug         : Factor w/ 16326 levels "","\\/%27Spinner_(Earth-616)",..: 13908 2322 1 1 14662 1572 12307 6538 13167 7658 ...
##  $ ID              : Factor w/ 5 levels "","Known to Authorities Identity",..: 5 4 1 1 3 4 4 4 4 4 ...
##  $ ALIGN           : Factor w/ 4 levels "","Bad Characters",..: 3 3 1 1 3 3 3 3 4 3 ...
##  $ EYE             : Factor w/ 25 levels "","Amber Eyes",..: 11 5 1 1 5 5 6 6 6 5 ...
##  $ HAIR            : Factor w/ 25 levels "","Auburn Hair",..: 7 24 1 1 5 14 7 7 7 5 ...
##  $ SEX             : Factor w/ 5 levels "","Agender Characters",..: 5 5 1 1 5 5 5 5 5 5 ...
##  $ GSM             : Factor w/ 7 levels "","Bisexual Characters",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ALIVE           : Factor w/ 3 levels "","Deceased Characters",..: 3 3 1 1 3 3 3 3 3 3 ...
##  $ APPEARANCES     : int  4043 3360 NA NA 2258 2255 2072 2017 1955 1934 ...
##  $ FIRST.APPEARANCE: Factor w/ 833 levels "","Apr-00","Apr-01",..: 101 505 1 1 651 659 659 591 797 659 ...
##  $ Year            : int  1962 1941 NA NA 1950 1961 1961 1962 1963 1961 ...
# Transposed preview of `marvel`: one line per column showing its type and
# first values.
# NOTE(review): the third page_id printed below contains a character name
# (64786,"Wolverine ...) and the 3rd and 4th records are blank/NA in every
# other column. This looks like a quoting problem when the file was read --
# confirm at the import step.
glimpse(marvel)
## Observations: 16,376
## Variables: 13
## $ page_id          <fctr> 1678, 7139, 64786,"Wolverine (James \""Logan...
## $ name             <fctr> Spider-Man (Peter Parker), Captain America (...
## $ urlslug          <fctr> \/Spider-Man_(Peter_Parker), \/Captain_Ameri...
## $ ID               <fctr> Secret Identity, Public Identity, , , No Dua...
## $ ALIGN            <fctr> Good Characters, Good Characters, , , Good C...
## $ EYE              <fctr> Hazel Eyes, Blue Eyes, , , Blue Eyes, Blue E...
## $ HAIR             <fctr> Brown Hair, White Hair, , , Blond Hair, No H...
## $ SEX              <fctr> Male Characters, Male Characters, , , Male C...
## $ GSM              <fctr> , , , , , , , , , , , , , , , , , , , , , , ...
## $ ALIVE            <fctr> Living Characters, Living Characters, , , Li...
## $ APPEARANCES      <int> 4043, 3360, NA, NA, 2258, 2255, 2072, 2017, 1...
## $ FIRST.APPEARANCE <fctr> Aug-62, Mar-41, , , Nov-50, Nov-61, Nov-61, ...
## $ Year             <int> 1962, 1941, NA, NA, 1950, 1961, 1961, 1962, 1...
# Distinct identity-status labels; the first level is the empty string "".
levels(marvel[["ID"]])
## [1] ""                              "Known to Authorities Identity"
## [3] "No Dual Identity"              "Public Identity"              
## [5] "Secret Identity"
# Distinct alignment labels; the first level is the empty string "".
levels(marvel[["ALIGN"]])
## [1] ""                   "Bad Characters"     "Good Characters"   
## [4] "Neutral Characters"
# Distinct hair labels (25 levels, the first being the empty string "").
levels(marvel[["HAIR"]])
##  [1] ""                      "Auburn Hair"          
##  [3] "Bald"                  "Black Hair"           
##  [5] "Blond Hair"            "Blue Hair"            
##  [7] "Brown Hair"            "Dyed Hair"            
##  [9] "Gold Hair"             "Green Hair"           
## [11] "Grey Hair"             "Light Brown Hair"     
## [13] "Magenta Hair"          "No Hair"              
## [15] "Orange-brown Hair"     "Orange Hair"          
## [17] "Pink Hair"             "Purple Hair"          
## [19] "Red Hair"              "Reddish Blond Hair"   
## [21] "Silver Hair"           "Strawberry Blond Hair"
## [23] "Variable Hair"         "White Hair"           
## [25] "Yellow Hair"
# Distinct eye labels (25 levels, the first being the empty string "").
levels(marvel[["EYE"]])
##  [1] ""                "Amber Eyes"      "Black Eyeballs" 
##  [4] "Black Eyes"      "Blue Eyes"       "Brown Eyes"     
##  [7] "Compound Eyes"   "Gold Eyes"       "Green Eyes"     
## [10] "Grey Eyes"       "Hazel Eyes"      "Magenta Eyes"   
## [13] "Multiple Eyes"   "No Eyes"         "One Eye"        
## [16] "Orange Eyes"     "Pink Eyes"       "Purple Eyes"    
## [19] "Red Eyes"        "Silver Eyes"     "Variable Eyes"  
## [22] "Violet Eyes"     "White Eyes"      "Yellow Eyeballs"
## [25] "Yellow Eyes"
# Contingency table of identity status (rows) by alignment (columns).
# Both factors carry an empty-string level "", which appears as the
# unlabelled first row and first column of the table.
tab_count <- table(marvel[["ID"]], marvel[["ALIGN"]])
tab_count
##                                
##                                      Bad Characters Good Characters
##                                 1221           1567             735
##   Known to Authorities Identity    0              3              10
##   No Dual Identity               275            474             643
##   Public Identity                740           1451            1622
##   Secret Identity                620           3208            1607
##                                
##                                 Neutral Characters
##                                                290
##   Known to Authorities Identity                  2
##   No Dual Identity                             389
##   Public Identity                              703
##   Secret Identity                              816
# Joint proportions: every cell divided by the grand total.
prop.table(tab_count, margin = NULL)
##                                
##                                              Bad Characters
##                                 0.0745603322   0.0956888129
##   Known to Authorities Identity 0.0000000000   0.0001831949
##   No Dual Identity              0.0167928676   0.0289447973
##   Public Identity               0.0451880801   0.0886052760
##   Secret Identity               0.0378602833   0.1958964338
##                                
##                                 Good Characters Neutral Characters
##                                    0.0448827553       0.0177088422
##   Known to Authorities Identity    0.0006106497       0.0001221299
##   No Dual Identity                 0.0392647777       0.0237542745
##   Public Identity                  0.0990473864       0.0429286761
##   Secret Identity                  0.0981314118       0.0498290181
# Sanity check: the joint proportions add up to 1.
sum(prop.table(tab_count, margin = NULL))
## [1] 1
# Proportions conditional on the rows: each ID row sums to 1.
prop.table(tab_count, margin = 1)
##                                
##                                            Bad Characters Good Characters
##                                 0.32022030     0.41096250      0.19276161
##   Known to Authorities Identity 0.00000000     0.20000000      0.66666667
##   No Dual Identity              0.15440764     0.26614262      0.36103313
##   Public Identity               0.16386182     0.32130204      0.35916740
##   Secret Identity               0.09918413     0.51319789      0.25707887
##                                
##                                 Neutral Characters
##                                         0.07605560
##   Known to Authorities Identity         0.13333333
##   No Dual Identity                      0.21841662
##   Public Identity                       0.15566873
##   Secret Identity                       0.13053911
# Sanity check: each row sums to 1, so the total equals the number of rows.
sum(prop.table(tab_count, margin = 1))
## [1] 5
# Proportions conditional on the columns: each ALIGN column sums to 1.
prop.table(tab_count, margin = 2)
##                                
##                                              Bad Characters
##                                 0.4275210084   0.2337759212
##   Known to Authorities Identity 0.0000000000   0.0004475608
##   No Dual Identity              0.0962885154   0.0707146054
##   Public Identity               0.2591036415   0.2164702372
##   Secret Identity               0.2170868347   0.4785916754
##                                
##                                 Good Characters Neutral Characters
##                                    0.1591942820       0.1318181818
##   Known to Authorities Identity    0.0021659086       0.0009090909
##   No Dual Identity                 0.1392679229       0.1768181818
##   Public Identity                  0.3513103747       0.3195454545
##   Secret Identity                  0.3480615118       0.3709090909
# Sanity check: each column sums to 1, so the total equals the number of
# columns.
sum(prop.table(tab_count, margin = 2))
## [1] 4
# Stacked bars: number of characters per ID, coloured by ALIGN.
ggplot(data = marvel, mapping = aes(x = ID, fill = ALIGN)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 30))

# Side-by-side bars: one bar per ALIGN level within each ID.
ggplot(data = marvel, mapping = aes(x = ID, fill = ALIGN)) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 30))

# Bars rescaled to height 1: share of each ALIGN level within every ID.
ggplot(data = marvel, mapping = aes(x = ID, fill = ALIGN)) +
  geom_bar(position = "fill") +
  labs(y = "proportion") +
  theme(axis.text.x = element_text(angle = 30))

# Counts per ID, drawn in a separate panel for every ALIGN level.
ggplot(data = marvel, mapping = aes(x = ID)) +
  geom_bar() +
  facet_wrap(facets = ~ALIGN) +
  theme(axis.text.x = element_text(angle = 90))

# Ten most frequently appearing characters overall.
# min_rank() gives tied characters the same integer rank (1, 2, 2, 4, ...).
# The earlier round(rank(...), 0) averaged tied ranks and then rounded half to
# even: two characters tied for first both became rank 2, and ties at the
# cut-off were kept or dropped depending on where the tie happened to fall.
# With min_rank() every character tied at the cut-off is kept, so the result
# can hold more than 10 rows.
# Rows with a missing APPEARANCES get an NA rank and are removed by filter().
marvel %>%
  select(ALIGN, name, APPEARANCES) %>%
  mutate(order = min_rank(desc(APPEARANCES))) %>%
  arrange(order) %>%
  filter(order <= 10)
##                 ALIGN                            name APPEARANCES order
## 1     Good Characters       Spider-Man (Peter Parker)        4043     1
## 2     Good Characters Captain America (Steven Rogers)        3360     2
## 3     Good Characters             Thor (Thor Odinson)        2258     3
## 4     Good Characters      Benjamin Grimm (Earth-616)        2255     4
## 5     Good Characters       Reed Richards (Earth-616)        2072     5
## 6     Good Characters      Hulk (Robert Bruce Banner)        2017     6
## 7  Neutral Characters       Scott Summers (Earth-616)        1955     7
## 8     Good Characters      Jonathan Storm (Earth-616)        1934     8
## 9     Good Characters         Henry McCoy (Earth-616)        1825     9
## 10    Good Characters         Susan Storm (Earth-616)        1713    10
# Three most frequently appearing characters within each ALIGN level; the
# empty-string level "" forms its own (unlabelled) group.
# min_rank() gives tied characters the same integer rank within their group.
# The earlier round(rank(...), 0) averaged tied ranks and rounded half to
# even, so e.g. a tie for 3rd/4th place became rank 4 and dropped both rows.
# With min_rank() every character tied at the cut-off is kept.
# group_by() replaces any existing grouping, so the former ungroup step was
# redundant and has been removed. The result stays grouped by ALIGN.
marvel %>%
  select(ALIGN, name, APPEARANCES) %>%
  group_by(ALIGN) %>%
  mutate(order = min_rank(desc(APPEARANCES))) %>%
  arrange(ALIGN, order) %>%
  filter(order <= 3)
## # A tibble: 12 x 4
## # Groups:   ALIGN [4]
##                 ALIGN                            name APPEARANCES order
##                <fctr>                          <fctr>       <int> <int>
##  1                            Blaine Colt (Earth-616)         429     1
##  2                      Millicent Collins (Earth-616)         321     2
##  3                            Chili Storm (Earth-616)         284     3
##  4     Bad Characters     Victor von Doom (Earth-616)         721     1
##  5     Bad Characters       Norman Osborn (Earth-616)         692     2
##  6     Bad Characters         Wilson Fisk (Earth-616)         503     3
##  7    Good Characters       Spider-Man (Peter Parker)        4043     1
##  8    Good Characters Captain America (Steven Rogers)        3360     2
##  9    Good Characters             Thor (Thor Odinson)        2258     3
## 10 Neutral Characters       Scott Summers (Earth-616)        1955     1
## 11 Neutral Characters      Namor McKenzie (Earth-616)        1528     2
## 12 Neutral Characters  John Jonah Jameson (Earth-616)        1266     3
# Ten years with the most rows in `marvel`, ignoring rows without a Year.
# min_rank() gives years with equal counts the same integer rank. The earlier
# round(rank(...), 0) averaged tied ranks and rounded half to even, which
# kept or dropped tied years at the cut-off inconsistently. With min_rank()
# every year tied at the cut-off is kept.
marvel %>%
  filter(!is.na(Year)) %>%
  group_by(Year) %>%
  summarise(count = n()) %>%
  mutate(order = min_rank(desc(count))) %>%
  arrange(order) %>%
  filter(order <= 10)
## # A tibble: 10 x 3
##     Year count order
##    <int> <int> <int>
##  1  1993   552     1
##  2  1994   483     2
##  3  1992   453     3
##  4  2006   381     4
##  5  1991   360     5
##  6  1990   357     6
##  7  2008   353     7
##  8  2011   349     8
##  9  2005   336     9
## 10  1997   334    10
# Number of rows per Year (rows without a Year are dropped), in
# chronological order.
marvel_year_appearence <- marvel %>%
  filter(!is.na(Year)) %>%
  group_by(Year) %>%
  summarise(Count = n()) %>%
  arrange(Year)

# Yearly row counts drawn as a single line.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line width in favour
# of `linewidth`; switch once the project's ggplot2 version allows it.
ggplot(data = marvel_year_appearence, mapping = aes(x = Year, y = Count)) +
  geom_line(colour = "yellow", size = 1.1)

# Number of rows per Year and ALIGN level (rows without a Year are dropped),
# ordered by Year and then ALIGN.
marvel_year_appearence_align <- marvel %>%
  filter(!is.na(Year)) %>%
  group_by(Year, ALIGN) %>%
  summarise(Count = n()) %>%
  arrange(Year, ALIGN)

# Yearly row counts, one coloured line per ALIGN level.
ggplot(
  data = marvel_year_appearence_align,
  mapping = aes(x = Year, y = Count, colour = ALIGN)
) +
  geom_line(size = 1.1) +
  theme(axis.text.x = element_text(angle = 90))

# Yearly row counts, one panel per ALIGN level.
ggplot(
  data = marvel_year_appearence_align,
  mapping = aes(x = Year, y = Count)
) +
  geom_line(colour = "green", size = 0.9) +
  facet_wrap(facets = ~ALIGN) +
  theme(axis.text.x = element_text(angle = 90))