I take the data from the MARVEL GitHub page.
Summarize the marvel data set with the summary function.
# Per-column summary of the marvel data frame: level counts for the factor
# columns and min/quartiles/mean/max (plus NA counts) for the integer columns.
summary(marvel)
## page_id name
## 100012 : 1 : 51
## 100013 : 1 'Spinner (Earth-616) : 1
## 100099 : 1 \\u00c4kr\\u00e4s (Earth-616) : 1
## 100160 : 1 107 (Earth-616) : 1
## 100209 : 1 11-Ball (Earth-616) : 1
## 100395 : 1 115 (Legion Personality) (Earth-616): 1
## (Other):16370 (Other) :16320
## urlslug
## : 51
## \\/%27Spinner_(Earth-616) : 1
## \\/%C3%84kr%C3%A4s_(Earth-616) : 1
## \\/107_(Earth-616) : 1
## \\/11-Ball_(Earth-616) : 1
## \\/115_(Legion_Personality)_(Earth-616): 1
## (Other) :16320
## ID ALIGN
## :3813 :2856
## Known to Authorities Identity: 15 Bad Characters :6703
## No Dual Identity :1781 Good Characters :4617
## Public Identity :4516 Neutral Characters:2200
## Secret Identity :6251
##
##
## EYE HAIR SEX
## :9804 :4308 : 904
## Blue Eyes :1949 Black Hair:3738 Agender Characters : 45
## Brown Eyes:1911 Brown Hair:2335 Female Characters : 3830
## Green Eyes: 605 Blond Hair:1574 Genderfluid Characters: 2
## Black Eyes: 553 No Hair :1171 Male Characters :11595
## Red Eyes : 508 Bald : 835
## (Other) :1046 (Other) :2415
## GSM ALIVE
## :16286 : 54
## Bisexual Characters : 19 Deceased Characters: 3746
## Genderfluid Characters: 1 Living Characters :12576
## Homosexual Characters : 66
## Pansexual Characters : 1
## Transgender Characters: 2
## Transvestites : 1
## APPEARANCES FIRST.APPEARANCE Year
## Min. : 1.00 : 863 Min. :1939
## 1st Qu.: 1.00 Jun-92 : 71 1st Qu.:1974
## Median : 3.00 May-93 : 69 Median :1990
## Mean : 16.66 Sep-06 : 67 Mean :1985
## 3rd Qu.: 8.00 Jan-94 : 66 3rd Qu.:2000
## Max. :4043.00 Oct-93 : 58 Max. :2013
## NA's :1143 (Other):15182 NA's :863
# Compact structure listing: dimensions of the data frame, then the type and
# the first few values of every column.
str(marvel)
## 'data.frame': 16376 obs. of 13 variables:
## $ page_id : Factor w/ 16376 levels "100012","100013",..: 2054 14839 13418 2937 4366 4362 3738 2636 5911 2662 ...
## $ name : Factor w/ 16326 levels "","'Spinner (Earth-616)",..: 13910 2322 1 1 14664 1572 12308 6538 13168 7658 ...
## $ urlslug : Factor w/ 16326 levels "","\\/%27Spinner_(Earth-616)",..: 13908 2322 1 1 14662 1572 12307 6538 13167 7658 ...
## $ ID : Factor w/ 5 levels "","Known to Authorities Identity",..: 5 4 1 1 3 4 4 4 4 4 ...
## $ ALIGN : Factor w/ 4 levels "","Bad Characters",..: 3 3 1 1 3 3 3 3 4 3 ...
## $ EYE : Factor w/ 25 levels "","Amber Eyes",..: 11 5 1 1 5 5 6 6 6 5 ...
## $ HAIR : Factor w/ 25 levels "","Auburn Hair",..: 7 24 1 1 5 14 7 7 7 5 ...
## $ SEX : Factor w/ 5 levels "","Agender Characters",..: 5 5 1 1 5 5 5 5 5 5 ...
## $ GSM : Factor w/ 7 levels "","Bisexual Characters",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ALIVE : Factor w/ 3 levels "","Deceased Characters",..: 3 3 1 1 3 3 3 3 3 3 ...
## $ APPEARANCES : int 4043 3360 NA NA 2258 2255 2072 2017 1955 1934 ...
## $ FIRST.APPEARANCE: Factor w/ 833 levels "","Apr-00","Apr-01",..: 101 505 1 1 651 659 659 591 797 659 ...
## $ Year : int 1962 1941 NA NA 1950 1961 1961 1962 1963 1961 ...
# Transposed preview of the same columns: one line per variable showing its
# type and leading values (glimpse() comes from the tidyverse).
glimpse(marvel)
## Observations: 16,376
## Variables: 13
## $ page_id <fctr> 1678, 7139, 64786,"Wolverine (James \""Logan...
## $ name <fctr> Spider-Man (Peter Parker), Captain America (...
## $ urlslug <fctr> \/Spider-Man_(Peter_Parker), \/Captain_Ameri...
## $ ID <fctr> Secret Identity, Public Identity, , , No Dua...
## $ ALIGN <fctr> Good Characters, Good Characters, , , Good C...
## $ EYE <fctr> Hazel Eyes, Blue Eyes, , , Blue Eyes, Blue E...
## $ HAIR <fctr> Brown Hair, White Hair, , , Blond Hair, No H...
## $ SEX <fctr> Male Characters, Male Characters, , , Male C...
## $ GSM <fctr> , , , , , , , , , , , , , , , , , , , , , , ...
## $ ALIVE <fctr> Living Characters, Living Characters, , , Li...
## $ APPEARANCES <int> 4043, 3360, NA, NA, 2258, 2255, 2072, 2017, 1...
## $ FIRST.APPEARANCE <fctr> Aug-62, Mar-41, , , Nov-50, Nov-61, Nov-61, ...
## $ Year <int> 1962, 1941, NA, NA, 1950, 1961, 1961, 1962, 1...
# Distinct categories of the identity-type factor. The empty string "" is a
# level of its own -- presumably it marks entries with no recorded value.
levels(marvel$ID)
## [1] "" "Known to Authorities Identity"
## [3] "No Dual Identity" "Public Identity"
## [5] "Secret Identity"
# Distinct categories of the alignment factor (again including the "" level).
levels(marvel$ALIGN)
## [1] "" "Bad Characters" "Good Characters"
## [4] "Neutral Characters"
# Distinct categories of the hair-colour factor (again including the "" level).
levels(marvel$HAIR)
## [1] "" "Auburn Hair"
## [3] "Bald" "Black Hair"
## [5] "Blond Hair" "Blue Hair"
## [7] "Brown Hair" "Dyed Hair"
## [9] "Gold Hair" "Green Hair"
## [11] "Grey Hair" "Light Brown Hair"
## [13] "Magenta Hair" "No Hair"
## [15] "Orange-brown Hair" "Orange Hair"
## [17] "Pink Hair" "Purple Hair"
## [19] "Red Hair" "Reddish Blond Hair"
## [21] "Silver Hair" "Strawberry Blond Hair"
## [23] "Variable Hair" "White Hair"
## [25] "Yellow Hair"
# Distinct categories of the eye-colour factor (again including the "" level).
levels(marvel$EYE)
## [1] "" "Amber Eyes" "Black Eyeballs"
## [4] "Black Eyes" "Blue Eyes" "Brown Eyes"
## [7] "Compound Eyes" "Gold Eyes" "Green Eyes"
## [10] "Grey Eyes" "Hazel Eyes" "Magenta Eyes"
## [13] "Multiple Eyes" "No Eyes" "One Eye"
## [16] "Orange Eyes" "Pink Eyes" "Purple Eyes"
## [19] "Red Eyes" "Silver Eyes" "Variable Eyes"
## [22] "Violet Eyes" "White Eyes" "Yellow Eyeballs"
## [25] "Yellow Eyes"
# Contingency table of character counts: identity type (rows) crossed with
# alignment (columns). The "" level of each factor shows up as an unlabelled
# row / column.
tab_count <- table(marvel[["ID"]], marvel[["ALIGN"]])
tab_count
##
## Bad Characters Good Characters
## 1221 1567 735
## Known to Authorities Identity 0 3 10
## No Dual Identity 275 474 643
## Public Identity 740 1451 1622
## Secret Identity 620 3208 1607
##
## Neutral Characters
## 290
## Known to Authorities Identity 2
## No Dual Identity 389
## Public Identity 703
## Secret Identity 816
# Joint proportions: every cell of tab_count divided by the grand total.
prop.table(tab_count)
##
## Bad Characters
## 0.0745603322 0.0956888129
## Known to Authorities Identity 0.0000000000 0.0001831949
## No Dual Identity 0.0167928676 0.0289447973
## Public Identity 0.0451880801 0.0886052760
## Secret Identity 0.0378602833 0.1958964338
##
## Good Characters Neutral Characters
## 0.0448827553 0.0177088422
## Known to Authorities Identity 0.0006106497 0.0001221299
## No Dual Identity 0.0392647777 0.0237542745
## Public Identity 0.0990473864 0.0429286761
## Secret Identity 0.0981314118 0.0498290181
# Sanity check: the joint proportions add up to 1.
sum(prop.table(tab_count))
## [1] 1
# Row-conditional proportions (margin 1): the distribution of ALIGN within
# each ID level, so every row sums to 1.
prop.table(tab_count,1)
##
## Bad Characters Good Characters
## 0.32022030 0.41096250 0.19276161
## Known to Authorities Identity 0.00000000 0.20000000 0.66666667
## No Dual Identity 0.15440764 0.26614262 0.36103313
## Public Identity 0.16386182 0.32130204 0.35916740
## Secret Identity 0.09918413 0.51319789 0.25707887
##
## Neutral Characters
## 0.07605560
## Known to Authorities Identity 0.13333333
## No Dual Identity 0.21841662
## Public Identity 0.15566873
## Secret Identity 0.13053911
# Sanity check: each of the 5 ID rows sums to 1, so the grand total equals the
# number of rows.
sum(prop.table(tab_count,1))
## [1] 5
# Column-conditional proportions (margin 2): the distribution of ID within
# each ALIGN level, so every column sums to 1.
prop.table(tab_count,2)
##
## Bad Characters
## 0.4275210084 0.2337759212
## Known to Authorities Identity 0.0000000000 0.0004475608
## No Dual Identity 0.0962885154 0.0707146054
## Public Identity 0.2591036415 0.2164702372
## Secret Identity 0.2170868347 0.4785916754
##
## Good Characters Neutral Characters
## 0.1591942820 0.1318181818
## Known to Authorities Identity 0.0021659086 0.0009090909
## No Dual Identity 0.1392679229 0.1768181818
## Public Identity 0.3513103747 0.3195454545
## Secret Identity 0.3480615118 0.3709090909
# Sanity check: each of the 4 ALIGN columns sums to 1, so the grand total
# equals the number of columns.
sum(prop.table(tab_count,2))
## [1] 4
# Stacked bars: number of characters per identity type, coloured by alignment.
ggplot(data = marvel, mapping = aes(x = ID, fill = ALIGN)) +
  geom_bar(position = "stack") +
  theme(axis.text.x = element_text(angle = 30))

# Same counts with the alignment bars placed side by side.
ggplot(data = marvel, mapping = aes(x = ID, fill = ALIGN)) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 30))

# Bars normalised to height 1: share of each alignment within an identity type.
ggplot(data = marvel, mapping = aes(x = ID, fill = ALIGN)) +
  geom_bar(position = "fill") +
  labs(y = "proportion") +
  theme(axis.text.x = element_text(angle = 30))

# One panel per alignment, each showing the counts by identity type.
ggplot(data = marvel, mapping = aes(x = ID)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(~ ALIGN)
# Top 10 characters by number of appearances.
#
# min_rank() gives tied characters the same integer rank (1, 2, 2, 4, ...).
# The previous round(rank(desc(...)), 0) used averaged tie ranks, which are
# rounded half-to-even: a two-way tie for first place was labelled 2, and a
# three-way tie straddling the cut-off (average rank 11) was dropped entirely.
# Rows with a missing APPEARANCES get an NA rank and are removed by filter().
marvel %>%
  select(ALIGN, name, APPEARANCES) %>%
  mutate(order = min_rank(desc(APPEARANCES))) %>%
  arrange(order) %>%
  filter(order <= 10)
## ALIGN name APPEARANCES order
## 1 Good Characters Spider-Man (Peter Parker) 4043 1
## 2 Good Characters Captain America (Steven Rogers) 3360 2
## 3 Good Characters Thor (Thor Odinson) 2258 3
## 4 Good Characters Benjamin Grimm (Earth-616) 2255 4
## 5 Good Characters Reed Richards (Earth-616) 2072 5
## 6 Good Characters Hulk (Robert Bruce Banner) 2017 6
## 7 Neutral Characters Scott Summers (Earth-616) 1955 7
## 8 Good Characters Jonathan Storm (Earth-616) 1934 8
## 9 Good Characters Henry McCoy (Earth-616) 1825 9
## 10 Good Characters Susan Storm (Earth-616) 1713 10
# Top 3 characters by number of appearances within each alignment.
#
# Ranking happens inside each ALIGN group because mutate() runs on the grouped
# data. min_rank() gives tied characters the same integer rank; the previous
# round(rank(desc(...)), 0) used averaged tie ranks, which mislabel ties and
# can drop a tie that straddles the cut-off. The leading ungroup() was removed:
# marvel is a plain data frame, so there was nothing to ungroup.
# The result is still grouped by ALIGN when printed.
marvel %>%
  select(ALIGN, name, APPEARANCES) %>%
  group_by(ALIGN) %>%
  mutate(order = min_rank(desc(APPEARANCES))) %>%
  arrange(ALIGN, order) %>%
  filter(order <= 3)
## # A tibble: 12 x 4
## # Groups: ALIGN [4]
## ALIGN name APPEARANCES order
## <fctr> <fctr> <int> <int>
## 1 Blaine Colt (Earth-616) 429 1
## 2 Millicent Collins (Earth-616) 321 2
## 3 Chili Storm (Earth-616) 284 3
## 4 Bad Characters Victor von Doom (Earth-616) 721 1
## 5 Bad Characters Norman Osborn (Earth-616) 692 2
## 6 Bad Characters Wilson Fisk (Earth-616) 503 3
## 7 Good Characters Spider-Man (Peter Parker) 4043 1
## 8 Good Characters Captain America (Steven Rogers) 3360 2
## 9 Good Characters Thor (Thor Odinson) 2258 3
## 10 Neutral Characters Scott Summers (Earth-616) 1955 1
## 11 Neutral Characters Namor McKenzie (Earth-616) 1528 2
## 12 Neutral Characters John Jonah Jameson (Earth-616) 1266 3
# The 10 years with the most rows in marvel, i.e. the most characters whose
# Year value falls in that year. Rows without a Year are excluded first.
#
# min_rank() gives years with equal counts the same integer rank; the previous
# round(rank(desc(...)), 0) used averaged tie ranks, which mislabel ties and
# can drop a tie that straddles the cut-off.
marvel %>%
  filter(!is.na(Year)) %>%
  group_by(Year) %>%
  summarise(count = n()) %>%
  mutate(order = min_rank(desc(count))) %>%
  arrange(order) %>%
  filter(order <= 10)
## # A tibble: 10 x 3
## Year count order
## <int> <int> <int>
## 1 1993 552 1
## 2 1994 483 2
## 3 1992 453 3
## 4 2006 381 4
## 5 1991 360 5
## 6 1990 357 6
## 7 2008 353 7
## 8 2011 349 8
## 9 2005 336 9
## 10 1997 334 10
# Number of characters per Year (rows with a missing Year are left out),
# sorted chronologically.
marvel_year_appearence <- marvel %>%
  filter(!is.na(Year)) %>%
  group_by(Year) %>%
  summarise(Count = n()) %>%
  arrange(Year)

# Time series of the yearly character counts.
ggplot(data = marvel_year_appearence, mapping = aes(x = Year, y = Count)) +
  geom_line(colour = "yellow", size = 1.1)
# Number of characters per Year and alignment (rows with a missing Year are
# left out), sorted by year and then alignment.
marvel_year_appearence_align <- marvel %>%
  filter(!is.na(Year)) %>%
  group_by(Year, ALIGN) %>%
  summarise(Count = n()) %>%
  arrange(Year, ALIGN)

# All alignments in one panel, one coloured line per alignment.
ggplot(
  data = marvel_year_appearence_align,
  mapping = aes(x = Year, y = Count, color = ALIGN)
) +
  geom_line(size = 1.1) +
  theme(axis.text.x = element_text(angle = 90))

# Same series split into one panel per alignment.
ggplot(
  data = marvel_year_appearence_align,
  mapping = aes(x = Year, y = Count)
) +
  geom_line(size = 0.9, colour = "green") +
  facet_wrap(~ ALIGN) +
  theme(axis.text.x = element_text(angle = 90))