I want to use some ggplot2 functions to check whether Metal & Hardrock music lovers like sci-fi and are afraid of snakes. Then I will apply multidimensional scaling (MDS) to the ratings of school subjects (from History to Law). See my reference here.
# The Young Survey data is loaded below, starting with the "responses" file.
# First show the working directory; the CSV files are read with relative paths.
getwd()
## [1] "C:/Users/Lenovo/Desktop"
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Load the survey responses and keep only the rows without any missing value.
# `as_tibble()` replaces `tbl_df()`, which has been deprecated since dplyr 1.0.0.
responses <- read.csv("responses.csv") %>%
  filter(complete.cases(.)) %>%
  as_tibble()
# Preview the first rows.
head(responses)
## # A tibble: 6 x 150
## Music Slow.songs.or.fast.songs Dance Folk Country Classical.music
## <int> <int> <int> <int> <int> <int>
## 1 5 3 2 1 2 2
## 2 4 4 2 1 1 1
## 3 5 5 2 2 3 4
## 4 5 3 4 3 2 4
## 5 5 3 2 3 2 3
## 6 5 5 5 3 1 2
## # ... with 144 more variables: Musical <int>, Pop <int>, Rock <int>,
## # Metal.or.Hardrock <int>, Punk <int>, Hiphop..Rap <int>,
## # Reggae..Ska <int>, Swing..Jazz <int>, Rock.n.roll <int>,
## # Alternative <int>, Latino <int>, Techno..Trance <int>, Opera <int>,
## # Movies <int>, Horror <int>, Thriller <int>, Comedy <int>,
## # Romantic <int>, Sci.fi <int>, War <int>, Fantasy.Fairy.tales <int>,
## # Animated <int>, Documentary <int>, Western <int>, Action <int>,
## # History <int>, Psychology <int>, Politics <int>, Mathematics <int>,
## # Physics <int>, Internet <int>, PC <int>, Economy.Management <int>,
## # Biology <int>, Chemistry <int>, Reading <int>, Geography <int>,
## # Foreign.languages <int>, Medicine <int>, Law <int>, Cars <int>,
## # Art.exhibitions <int>, Religion <int>, Countryside..outdoors <int>,
## # Dancing <int>, Musical.instruments <int>, Writing <int>,
## # Passive.sport <int>, Active.sport <int>, Gardening <int>,
## # Celebrities <int>, Shopping <int>, Science.and.technology <int>,
## # Theatre <int>, Fun.with.friends <int>, Adrenaline.sports <int>,
## # Pets <int>, Flying <int>, Storm <int>, Darkness <int>, Heights <int>,
## # Spiders <int>, Snakes <int>, Rats <int>, Ageing <int>,
## # Dangerous.dogs <int>, Fear.of.public.speaking <int>, Smoking <fctr>,
## # Alcohol <fctr>, Healthy.eating <int>, Daily.events <int>,
## # Prioritising.workload <int>, Writing.notes <int>, Workaholism <int>,
## # Thinking.ahead <int>, Final.judgement <int>, Reliability <int>,
## # Keeping.promises <int>, Loss.of.interest <int>,
## # Friends.versus.money <int>, Funniness <int>, Fake <int>,
## # Criminal.damage <int>, Decision.making <int>, Elections <int>,
## # Self.criticism <int>, Judgment.calls <int>, Hypochondria <int>,
## # Empathy <int>, Eating.to.survive <int>, Giving <int>,
## # Compassion.to.animals <int>, Borrowed.stuff <int>, Loneliness <int>,
## # Cheating.in.school <int>, Health <int>, Changing.the.past <int>,
## # God <int>, Dreams <int>, Charity <int>, ...
# Load the second file, "columns.csv", keeping only complete rows.
# `as_tibble()` replaces `tbl_df()`, which has been deprecated since dplyr 1.0.0.
columns <- read.csv("columns.csv") %>%
  filter(complete.cases(.)) %>%
  as_tibble()
# Inspect the structure of the data with glimpse().
glimpse(columns)
## Observations: 150
## Variables: 2
## $ original <fctr> I enjoy listening to music., I prefer., Dance, Disco...
## $ short <fctr> Music, Slow songs or fast songs, Dance, Folk, Countr...
# How did the people who rated Metal/Hardrock "5" rate sci-fi movies?
# The rating column is integer, so compare against an integer instead of
# relying on implicit coercion of the string "5".
targetgood <- 5L
responses_metalfive <- filter(responses, Metal.or.Hardrock %in% targetgood)
# Ratings are whole numbers, so a bar chart (one bar per rating) is used
# instead of the deprecated qplot(), whose default 30-bin histogram triggered
# the "Pick better value with `binwidth`" message.
ggplot(responses_metalfive, aes(x = Sci.fi)) +
  geom_bar()
# And the people who rated Metal/Hardrock "1": how did they rate Folk music?
# The rating column is integer, so compare against an integer instead of
# relying on implicit coercion of the string "1".
targetbad <- 1L
responses_metalone <- filter(responses, Metal.or.Hardrock %in% targetbad)
# Bar chart of the whole-number ratings; replaces the deprecated qplot() and
# its default 30-bin histogram.
ggplot(responses_metalone, aes(x = Folk)) +
  geom_bar()
# How did the people who rated Metal/Hardrock "5" or "1" rate their fear of
# snakes? One bar chart per group; titles tell the two plots apart.
# ggplot() + geom_bar() replaces the deprecated qplot() and its default
# 30-bin histogram, which is a poor fit for whole-number ratings.
ggplot(responses_metalfive, aes(x = Snakes)) +
  geom_bar() +
  labs(title = "Snakes: Metal.or.Hardrock rated 5")
ggplot(responses_metalone, aes(x = Snakes)) +
  geom_bar() +
  labs(title = "Snakes: Metal.or.Hardrock rated 1")
# Correlation test between the Snakes and Metal/Hardrock ratings over all
# respondents (the result is a weak negative correlation).
# The defaults (two-sided Pearson test) are spelled out explicitly.
cor.test(
  x = responses$Snakes,
  y = responses$Metal.or.Hardrock,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: responses$Snakes and responses$Metal.or.Hardrock
## t = -5.8767, df = 684, p-value = 6.537e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2893435 -0.1467896
## sample estimates:
## cor
## -0.2192363
# Multidimensional scaling, step 1: keep only the subject columns, History
# through Law. NOTE: this overwrites `responses` with the reduced table.
responses <- select(responses, History:Law)
print(head(responses, n = 6L))
## # A tibble: 6 x 15
## History Psychology Politics Mathematics Physics Internet PC
## <int> <int> <int> <int> <int> <int> <int>
## 1 1 5 1 3 3 5 3
## 2 1 3 4 5 2 4 4
## 3 1 2 1 5 2 4 2
## 4 3 2 3 2 2 2 2
## 5 5 3 4 2 3 4 4
## 6 3 3 1 1 1 2 1
## # ... with 8 more variables: Economy.Management <int>, Biology <int>,
## # Chemistry <int>, Reading <int>, Geography <int>,
## # Foreign.languages <int>, Medicine <int>, Law <int>
# Turn the pairwise correlations between subjects into dissimilarities
# (1 - r), then place the subjects in two dimensions with cmdscale().
res_dist <- 1 - cor(responses)
# NOTE: `responses` is overwritten with the matrix of MDS coordinates.
responses <- cmdscale(d = res_dist, k = 2)
# Name the two coordinate columns so they can be used as plot aesthetics.
dimnames(responses) <- list(rownames(responses), c("x", "y"))
print(responses)
## x y
## History -0.072610311 -0.32938441
## Psychology 0.117105797 -0.25852170
## Politics -0.314029964 -0.23889370
## Mathematics -0.126670610 0.43755073
## Physics 0.044056668 0.49221776
## Internet -0.402448005 0.24761379
## PC -0.425240888 0.44061460
## Economy.Management -0.492838199 -0.06862771
## Biology 0.585300769 0.14659268
## Chemistry 0.545669823 0.27083596
## Reading 0.290678529 -0.42711839
## Geography -0.099837728 -0.16259552
## Foreign.languages 0.004569279 -0.35370929
## Medicine 0.528792130 0.08203258
## Law -0.182497288 -0.27860737
# Plot the MDS coordinates, labelling each point with its subject name
# (the row names of the coordinate matrix).
ggplot(
  data = as.data.frame(responses),
  mapping = aes(x = x, y = y, label = rownames(responses))
) +
  geom_text(angle = 45, size = 2)