Young People Survey Assignment

I want to use some ggplot2 functions to check whether metal and hard rock lovers like sci-fi and are afraid of snakes. Then I will apply multidimensional scaling (MDS) to the ratings of lesson subjects (the columns from History to Law). See my reference here.

# Load the Young People Survey data, starting with the first file,
# "responses.csv". First print the working directory, since the read.csv()
# calls further down use relative file names.
getwd()

## [1] "C:/Users/Lenovo/Desktop"

library(tidyverse)

## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr

## Conflicts with tidy packages ----------------------------------------------

## filter(): dplyr, stats
## lag():    dplyr, stats

# Load the survey answers, keep only the rows without any missing value, and
# convert the result to a tibble. as_tibble() is used because tbl_df() is
# deprecated (dplyr 1.0.0).
responses <- read.csv("responses.csv") %>%
  filter(complete.cases(.)) %>%
  as_tibble()
head(responses)

## # A tibble: 6 x 150
##   Music Slow.songs.or.fast.songs Dance  Folk Country Classical.music
##   <int>                    <int> <int> <int>   <int>           <int>
## 1     5                        3     2     1       2               2
## 2     4                        4     2     1       1               1
## 3     5                        5     2     2       3               4
## 4     5                        3     4     3       2               4
## 5     5                        3     2     3       2               3
## 6     5                        5     5     3       1               2
## # ... with 144 more variables: Musical <int>, Pop <int>, Rock <int>,
## #   Metal.or.Hardrock <int>, Punk <int>, Hiphop..Rap <int>,
## #   Reggae..Ska <int>, Swing..Jazz <int>, Rock.n.roll <int>,
## #   Alternative <int>, Latino <int>, Techno..Trance <int>, Opera <int>,
## #   Movies <int>, Horror <int>, Thriller <int>, Comedy <int>,
## #   Romantic <int>, Sci.fi <int>, War <int>, Fantasy.Fairy.tales <int>,
## #   Animated <int>, Documentary <int>, Western <int>, Action <int>,
## #   History <int>, Psychology <int>, Politics <int>, Mathematics <int>,
## #   Physics <int>, Internet <int>, PC <int>, Economy.Management <int>,
## #   Biology <int>, Chemistry <int>, Reading <int>, Geography <int>,
## #   Foreign.languages <int>, Medicine <int>, Law <int>, Cars <int>,
## #   Art.exhibitions <int>, Religion <int>, Countryside..outdoors <int>,
## #   Dancing <int>, Musical.instruments <int>, Writing <int>,
## #   Passive.sport <int>, Active.sport <int>, Gardening <int>,
## #   Celebrities <int>, Shopping <int>, Science.and.technology <int>,
## #   Theatre <int>, Fun.with.friends <int>, Adrenaline.sports <int>,
## #   Pets <int>, Flying <int>, Storm <int>, Darkness <int>, Heights <int>,
## #   Spiders <int>, Snakes <int>, Rats <int>, Ageing <int>,
## #   Dangerous.dogs <int>, Fear.of.public.speaking <int>, Smoking <fctr>,
## #   Alcohol <fctr>, Healthy.eating <int>, Daily.events <int>,
## #   Prioritising.workload <int>, Writing.notes <int>, Workaholism <int>,
## #   Thinking.ahead <int>, Final.judgement <int>, Reliability <int>,
## #   Keeping.promises <int>, Loss.of.interest <int>,
## #   Friends.versus.money <int>, Funniness <int>, Fake <int>,
## #   Criminal.damage <int>, Decision.making <int>, Elections <int>,
## #   Self.criticism <int>, Judgment.calls <int>, Hypochondria <int>,
## #   Empathy <int>, Eating.to.survive <int>, Giving <int>,
## #   Compassion.to.animals <int>, Borrowed.stuff <int>, Loneliness <int>,
## #   Cheating.in.school <int>, Health <int>, Changing.the.past <int>,
## #   God <int>, Dreams <int>, Charity <int>, ...

# Load the second file, "columns.csv", drop incomplete rows, and convert the
# result to a tibble. as_tibble() is used because tbl_df() is deprecated
# (dplyr 1.0.0).
columns <- read.csv("columns.csv") %>%
  filter(complete.cases(.)) %>%
  as_tibble()

# Take a quick look at the structure of the data with glimpse().
glimpse(columns)

## Observations: 150
## Variables: 2
## $ original <fctr> I enjoy listening to music., I prefer., Dance, Disco...
## $ short    <fctr> Music, Slow songs or fast songs, Dance, Folk, Countr...

# Among the respondents who gave Metal.or.Hardrock the rating 5, how was
# Sci.fi rated?
# The target is an integer (the rating columns are integers), so the filter
# does not rely on implicit string-to-number coercion.
targetgood <- 5L
responses_metalfive <- filter(responses, Metal.or.Hardrock %in% targetgood)

# Ratings are whole numbers, so draw one bar per rating value rather than a
# default 30-bin histogram; ggplot() + geom_bar() also replaces the deprecated
# qplot().
ggplot(responses_metalfive, aes(x = Sci.fi)) +
  geom_bar()

# And the respondents who gave Metal.or.Hardrock the rating 1: how did they
# rate Folk?
# The target is an integer (the rating columns are integers), so the filter
# does not rely on implicit string-to-number coercion.
targetbad <- 1L
responses_metalone <- filter(responses, Metal.or.Hardrock %in% targetbad)

# Ratings are whole numbers, so draw one bar per rating value rather than a
# default 30-bin histogram; ggplot() + geom_bar() also replaces the deprecated
# qplot().
ggplot(responses_metalone, aes(x = Folk)) +
  geom_bar()

# How did the two groups (Metal.or.Hardrock rated 5, then rated 1) rate
# Snakes?
# Ratings are whole numbers, so each plot draws one bar per rating value
# rather than a default 30-bin histogram; ggplot() + geom_bar() also replaces
# the deprecated qplot().
ggplot(responses_metalfive, aes(x = Snakes)) +
  geom_bar()

ggplot(responses_metalone, aes(x = Snakes)) +
  geom_bar()

# Pearson correlation test between the Snakes and Metal.or.Hardrock ratings
# over all rows kept in `responses` (the author reads the result as a weak
# correlation).
cor.test(responses$Snakes, responses$Metal.or.Hardrock)

## 
##  Pearson's product-moment correlation
## 
## data:  responses$Snakes and responses$Metal.or.Hardrock
## t = -5.8767, df = 684, p-value = 6.537e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2893435 -0.1467896
## sample estimates:
##        cor 
## -0.2192363

# Last step: multidimensional scaling on the columns from History to Law.
# Note that `responses` is narrowed to just those columns from here on.
responses <- select(responses, History:Law)
print(head(responses))

## # A tibble: 6 x 15
##   History Psychology Politics Mathematics Physics Internet    PC
##     <int>      <int>    <int>       <int>   <int>    <int> <int>
## 1       1          5        1           3       3        5     3
## 2       1          3        4           5       2        4     4
## 3       1          2        1           5       2        4     2
## 4       3          2        3           2       2        2     2
## 5       5          3        4           2       3        4     4
## 6       3          3        1           1       1        2     1
## # ... with 8 more variables: Economy.Management <int>, Biology <int>,
## #   Chemistry <int>, Reading <int>, Geography <int>,
## #   Foreign.languages <int>, Medicine <int>, Law <int>

# Turn the pairwise correlations between columns into dissimilarities:
# the higher the correlation, the smaller the distance.
res_dist <- 1 - cor(responses)

# Classical multidimensional scaling into two dimensions. `responses` is
# reused to hold the resulting coordinate matrix (one row per column of the
# input), with the two coordinate columns labelled "x" and "y".
responses <- cmdscale(d = res_dist, k = 2)
dimnames(responses) <- list(rownames(responses), c("x", "y"))
print(responses)

##                               x           y
## History            -0.072610311 -0.32938441
## Psychology          0.117105797 -0.25852170
## Politics           -0.314029964 -0.23889370
## Mathematics        -0.126670610  0.43755073
## Physics             0.044056668  0.49221776
## Internet           -0.402448005  0.24761379
## PC                 -0.425240888  0.44061460
## Economy.Management -0.492838199 -0.06862771
## Biology             0.585300769  0.14659268
## Chemistry           0.545669823  0.27083596
## Reading             0.290678529 -0.42711839
## Geography          -0.099837728 -0.16259552
## Foreign.languages   0.004569279 -0.35370929
## Medicine            0.528792130  0.08203258
## Law                -0.182497288 -0.27860737

# Plot the two MDS coordinates, writing each row name as a small, rotated
# text label at its (x, y) position.
ggplot(data = data.frame(responses), mapping = aes(x = x, y = y)) +
  geom_text(label = rownames(responses), angle = 45, size = 2)