In this case study we explore the 2017 university entrance examination (YGS/LYS) dataset. The dataset consists of the undergraduate programs offered in 2017. Each program has a fixed number of available places (its quota). Students are placed according to their preference lists and their scores: each program is filled with students in descending order of score until the number of placements equals the quota. The score of the highest-scoring student placed in a program is that program's maximum score, and the score of the last student placed is its minimum score.
# Setup: fetch the dataset file, make sure tidyverse is available, then load
# both. The code further down expects load() to provide `osym_data_2017`.
data_url <- "https://mef-bda503.github.io/files/osym_data_2017.RData"
data_file <- "osym_data_2017.RData"

# Download from GitHub only when there is no local copy yet, so re-running
# the script does not hit the network again. mode = "wb" requests a binary
# transfer, which is what an .RData file needs on every platform.
if (!file.exists(data_file)) {
  download.file(data_url, data_file, mode = "wb")
}

# Install tidyverse if not already installed. requireNamespace() looks the
# package up by name; `%in% installed.packages()` compared against every cell
# of the returned matrix (versions, paths, ...), not just package names.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", repos = "https://cran.r-project.org")
}

# Load tidyverse package
library(tidyverse)

# Load the data
load(data_file)
The table below shows how many programs each university in Istanbul offers (one dataset row per program), ordered from most to fewest.
# Number of programs (dataset rows) per university located in Istanbul,
# ordered from most to fewest.
# The city name is spelled with the \u0130 escape (capital dotted I) so the
# comparison does not depend on how this file is encoded; the previous
# literal was mis-encoded and could not match a correctly encoded value.
# Filtering before grouping yields the same groups with less work.
university_quota <- osym_data_2017 %>%
  filter(city == "\u0130STANBUL") %>%
  group_by(university_name) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
university_quota
## # A tibble: 51 x 2
## university_name count
## <chr> <int>
## 1 İSTANBUL GELİŞİM ÜNİVERSİTESİ 212
## 2 OKAN ÜNİVERSİTESİ 172
## 3 BEYKENT ÜNİVERSİTESİ 169
## 4 YEDİTEPE ÜNİVERSİTESİ 165
## 5 İSTANBUL MEDİPOL ÜNİVERSİTESİ 155
## 6 İSTANBUL AYDIN ÜNİVERSİTESİ 154
## 7 İSTANBUL ÜNİVERSİTESİ 138
## 8 İSTANBUL AREL ÜNİVERSİTESİ 135
## 9 İSTANBUL BİLGİ ÜNİVERSİTESİ 131
## 10 MALTEPE ÜNİVERSİTESİ 123
## # ... with 41 more rows
Let’s visualize this data in a bar chart.
# Bar chart of the per-university counts, tallest bar first. The bar for
# MEF University is drawn in red, every other bar in grey.
ggplot(
  university_quota,
  aes(x = reorder(university_name, -count), y = count)
) +
  # Use the bare column name inside aes(): `data$column` bypasses ggplot2's
  # data masking and ggplot2 itself warns against it. The university name is
  # written with \u escapes so it survives any file re-encoding.
  geom_col(
    aes(fill = university_name == "MEF \u00dcN\u0130VERS\u0130TES\u0130")
  ) +
  labs(
    title = "Quota of University in Istanbul",
    x = "University",
    y = "Count",
    fill = ""
  ) +
  # Rotate the long university names so they do not overlap.
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values pin each colour to FALSE/TRUE even when one level is absent;
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )
# Highest `max_score` among the programs of each Istanbul university,
# ordered from highest to lowest. `max_puan` is Turkish for "max score".
# The city name uses the \u0130 escape (capital dotted I) so the comparison
# does not depend on file encoding; the previous literal was mis-encoded.
# The earlier select() was dropped: summarise() already keeps only the
# grouping column and the computed column.
question2 <- osym_data_2017 %>%
  filter(city == "\u0130STANBUL") %>%
  group_by(university_name) %>%
  summarise(max_puan = max(max_score)) %>%
  arrange(desc(max_puan))
question2
## # A tibble: 51 x 2
## university_name max_puan
## <chr> <dbl>
## 1 KOÇ ÜNİVERSİTESİ 569.1112
## 2 İSTANBUL ÜNİVERSİTESİ 564.0145
## 3 BOĞAZİÇİ ÜNİVERSİTESİ 562.5765
## 4 İSTANBUL MEDİPOL ÜNİVERSİTESİ 559.4780
## 5 GALATASARAY ÜNİVERSİTESİ 556.0948
## 6 ACIBADEM MEHMET ALİ AYDINLAR ÜNİVERSİTESİ 542.3482
## 7 SABANCI ÜNİVERSİTESİ 538.7725
## 8 YEDİTEPE ÜNİVERSİTESİ 531.3691
## 9 BAHÇEŞEHİR ÜNİVERSİTESİ 530.4845
## 10 İSTANBUL AYDIN ÜNİVERSİTESİ 525.5809
## # ... with 41 more rows
Let’s visualize this data in a bar chart.
# Bar chart of each university's maximum score, tallest bar first. The bar
# for MEF University is drawn in red, every other bar in grey.
ggplot(
  question2,
  aes(x = reorder(university_name, -max_puan), y = max_puan)
) +
  # Use the bare column name inside aes(): `data$column` bypasses ggplot2's
  # data masking and ggplot2 itself warns against it. The university name is
  # written with \u escapes so it survives any file re-encoding.
  geom_col(
    aes(fill = university_name == "MEF \u00dcN\u0130VERS\u0130TES\u0130")
  ) +
  labs(
    title = "Maximum score of each university",
    x = "University",
    y = "Maximum score",
    fill = ""
  ) +
  # Rotate the long university names so they do not overlap.
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values pin each colour to FALSE/TRUE even when one level is absent;
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )
# For each Istanbul university: number of programs (`bolum_sayisi`, Turkish
# for "number of departments") and the sum of `general_quota` over those
# programs, ordered by total quota from largest to smallest.
# The city name uses the \u0130 escape (capital dotted I) so the comparison
# does not depend on file encoding; the previous literal was mis-encoded.
# The earlier select() was dropped: summarise() already keeps only the
# grouping column and the computed columns.
question3 <- osym_data_2017 %>%
  filter(city == "\u0130STANBUL") %>%
  group_by(university_name) %>%
  summarise(
    bolum_sayisi = n(),
    general_quota = sum(general_quota)
  ) %>%
  arrange(desc(general_quota))
question3
## # A tibble: 51 x 3
## university_name bolum_sayisi general_quota
## <chr> <int> <int>
## 1 İSTANBUL ÜNİVERSİTESİ 138 17809
## 2 MARMARA ÜNİVERSİTESİ 80 6200
## 3 İSTANBUL MEDİPOL ÜNİVERSİTESİ 155 4495
## 4 İSTANBUL GELİŞİM ÜNİVERSİTESİ 212 3950
## 5 BEYKENT ÜNİVERSİTESİ 169 3811
## 6 İSTANBUL TEKNİK ÜNİVERSİTESİ 76 3684
## 7 YILDIZ TEKNİK ÜNİVERSİTESİ 53 3652
## 8 İSTANBUL AYDIN ÜNİVERSİTESİ 154 3578
## 9 YEDİTEPE ÜNİVERSİTESİ 165 3442
## 10 BAHÇEŞEHİR ÜNİVERSİTESİ 122 2774
## # ... with 41 more rows
Let’s visualize this data in a bar chart.
# Bar chart of the number of programs per university, tallest bar first.
# The bar for MEF University is drawn in red, every other bar in grey.
ggplot(
  question3,
  aes(x = reorder(university_name, -bolum_sayisi), y = bolum_sayisi)
) +
  # Use the bare column name inside aes(): `data$column` bypasses ggplot2's
  # data masking and ggplot2 itself warns against it. The university name is
  # written with \u escapes so it survives any file re-encoding.
  geom_col(
    aes(fill = university_name == "MEF \u00dcN\u0130VERS\u0130TES\u0130")
  ) +
  labs(
    # \u0130 is the capital dotted I; the previous title text was mis-encoded.
    title = "Number of Departments of Universities in \u0130stanbul",
    x = "University",
    y = "Departments",
    fill = ""
  ) +
  # Rotate the long university names so they do not overlap.
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values pin each colour to FALSE/TRUE even when one level is absent;
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )