In this case study we explore the 2017 university entrance examination (YGS/LYS) dataset. The dataset consists of the undergraduate programs offered in 2017. Each program has a fixed number of available places (i.e. a quota). Students are placed according to their preference lists and their scores: each program is filled with students in descending order of score until the number of placements equals the quota. The highest score among the students placed in a program is that program's maximum score, and the score of the last student placed is its minimum score.
# Load the tidyverse packages (dplyr, ggplot2, ...) used by the analysis below
library(tidyverse)

# Local file name and remote location of the 2017 dataset
data_file <- "osym_data_2017.RData"
data_url <- "https://mef-bda503.github.io/files/osym_data_2017.RData"

# Download the dataset from GitHub only when there is no local copy yet, so
# that re-running the script does not fetch the file again.
# mode = "wb" writes the file in binary mode, which keeps the .RData file
# intact on Windows.
if (!file.exists(data_file)) {
  download.file(data_url, data_file, mode = "wb")
}

# Load the data; the code below expects this to define `osym_data_2017`
load(data_file)
The table below shows the number of departments offered by each university in Istanbul.
# Count the rows (one per offered program) of every Istanbul university and
# list the universities from the largest count to the smallest.
university_departments <- osym_data_2017 %>%
  # Keep only the Istanbul rows before grouping
  filter(city == "İSTANBUL") %>%
  group_by(University_Name = university_name) %>%
  summarise(Departments = n()) %>%
  arrange(desc(Departments))
## Warning: package 'bindrcpp' was built under R version 3.4.2
university_departments
## # A tibble: 51 x 2
## University_Name Departments
## <chr> <int>
## 1 İSTANBUL GELİŞİM ÜNİVERSİTESİ 212
## 2 OKAN ÜNİVERSİTESİ 172
## 3 BEYKENT ÜNİVERSİTESİ 169
## 4 YEDİTEPE ÜNİVERSİTESİ 165
## 5 İSTANBUL MEDİPOL ÜNİVERSİTESİ 155
## 6 İSTANBUL AYDIN ÜNİVERSİTESİ 154
## 7 İSTANBUL ÜNİVERSİTESİ 138
## 8 İSTANBUL AREL ÜNİVERSİTESİ 135
## 9 İSTANBUL BİLGİ ÜNİVERSİTESİ 131
## 10 MALTEPE ÜNİVERSİTESİ 123
## # ... with 41 more rows
Let’s visualize this data in a barchart.
# Bar chart of the number of departments per Istanbul university, ordered from
# the largest count to the smallest, with MEF University highlighted in red.
ggplot(
  university_departments,
  aes(x = reorder(University_Name, -Departments), y = Departments)
) +
  # geom_col() draws bars whose heights are the y values, i.e. the same thing
  # as geom_bar(stat = "identity"). The fill refers to the column by its bare
  # name rather than `university_departments$University_Name`, so it is
  # evaluated against the data the layer actually plots.
  geom_col(aes(fill = University_Name == "MEF ÜNİVERSİTESİ")) +
  labs(
    title = "# of University Departments in Istanbul",
    x = "University",
    y = "# of Departments",
    fill = ""
  ) +
  # Rotate the long university names so that they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values pin each colour to its logical level; guide = "none" hides
  # the legend (guide = FALSE is deprecated in current ggplot2 releases).
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )
# Highest placement score recorded for each Istanbul university, sorted from
# the highest score to the lowest.
maximum_scores <- osym_data_2017 %>%
  # Restrict to Istanbul rows first; the grouping below then only sees those
  filter(city == "İSTANBUL") %>%
  group_by(University_Name = university_name) %>%
  summarise(Max_Score = max(max_score)) %>%
  arrange(desc(Max_Score))
maximum_scores
## # A tibble: 51 x 2
## University_Name Max_Score
## <chr> <dbl>
## 1 KOÇ ÜNİVERSİTESİ 569.1112
## 2 İSTANBUL ÜNİVERSİTESİ 564.0145
## 3 BOĞAZİÇİ ÜNİVERSİTESİ 562.5765
## 4 İSTANBUL MEDİPOL ÜNİVERSİTESİ 559.4780
## 5 GALATASARAY ÜNİVERSİTESİ 556.0948
## 6 ACIBADEM MEHMET ALİ AYDINLAR ÜNİVERSİTESİ 542.3482
## 7 SABANCI ÜNİVERSİTESİ 538.7725
## 8 YEDİTEPE ÜNİVERSİTESİ 531.3691
## 9 BAHÇEŞEHİR ÜNİVERSİTESİ 530.4845
## 10 İSTANBUL AYDIN ÜNİVERSİTESİ 525.5809
## # ... with 41 more rows
Let’s visualize this data in a barchart.
# Bar chart of the maximum score of each Istanbul university, ordered from the
# highest score to the lowest, with MEF University highlighted in red.
ggplot(
  maximum_scores,
  aes(x = reorder(University_Name, -Max_Score), y = Max_Score)
) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity")
  geom_col(aes(fill = University_Name == "MEF ÜNİVERSİTESİ")) +
  labs(
    title = "Maximum Score of Each University",
    x = "University",
    y = "Maximum score",
    fill = ""
  ) +
  # Rotate the long university names so that they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values pin each colour to its logical level; guide = "none" hides
  # the legend (guide = FALSE is deprecated in current ggplot2 releases).
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )
# Total general quota of each Istanbul university (sum over all of its rows),
# sorted from the largest total to the smallest.
department_quota <- osym_data_2017 %>%
  # Restrict to Istanbul rows first; the grouping below then only sees those
  filter(city == "İSTANBUL") %>%
  group_by(University_Name = university_name) %>%
  summarise(General_Quota = sum(general_quota)) %>%
  arrange(desc(General_Quota))
department_quota
## # A tibble: 51 x 2
## University_Name General_Quota
## <chr> <int>
## 1 İSTANBUL ÜNİVERSİTESİ 17809
## 2 MARMARA ÜNİVERSİTESİ 6200
## 3 İSTANBUL MEDİPOL ÜNİVERSİTESİ 4495
## 4 İSTANBUL GELİŞİM ÜNİVERSİTESİ 3950
## 5 BEYKENT ÜNİVERSİTESİ 3811
## 6 İSTANBUL TEKNİK ÜNİVERSİTESİ 3684
## 7 YILDIZ TEKNİK ÜNİVERSİTESİ 3652
## 8 İSTANBUL AYDIN ÜNİVERSİTESİ 3578
## 9 YEDİTEPE ÜNİVERSİTESİ 3442
## 10 BAHÇEŞEHİR ÜNİVERSİTESİ 2774
## # ... with 41 more rows
Let’s visualize this data in a barchart.
# Bar chart of the total general quota of each Istanbul university, ordered
# from the largest quota to the smallest, with MEF University highlighted in
# red.
ggplot(
  department_quota,
  aes(x = reorder(University_Name, -General_Quota), y = General_Quota)
) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity")
  geom_col(aes(fill = University_Name == "MEF ÜNİVERSİTESİ")) +
  labs(
    title = "University Department Quotas in İstanbul",
    x = "University",
    y = "Quota",
    fill = ""
  ) +
  # Rotate the long university names so that they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values pin each colour to its logical level; guide = "none" hides
  # the legend (guide = FALSE is deprecated in current ggplot2 releases).
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )
# Highest placement score of every MEF University program, sorted from the
# highest score to the lowest. Note that this reuses (overwrites) the name
# `maximum_scores`.
maximum_scores <- osym_data_2017 %>%
  # Only the MEF University rows are of interest here
  filter(university_name == "MEF ÜNİVERSİTESİ") %>%
  group_by(program_name) %>%
  summarise(Max_Score = max(max_score)) %>%
  arrange(desc(Max_Score))
maximum_scores
## # A tibble: 44 x 2
## program_name
## <chr>
## 1 Hukuk (Tam Burslu)
## 2 İngilizce Öğretmenliği (İngilizce) (Tam Burslu)
## 3 Psikoloji (İngilizce) (Tam Burslu)
## 4 İngilizce Öğretmenliği (İngilizce) (%75 Burslu)
## 5 Bilgisayar Mühendisliği (İngilizce) (Tam Burslu)
## 6 Endüstri Mühendisliği (İngilizce) (Tam Burslu)
## 7 Makine Mühendisliği (İngilizce) (Tam Burslu)
## 8 Mimarlık (İngilizce) (Tam Burslu)
## 9 Rehberlik ve Psikolojik Danışmanlık (İngilizce) (Tam Burslu)
## 10 Siyaset Bilimi ve Uluslararası İlişkiler (İngilizce) (Tam Burslu)
## # ... with 34 more rows, and 1 more variables: Max_Score <dbl>
Let’s visualize this data in a barchart.
# Bar chart of the maximum score of each MEF University program, ordered from
# the highest score to the lowest. Programs whose maximum score is above 445
# are drawn in grey, the remaining ones in red.
ggplot(
  maximum_scores,
  aes(
    x = reorder(program_name, -Max_Score),
    y = Max_Score,
    # The fill has to be mapped to a variable for scale_fill_manual() to have
    # any effect; the threshold is applied to the Max_Score column only.
    fill = Max_Score > 445
  )
) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity")
  geom_col() +
  labs(
    title = "Maximum Score of Programs on MEF University",
    x = "Programs",
    y = "Maximum score",
    fill = ""
  ) +
  # Rotate the long program names so that they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values pin each colour to its logical level; guide = "none" hides
  # the legend (guide = FALSE is deprecated in current ggplot2 releases).
  scale_fill_manual(
    values = c("TRUE" = "#707070", "FALSE" = "red"),
    guide = "none"
  )