Introduction

In this case study we explore the 2017 university entrance examination (YGS/LYS) dataset. The dataset consists of the undergraduate programs offered in 2017. Each program has a fixed number of available places (i.e. a quota). Students are placed according to their preference lists and their scores: each program is filled with students ranked by score until the number of placements equals the quota. The highest score among the students placed in a program is that program's maximum score, and the score of the last student placed is its minimum score.


Load Data

# Local file name of the dataset
data_file <- "osym_data_2017.RData"

# Download the dataset from GitHub only when no local copy exists yet, so
# re-running the script does not fetch the file again.
# mode = "wb" requests a binary transfer explicitly (relevant on Windows).
if (!file.exists(data_file)) {
  download.file(
    "https://mef-bda503.github.io/files/osym_data_2017.RData",
    data_file,
    mode = "wb"
  )
}

# Load tidyverse package
library(tidyverse)

# Load the data; the code below expects this to provide `osym_data_2017`
load(data_file)

Number of University Departments

The table below shows the number of departments offered by each university in Istanbul.

# Restrict to rows whose city is Istanbul, count the rows per university,
# and order the universities from the highest count to the lowest.
university_departments <- osym_data_2017 %>%
  filter(city == "İSTANBUL") %>%
  group_by(University_Name = university_name) %>%
  summarise(Departments = n()) %>%
  arrange(desc(Departments))
## Warning: package 'bindrcpp' was built under R version 3.4.2
university_departments
## # A tibble: 51 x 2
##                  University_Name Departments
##                            <chr>       <int>
##  1 İSTANBUL GELİŞİM ÜNİVERSİTESİ         212
##  2             OKAN ÜNİVERSİTESİ         172
##  3          BEYKENT ÜNİVERSİTESİ         169
##  4         YEDİTEPE ÜNİVERSİTESİ         165
##  5 İSTANBUL MEDİPOL ÜNİVERSİTESİ         155
##  6   İSTANBUL AYDIN ÜNİVERSİTESİ         154
##  7         İSTANBUL ÜNİVERSİTESİ         138
##  8    İSTANBUL AREL ÜNİVERSİTESİ         135
##  9   İSTANBUL BİLGİ ÜNİVERSİTESİ         131
## 10          MALTEPE ÜNİVERSİTESİ         123
## # ... with 41 more rows

Let’s visualize this data in a barchart.

# Bar chart of the department count per university, tallest bar first.
# The bar whose name equals 'MEF ÜNİVERSİTESİ' is red; all others are grey.
ggplot(
  university_departments,
  aes(x = reorder(University_Name, -Departments), y = Departments)
) +
  # Use the bare column name inside aes(), as the other plots in this file do,
  # rather than `university_departments$University_Name`.
  geom_col(aes(fill = University_Name == "MEF ÜNİVERSİTESİ")) +
  labs(
    title = "# of University Departments in Istanbul",
    x = "University",
    y = "# of Departments",
    fill = ""
  ) +
  # Rotate the university names so they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values bind each colour to its logical level regardless of which
  # levels occur; guide = "none" hides the legend (guide = FALSE is deprecated).
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )

Maximum Scores of Universities in Istanbul

# For each university with city equal to Istanbul, take the largest
# max_score across its rows and sort the universities by that value,
# highest first.
maximum_scores <- osym_data_2017 %>%
  filter(city == "İSTANBUL") %>%
  group_by(University_Name = university_name) %>%
  summarise(Max_Score = max(max_score)) %>%
  arrange(desc(Max_Score))

maximum_scores
## # A tibble: 51 x 2
##                              University_Name Max_Score
##                                        <chr>     <dbl>
##  1                          KOÇ ÜNİVERSİTESİ  569.1112
##  2                     İSTANBUL ÜNİVERSİTESİ  564.0145
##  3                     BOĞAZİÇİ ÜNİVERSİTESİ  562.5765
##  4             İSTANBUL MEDİPOL ÜNİVERSİTESİ  559.4780
##  5                  GALATASARAY ÜNİVERSİTESİ  556.0948
##  6 ACIBADEM MEHMET ALİ AYDINLAR ÜNİVERSİTESİ  542.3482
##  7                      SABANCI ÜNİVERSİTESİ  538.7725
##  8                     YEDİTEPE ÜNİVERSİTESİ  531.3691
##  9                   BAHÇEŞEHİR ÜNİVERSİTESİ  530.4845
## 10               İSTANBUL AYDIN ÜNİVERSİTESİ  525.5809
## # ... with 41 more rows

Let’s visualize this data in a barchart.

# Bar chart of each university's maximum score, highest first.
# The bar whose name equals 'MEF ÜNİVERSİTESİ' is red; all others are grey.
ggplot(
  maximum_scores,
  aes(x = reorder(University_Name, -Max_Score), y = Max_Score)
) +
  geom_col(aes(fill = University_Name == "MEF ÜNİVERSİTESİ")) +
  labs(
    title = "Maximum Score of Each University",
    x = "University",
    y = "Maximum score",
    fill = ""
  ) +
  # Rotate the university names so they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values bind each colour to its logical level regardless of which
  # levels occur; guide = "none" hides the legend (guide = FALSE is deprecated).
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )

University Department Quotas in Istanbul

# Sum general_quota over all rows of each university with city equal to
# Istanbul, then sort the universities by that total, largest first.
department_quota <- osym_data_2017 %>%
  filter(city == "İSTANBUL") %>%
  group_by(University_Name = university_name) %>%
  summarise(General_Quota = sum(general_quota)) %>%
  arrange(desc(General_Quota))

department_quota
## # A tibble: 51 x 2
##                  University_Name General_Quota
##                            <chr>         <int>
##  1         İSTANBUL ÜNİVERSİTESİ         17809
##  2          MARMARA ÜNİVERSİTESİ          6200
##  3 İSTANBUL MEDİPOL ÜNİVERSİTESİ          4495
##  4 İSTANBUL GELİŞİM ÜNİVERSİTESİ          3950
##  5          BEYKENT ÜNİVERSİTESİ          3811
##  6  İSTANBUL TEKNİK ÜNİVERSİTESİ          3684
##  7    YILDIZ TEKNİK ÜNİVERSİTESİ          3652
##  8   İSTANBUL AYDIN ÜNİVERSİTESİ          3578
##  9         YEDİTEPE ÜNİVERSİTESİ          3442
## 10       BAHÇEŞEHİR ÜNİVERSİTESİ          2774
## # ... with 41 more rows

Let’s visualize this data in a barchart.

# Bar chart of the total general quota per university, largest first.
# The bar whose name equals 'MEF ÜNİVERSİTESİ' is red; all others are grey.
ggplot(
  department_quota,
  aes(x = reorder(University_Name, -General_Quota), y = General_Quota)
) +
  geom_col(aes(fill = University_Name == "MEF ÜNİVERSİTESİ")) +
  labs(
    title = "University Department Quotas in İstanbul",
    x = "University",
    y = "Quota",
    fill = ""
  ) +
  # Rotate the university names so they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values bind each colour to its logical level regardless of which
  # levels occur; guide = "none" hides the legend (guide = FALSE is deprecated).
  scale_fill_manual(
    values = c("FALSE" = "#707070", "TRUE" = "red"),
    guide = "none"
  )

Programs at MEF University Ranked by Score

# Largest max_score for each program of 'MEF ÜNİVERSİTESİ', sorted from the
# highest score to the lowest.
# Note: this reassigns `maximum_scores`, replacing the per-university table
# built earlier in the file.
maximum_scores <- osym_data_2017 %>%
  filter(university_name == "MEF ÜNİVERSİTESİ") %>%
  group_by(program_name) %>%
  summarise(Max_Score = max(max_score)) %>%
  arrange(desc(Max_Score))

maximum_scores
## # A tibble: 44 x 2
##                                                         program_name
##                                                                <chr>
##  1                                                Hukuk (Tam Burslu)
##  2                   İngilizce Öğretmenliği (İngilizce) (Tam Burslu)
##  3                                Psikoloji (İngilizce) (Tam Burslu)
##  4                   İngilizce Öğretmenliği (İngilizce) (%75 Burslu)
##  5                  Bilgisayar Mühendisliği (İngilizce) (Tam Burslu)
##  6                    Endüstri Mühendisliği (İngilizce) (Tam Burslu)
##  7                      Makine Mühendisliği (İngilizce) (Tam Burslu)
##  8                                 Mimarlık (İngilizce) (Tam Burslu)
##  9      Rehberlik ve Psikolojik Danışmanlık (İngilizce) (Tam Burslu)
## 10 Siyaset Bilimi ve Uluslararası İlişkiler (İngilizce) (Tam Burslu)
## # ... with 34 more rows, and 1 more variables: Max_Score <dbl>

Let’s visualize this data in a barchart.

# Bar chart of the maximum score of each MEF program, highest first.
# Programs with Max_Score above 445 are grey; the rest are red.
ggplot(
  maximum_scores,
  aes(x = reorder(program_name, -Max_Score), y = Max_Score)
) +
  # The fill must be mapped to the threshold test here; without a fill
  # aesthetic the manual fill scale below has nothing to colour.
  geom_col(aes(fill = Max_Score > 445)) +
  labs(
    title = "Maximum Score of Programs on MEF University",
    x = "Programs",
    y = "Maximum score",
    fill = ""
  ) +
  # Rotate the program names so they do not overlap
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Compare the Max_Score column (not the whole data frame) and bind colours
  # to the logical levels by name; guide = "none" hides the legend.
  scale_fill_manual(
    values = c("TRUE" = "#707070", "FALSE" = "red"),
    guide = "none"
  )