Introduction

In this case study we explore the 2017 university entrance examination (YGS/LYS) dataset. The dataset consists of the undergraduate programs offered in 2017. Each program has an availability (i.e. a quota). Students are placed according to their preference lists and their scores: each program is filled with students ranked by score until the number of placements equals the availability. The score of the highest-scoring student placed in a program is that program's maximum score, and the score of the last student placed is its minimum score.

Load Data

# Download the dataset from GitHub only when no local copy exists, so that
# re-running the script does not fetch the file again.
data_file <- "osym_data_2017.RData"
if (!file.exists(data_file)) {
  # mode = "wb" requests a binary transfer, since .RData is a binary format.
  download.file(
    "https://mef-bda503.github.io/files/osym_data_2017.RData",
    data_file,
    mode = "wb"
  )
}
# Install tidyverse if not already installed. requireNamespace() checks for
# the package directly instead of scanning the installed.packages() matrix.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", repos = "https://cran.r-project.org")
}
# Load tidyverse package
library(tidyverse)
# Load the data: restores the objects saved in the file into the workspace
# (the analysis below uses `osym_data_2017`).
load(data_file)

Quota of Universities in Istanbul

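The table below shows, for each university in Istanbul, the number of programs it offers in the dataset (computed as a row count), sorted in descending order.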

# Number of dataset rows (programs) per university located in Istanbul,
# sorted from the largest count to the smallest.
# The city name is spelled with a Unicode escape (\u0130 is the dotted
# capital I) so the comparison does not depend on the encoding in which this
# file is saved or read.
university_quota <- osym_data_2017 %>%
  filter(city == "\u0130STANBUL") %>%
  group_by(university_name) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Print the summary table.
university_quota
## # A tibble: 51 x 2
##                  university_name count
##                            <chr> <int>
##  1 İSTANBUL GELİŞİM ÜNİVERSİTESİ   212
##  2             OKAN ÜNİVERSİTESİ   172
##  3          BEYKENT ÜNİVERSİTESİ   169
##  4         YEDİTEPE ÜNİVERSİTESİ   165
##  5 İSTANBUL MEDİPOL ÜNİVERSİTESİ   155
##  6   İSTANBUL AYDIN ÜNİVERSİTESİ   154
##  7         İSTANBUL ÜNİVERSİTESİ   138
##  8    İSTANBUL AREL ÜNİVERSİTESİ   135
##  9   İSTANBUL BİLGİ ÜNİVERSİTESİ   131
## 10          MALTEPE ÜNİVERSİTESİ   123
## # ... with 41 more rows

Let’s visualize this data in a bar chart.

# Bar chart of the per-university counts, ordered from the largest count to
# the smallest. The bar for MEF University is filled red; all other bars are
# gray. The university name is written with Unicode escapes (\u00dc = U with
# diaeresis, \u0130 = dotted capital I) so the comparison does not depend on
# the encoding of this file.
ggplot(university_quota, aes(x = reorder(university_name, -count), y = count)) +
  # Inside aes() the column is referenced by name rather than through
  # `university_quota$...`, so the mapping is evaluated on the plot's data.
  geom_bar(
    stat = "identity",
    aes(fill = university_name == "MEF \u00dcN\u0130VERS\u0130TES\u0130")
  ) +
  labs(title = "Quota of University in Istanbul", x = "University", y = "Count", fill = "") +
  # Rotate the university names so they stay readable on the x axis.
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values bind each colour to its logical level regardless of which
  # levels are present; guide = "none" hides the legend.
  scale_fill_manual(values = c("FALSE" = "#707070", "TRUE" = "red"), guide = "none")

Max scores of Universities in Istanbul

# Highest `max_score` among the programs of each university located in
# Istanbul, sorted from the highest score to the lowest.
# The city name is spelled with a Unicode escape (\u0130 is the dotted
# capital I) so the comparison does not depend on the encoding in which this
# file is saved or read.
question2 <- osym_data_2017 %>%
  filter(city == "\u0130STANBUL") %>%
  group_by(university_name) %>%
  summarise(max_puan = max(max_score)) %>%
  arrange(desc(max_puan))

# Print the summary table.
question2
## # A tibble: 51 x 2
##                              university_name max_puan
##                                        <chr>    <dbl>
##  1                          KOÇ ÜNİVERSİTESİ 569.1112
##  2                     İSTANBUL ÜNİVERSİTESİ 564.0145
##  3                     BOĞAZİÇİ ÜNİVERSİTESİ 562.5765
##  4             İSTANBUL MEDİPOL ÜNİVERSİTESİ 559.4780
##  5                  GALATASARAY ÜNİVERSİTESİ 556.0948
##  6 ACIBADEM MEHMET ALİ AYDINLAR ÜNİVERSİTESİ 542.3482
##  7                      SABANCI ÜNİVERSİTESİ 538.7725
##  8                     YEDİTEPE ÜNİVERSİTESİ 531.3691
##  9                   BAHÇEŞEHİR ÜNİVERSİTESİ 530.4845
## 10               İSTANBUL AYDIN ÜNİVERSİTESİ 525.5809
## # ... with 41 more rows

Let’s visualize this data in a bar chart.

# Bar chart of each university's maximum score, ordered from the highest
# score to the lowest. The bar for MEF University is filled red; all other
# bars are gray. The university name is written with Unicode escapes
# (\u00dc = U with diaeresis, \u0130 = dotted capital I) so the comparison
# does not depend on the encoding of this file.
ggplot(question2, aes(x = reorder(university_name, -max_puan), y = max_puan)) +
  # Inside aes() the column is referenced by name rather than through
  # `question2$...`, so the mapping is evaluated on the plot's data.
  geom_bar(
    stat = "identity",
    aes(fill = university_name == "MEF \u00dcN\u0130VERS\u0130TES\u0130")
  ) +
  labs(title = "Maximum score of each university", x = "University", y = "Maximum score", fill = "") +
  # Rotate the university names so they stay readable on the x axis.
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values bind each colour to its logical level regardless of which
  # levels are present; guide = "none" hides the legend.
  scale_fill_manual(values = c("FALSE" = "#707070", "TRUE" = "red"), guide = "none")

Number of departments of Universities in Istanbul

# For each university located in Istanbul: the number of dataset rows
# (`bolum_sayisi`, i.e. number of departments) and the sum of `general_quota`
# over those rows, sorted by total quota from the largest to the smallest.
# The city name is spelled with a Unicode escape (\u0130 is the dotted
# capital I) so the comparison does not depend on the encoding in which this
# file is saved or read.
question3 <- osym_data_2017 %>%
  filter(city == "\u0130STANBUL") %>%
  group_by(university_name) %>%
  summarise(bolum_sayisi = n(), general_quota = sum(general_quota)) %>%
  arrange(desc(general_quota))

# Print the summary table.
question3
## # A tibble: 51 x 3
##                  university_name bolum_sayisi general_quota
##                            <chr>        <int>         <int>
##  1         İSTANBUL ÜNİVERSİTESİ          138         17809
##  2          MARMARA ÜNİVERSİTESİ           80          6200
##  3 İSTANBUL MEDİPOL ÜNİVERSİTESİ          155          4495
##  4 İSTANBUL GELİŞİM ÜNİVERSİTESİ          212          3950
##  5          BEYKENT ÜNİVERSİTESİ          169          3811
##  6  İSTANBUL TEKNİK ÜNİVERSİTESİ           76          3684
##  7    YILDIZ TEKNİK ÜNİVERSİTESİ           53          3652
##  8   İSTANBUL AYDIN ÜNİVERSİTESİ          154          3578
##  9         YEDİTEPE ÜNİVERSİTESİ          165          3442
## 10       BAHÇEŞEHİR ÜNİVERSİTESİ          122          2774
## # ... with 41 more rows

Let’s visualize this data in a bar chart.

# Bar chart of the number of departments per university, ordered from the
# largest number to the smallest. The bar for MEF University is filled red;
# all other bars are gray. Turkish characters in the university name and in
# the title are written with Unicode escapes (\u00dc = U with diaeresis,
# \u0130 = dotted capital I) so they do not depend on the encoding of this
# file.
ggplot(question3, aes(x = reorder(university_name, -bolum_sayisi), y = bolum_sayisi)) +
  # Inside aes() the column is referenced by name rather than through
  # `question3$...`, so the mapping is evaluated on the plot's data.
  geom_bar(
    stat = "identity",
    aes(fill = university_name == "MEF \u00dcN\u0130VERS\u0130TES\u0130")
  ) +
  labs(
    title = "Number of Departments of Universities in \u0130stanbul",
    x = "University",
    y = "Departments",
    fill = ""
  ) +
  # Rotate the university names so they stay readable on the x axis.
  theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust = 0)) +
  # Named values bind each colour to its logical level regardless of which
  # levels are present; guide = "none" hides the legend.
  scale_fill_manual(values = c("FALSE" = "#707070", "TRUE" = "red"), guide = "none")