install.packages("readxl")
## 
## The downloaded binary packages are in
##  /var/folders/qv/khc24w491dn9d18j77490j380000gn/T//RtmppFJPJJ/downloaded_packages
install.packages("tidyverse")
## 
## The downloaded binary packages are in
##  /var/folders/qv/khc24w491dn9d18j77490j380000gn/T//RtmppFJPJJ/downloaded_packages

Used This Libraries

library(readxl)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.7
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/pj18-mehmetakk/blob/master/egm_example_data.xlsx?raw=true",mode = 'wb',destfile=tmp)
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
file.remove(tmp)
## [1] TRUE
head(raw_data)
## # A tibble: 6 x 15
##   X__1  X__2    X__3   X__4   X__5   X__6 X__7    X__8 X__9  X__10  X__11
##   <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <dbl> <chr> <chr>  <dbl>
## 1 06.0… Axa … 3.33e4 2.11e8 4.33e7 1.94e8 23     38686 856   522   4.01e4
## 2 06.0… BNP … 1.87e5 1.55e9 2.02e8 1.30e9 1196  149094 48359 29668 2.27e5
## 3 06.0… Cign… 1.23e5 5.64e8 1.04e8 5.01e8 57    107774 23016 2304  1.33e5
## 4 06.0… Fiba… 3.88e4 2.48e8 3.46e7 2.16e8 103    33841 8088  1075  4.30e4
## 5 06.0… Gara… 1.13e6 8.42e9 1.17e9 6.86e9 5669  963726 1918… 58214 1.21e6
## 6 06.0… Grou… 6.21e4 8.09e8 8.31e7 6.17e8 1963   61411 8351  1012  7.08e4
## # ... with 4 more variables: X__12 <dbl>, X__13 <dbl>, X__14 <dbl>,
## #   X__15 <dbl>
tail(raw_data)
## # A tibble: 6 x 15
##   X__1  X__2    X__3   X__4   X__5   X__6 X__7    X__8 X__9  X__10  X__11
##   <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <dbl> <chr> <chr>  <dbl>
## 1 30.0… Halk… 511848 2.85e9 5.28e8 2.45e9 228   296337 2281… 56967 581442
## 2 30.0… Katı… 205994 9.08e8 1.82e8 8.13e8 50    174654 48494 52861 276009
## 3 30.0… Metl… 187483 1.37e9 2.34e8 1.14e9 312   153787 35060 17179 206026
## 4 30.0… NN H… 265725 2.61e9 3.82e8 1.98e9 3771  265714 33290 10259 309263
## 5 30.0… Vakı… 502994 5.38e9 7.57e8 4.18e9 6138  628544 75807 29694 734045
## 6 30.0… Zira… 598703 3.11e9 5.90e8 2.61e9 869   406934 2179… 53558 678456
## # ... with 4 more variables: X__12 <dbl>, X__13 <dbl>, X__14 <dbl>,
## #   X__15 <dbl>
colnames(raw_data) <- c("date","pension_fund_company","n_of_participants",  "fund_size_participants",   "gov_contribution", "contribution", "n_of_pensioners",  "n_of_ind_contracts",   "n_of_group_ind_contracts", "n_of_employer_group_certificates", "n_total","size_of_ind_contracts",  "size_of_group_ind_contracts",  "size_of_employer_group_certificates"   ,"size_total")
egm_data <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) 
print(egm_data,width=Inf)
## # A tibble: 1,362 x 15
##    date       pension_fund_company            n_of_participants
##    <chr>      <chr>                                       <dbl>
##  1 06.01.2017 Axa Hayat ve Emeklilik                      33327
##  2 06.01.2017 BNP Paribas Cardif Emeklilik               187289
##  3 06.01.2017 Cigna Finans Emeklilik ve Hayat            122997
##  4 06.01.2017 Fiba Emeklilik ve Hayat                     38754
##  5 06.01.2017 Garanti Emeklilik ve Hayat                1134051
##  6 06.01.2017 Groupama Emeklilik                          62080
##  7 06.01.2017 Halk Hayat ve Emeklilik                    445422
##  8 06.01.2017 Katılım Emeklilik ve Hayat                 162207
##  9 06.01.2017 Metlife Emeklilik ve Hayat                 179609
## 10 06.01.2017 NN Hayat ve Emeklilik                      268929
##    fund_size_participants gov_contribution contribution n_of_pensioners
##                     <dbl>            <dbl>        <dbl> <chr>          
##  1              210804109         43342953    193638362 23             
##  2             1545869615        202292735   1299990793 1196           
##  3              564291355        103628903    501174178 57             
##  4              247746061         34576421    216460096 103            
##  5             8416458527       1169252346   6858941516 5669           
##  6              808664284         83060092    616889142 1963           
##  7             1948976004        349943882   1778929612 111            
##  8              533523277        105023045    508788631 20             
##  9             1015462865        167726855    908695745 164            
## 10             2078148069        278835177   1691654417 2337           
##    n_of_ind_contracts n_of_group_ind_contracts
##                 <dbl> <chr>                   
##  1              38686 856                     
##  2             149094 48359                   
##  3             107774 23016                   
##  4              33841 8088                    
##  5             963726 191898                  
##  6              61411 8351                    
##  7             214731 238995                  
##  8             109836 46228                   
##  9             143625 35857                   
## 10             261658 39099                   
##    n_of_employer_group_certificates n_total size_of_ind_contracts
##    <chr>                              <dbl>                 <dbl>
##  1 522                                40064             179784075
##  2 29668                             227121            1011515864
##  3 2304                              133094             410000280
##  4 1075                               43004             177009961
##  5 58214                            1213838            5088145866
##  6 1012                               70774             540569297
##  7 49978                             503704             739849661
##  8 49067                             205131             349524378
##  9 18318                             197800             626504799
## 10 11007                             311764            1410370190
##    size_of_group_ind_contracts size_of_employer_group_certificates
##                          <dbl>                               <dbl>
##  1                     7791701                             3542753
##  2                   216811175                            56924248
##  3                    78595897                             3171981
##  4                    30913194                             5888490
##  5                  1436618301                           223937666
##  6                    66914330                             2532006
##  7                   928353692                            99514882
##  8                   112523243                            38497518
##  9                   160814973                           111520785
## 10                   229308207                            27057100
##    size_total
##         <dbl>
##  1  191118529
##  2 1285251288
##  3  491768158
##  4  213811645
##  5 6748701834
##  6  610015633
##  7 1767718235
##  8  500545140
##  9  898840557
## 10 1666735497
## # ... with 1,352 more rows
saveRDS(egm_data,file="egm_data.rds")

ANALYSIS

egm_data %>% 
  select(date,n_of_participants,pension_fund_company,n_of_pensioners,n_of_ind_contracts) %>% arrange(n_of_pensioners) %>% 
  filter(n_of_participants>10000 & n_of_participants<50000)
## # A tibble: 226 x 5
##    date  n_of_participan… pension_fund_co… n_of_pensioners n_of_ind_contr…
##    <chr>            <dbl> <chr>            <chr>                     <dbl>
##  1 06.0…            38754 Fiba Emeklilik … 103                       33841
##  2 13.0…            38698 Fiba Emeklilik … 103                       33793
##  3 20.0…            38633 Fiba Emeklilik … 106                       33714
##  4 27.0…            38579 Fiba Emeklilik … 106                       33633
##  5 31.0…            38518 Fiba Emeklilik … 110                       33559
##  6 03.0…            38510 Fiba Emeklilik … 110                       33543
##  7 10.0…            38513 Fiba Emeklilik … 110                       33558
##  8 17.0…            38396 Fiba Emeklilik … 112                       33451
##  9 24.0…            38311 Fiba Emeklilik … 114                       33370
## 10 28.0…            38308 Fiba Emeklilik … 115                       33343
## # ... with 216 more rows
egm_data %>% 
  select(date,n_of_participants,pension_fund_company,n_of_pensioners,n_of_ind_contracts) %>% arrange(n_of_pensioners) %>% 
  filter(n_of_participants>50000 & n_of_participants<100000)
## # A tibble: 76 x 5
##    date  n_of_participan… pension_fund_co… n_of_pensioners n_of_ind_contr…
##    <chr>            <dbl> <chr>            <chr>                     <dbl>
##  1 06.0…            62080 Groupama Emekli… 1963                      61411
##  2 13.0…            61925 Groupama Emekli… 1981                      61246
##  3 20.0…            61774 Groupama Emekli… 1998                      61064
##  4 27.0…            61590 Groupama Emekli… 2023                      60852
##  5 31.0…            61591 Groupama Emekli… 2027                      60855
##  6 03.0…            61576 Groupama Emekli… 2039                      60833
##  7 10.0…            61427 Groupama Emekli… 2056                      60654
##  8 17.0…            61298 Groupama Emekli… 2073                      60495
##  9 24.0…            61053 Groupama Emekli… 2096                      60222
## 10 28.0…            61050 Groupama Emekli… 2102                      60217
## # ... with 66 more rows
egm_data  %>% group_by(date) %>%summarise(SumOfParticipants= sum(n_of_participants))
## # A tibble: 76 x 2
##    date       SumOfParticipants
##    <chr>                  <dbl>
##  1 01.09.2017           6832480
##  2 01.12.2017           6899633
##  3 02.02.2018           6926449
##  4 02.03.2018           6948787
##  5 02.06.2017           6792371
##  6 03.02.2017           6720725
##  7 03.03.2017           6745783
##  8 03.11.2017           6870150
##  9 04.08.2017           6825395
## 10 05.01.2018           6928979
## # ... with 66 more rows

Including Plots

ggplot(data=egm_data ,aes(x = n_of_participants,y =n_of_employer_group_certificates)) + geom_point()