library(readxl)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------- tidyverse 1.2.1 --
## <U+221A> tibble  1.4.2     <U+221A> purrr   0.2.5
## <U+221A> tidyr   0.8.1     <U+221A> stringr 1.3.1
## <U+221A> readr   1.1.1     <U+221A> forcats 0.3.0
## -- Conflicts ------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Loading Data

Loading data with read_xlsx.

#setwd("C:\\Users\\STUDENT\\Desktop\\R")
data <- read_xlsx("egmdata.xlsx",1)

First ten row of Data.

data %>%
  tbl_df
## # A tibble: 1,368 x 15
##    date  pension_fund_co~ n_of_participan~ fund_size_parti~
##    <chr> <chr>                       <dbl>            <dbl>
##  1 06.0~ Aegon Emeklilik~            42826        139193651
##  2 06.0~ Allianz Hayat v~           118867       1840701596
##  3 06.0~ Allianz Yaşam v~           726684       7387398235
##  4 06.0~ Anadolu Hayat E~          1119572       9975066501
##  5 06.0~ Asya Emeklilik ~           129389        337754079
##  6 06.0~ Avivasa Emeklil~           876894      10550032358
##  7 06.0~ Axa Hayat ve Em~            33327        210804109
##  8 06.0~ BNP Paribas Car~           187289       1545869615
##  9 06.0~ Cigna Finans Em~           122997        564291355
## 10 06.0~ Fiba Emeklilik ~            38754        247746061
## # ... with 1,358 more rows, and 11 more variables: gov_contribution <dbl>,
## #   contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## #   n_of_group_ind_contracts <chr>,
## #   n_of_employer_group_certificates <chr>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>

Plots of ggplot2

We could see the relations with ggplot2 graphics. x coordinate is number of participants of employers and y coordinate is fund size of participants and color is contribution level of employers.

ggplot(data = data, aes(x = n_of_participants, y = fund_size_participants, color = contribution)) + geom_point()
## Warning: Removed 1 rows containing missing values (geom_point).

#Listing companies which have better proposals Lets see which company is best for fund and other proposals. We see “Allianz Hayat ve Emeklilik” is the best company for retirement probabilities.

data %>%
 mutate(propTotal = size_total / n_total, propFund = fund_size_participants / n_of_participants) %>%
 select(pension_fund_company,propTotal,propFund)%>%
  filter(propTotal + propFund > 30000) %>%

  arrange(desc(propFund))
## # A tibble: 45 x 3
##    pension_fund_company       propTotal propFund
##    <chr>                          <dbl>    <dbl>
##  1 Allianz Hayat ve Emeklilik    13551.   21602.
##  2 Allianz Hayat ve Emeklilik    13503.   21545.
##  3 Allianz Hayat ve Emeklilik    13453.   21420.
##  4 Allianz Hayat ve Emeklilik    13355.   21283.
##  5 Allianz Hayat ve Emeklilik    13405.   21276.
##  6 Allianz Hayat ve Emeklilik    13365.   21248.
##  7 Allianz Hayat ve Emeklilik    13316.   21188.
##  8 Allianz Hayat ve Emeklilik    13163.   21081.
##  9 Allianz Hayat ve Emeklilik    13195.   21078.
## 10 Allianz Hayat ve Emeklilik    13188.   21074.
## # ... with 35 more rows

Contracts and Goverment Contribution

Here, I declare contracts variable to sum up the contracst columns and summarise them to conclude which companies are the most supported by goverment and seek the correlation between contracts and goverment supply. To see this, I apply below code as you can see and then applied ggplot and set the color as company names.

data %>%
  group_by(pension_fund_company) %>%
  mutate(contracts = n_of_ind_contracts + n_of_ind_contracts) %>%
  select(gov_contribution,contracts) %>%
  summarise(govContAvg = mean(gov_contribution),contAvg = mean(contracts)) %>%
  arrange(desc(govContAvg)) %>%
  ggplot(aes(x = govContAvg, y = contAvg, color = pension_fund_company)) + geom_point() 
## Adding missing grouping variables: `pension_fund_company`
## Warning: Removed 1 rows containing missing values (geom_point).

And to be clear, you can see the list of companies

data %>%
  group_by(pension_fund_company) %>%
  mutate(contracts = n_of_ind_contracts + n_of_ind_contracts) %>%
  select(gov_contribution,contracts) %>%
  summarise(govContAvg = mean(gov_contribution),contAvg = mean(contracts)) %>%
  arrange(desc(govContAvg))
## Adding missing grouping variables: `pension_fund_company`
## # A tibble: 20 x 3
##    pension_fund_company             govContAvg  contAvg
##    <chr>                                 <dbl>    <dbl>
##  1 Anadolu Hayat Emeklilik         1812891868. 1917969.
##  2 Avivasa Emeklilik ve Hayat      1659169052. 2000998.
##  3 Garanti Emeklilik ve Hayat      1421056644. 2018201.
##  4 Allianz Yaşam ve Emeklilik       986490138. 1268631.
##  5 Vakıf Emeklilik ve Hayat         712214429. 1201556.
##  6 Vakıf Emeklilik                  597511296. 1131911.
##  7 Ziraat Hayat ve Emeklilik        492573034.  756586.
##  8 Halk Hayat ve Emeklilik          449886158.  509265.
##  9 NN Hayat ve Emeklilik            335923127.  525506.
## 10 Allianz Hayat ve Emeklilik       272992884.  214963.
## 11 BNP Paribas Cardif Emeklilik     241038796.  295783.
## 12 Metlife Emeklilik ve Hayat       204758206.  300211.
## 13 Katılım Emeklilik ve Hayat       141938310.  270083.
## 14 Cigna Finans Emeklilik ve Hayat  116977057.  198454.
## 15 Asya Emeklilik ve Hayat           75743346.   76840.
## 16 Bereket Emeklilik ve Hayat        70691530.   68554.
## 17 Axa Hayat ve Emeklilik            54726264.   81346.
## 18 Fiba Emeklilik ve Hayat           50442742.   68160.
## 19 Aegon Emeklilik ve Hayat          15441978.   65663.
## 20 Groupama Emeklilik                      NA       NA

Personal gatherings of Companies

So, I summed the number of personas and their applications on retirement companies and summarise it in table. After that, I measure the distinct values on table to see which companies are good and which are worse by the measure of individual impact.

data %>%group_by(pension_fund_company)%>%
  mutate(indSum = (as.numeric(n_of_participants) + as.numeric(n_of_pensioners) + as.numeric(n_of_ind_contracts) + as.numeric(n_of_employer_group_certificates))) %>%
  select(indSum) %>%
  arrange(desc(indSum)) %>%
  distinct(pension_fund_company) 
## Adding missing grouping variables: `pension_fund_company`
## # A tibble: 20 x 1
## # Groups:   pension_fund_company [20]
##    pension_fund_company           
##    <chr>                          
##  1 Garanti Emeklilik ve Hayat     
##  2 Anadolu Hayat Emeklilik        
##  3 Avivasa Emeklilik ve Hayat     
##  4 Allianz Yaşam ve Emeklilik     
##  5 Vakıf Emeklilik ve Hayat       
##  6 Vakıf Emeklilik                
##  7 Ziraat Hayat ve Emeklilik      
##  8 Halk Hayat ve Emeklilik        
##  9 NN Hayat ve Emeklilik          
## 10 Katılım Emeklilik ve Hayat     
## 11 BNP Paribas Cardif Emeklilik   
## 12 Metlife Emeklilik ve Hayat     
## 13 Allianz Hayat ve Emeklilik     
## 14 Cigna Finans Emeklilik ve Hayat
## 15 Asya Emeklilik ve Hayat        
## 16 Bereket Emeklilik ve Hayat     
## 17 Fiba Emeklilik ve Hayat        
## 18 Groupama Emeklilik             
## 19 Axa Hayat ve Emeklilik         
## 20 Aegon Emeklilik ve Hayat

If you want to see the graph of this table:

data %>%group_by(pension_fund_company)%>%
  mutate(indSum = (as.numeric(n_of_participants) + as.numeric(n_of_pensioners) + as.numeric(n_of_ind_contracts) + as.numeric(n_of_employer_group_certificates))) %>%
  select(indSum) %>%
  arrange(desc(indSum)) %>% ggplot(aes(x = pension_fund_company,y = indSum, color = pension_fund_company)) + geom_point()
## Adding missing grouping variables: `pension_fund_company`
## Warning: Removed 1 rows containing missing values (geom_point).

Total Change of Companies

At last, you can want to see changes in total or other variables. I can simlpy make a graph of size_total to show total changes.

data %>% group_by(pension_fund_company) %>%
  select(size_total,n_total) %>%
  arrange(size_total) %>%
  ggplot(aes(x = n_total, y = size_total,color=pension_fund_company)) + geom_line()
## Adding missing grouping variables: `pension_fund_company`
## Warning: Removed 1 rows containing missing values (geom_path).

If you don’t understand easily, I can show you the table of this chart to see the top values on it.You can see the top values by the means of avg size_total value.

data %>% group_by(pension_fund_company) %>%
  select(size_total,n_total) %>%
  summarise(sizeAvg = mean(size_total),nAvg = mean(n_total)) %>%
  arrange(desc(sizeAvg)) %>%
  distinct
## Adding missing grouping variables: `pension_fund_company`
## # A tibble: 20 x 3
##    pension_fund_company                sizeAvg     nAvg
##    <chr>                                 <dbl>    <dbl>
##  1 Avivasa Emeklilik ve Hayat      9427183177. 1114567.
##  2 Anadolu Hayat Emeklilik         8951986575. 1309971.
##  3 Garanti Emeklilik ve Hayat      7342203249. 1264901.
##  4 Allianz Yaşam ve Emeklilik      6337016594.  932642.
##  5 Vakıf Emeklilik ve Hayat        3964040974.  706774.
##  6 Vakıf Emeklilik                 3672775808.  673302.
##  7 Ziraat Hayat ve Emeklilik       2186487302.  635485.
##  8 Halk Hayat ve Emeklilik         2095445535.  541190.
##  9 NN Hayat ve Emeklilik           1800532651.  309378.
## 10 Allianz Hayat ve Emeklilik      1559554275.  129664.
## 11 BNP Paribas Cardif Emeklilik    1370387322.  223224.
## 12 Metlife Emeklilik ve Hayat      1011349369.  203391.
## 13 Katılım Emeklilik ve Hayat       638027668.  233372.
## 14 Cigna Finans Emeklilik ve Hayat  510333188.  121007.
## 15 Fiba Emeklilik ve Hayat          334790910.   48915.
## 16 Asya Emeklilik ve Hayat          288930339.  132885 
## 17 Bereket Emeklilik ve Hayat       259709813.  119469.
## 18 Axa Hayat ve Emeklilik           227035040.   42409.
## 19 Aegon Emeklilik ve Hayat         102665246.   47594.
## 20 Groupama Emeklilik                      NA       NA