library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------- tidyverse 1.2.1 --
## <U+221A> tibble 1.4.2 <U+221A> purrr 0.2.5
## <U+221A> tidyr 0.8.1 <U+221A> stringr 1.3.1
## <U+221A> readr 1.1.1 <U+221A> forcats 0.3.0
## -- Conflicts ------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Loading data with read_xlsx.
#setwd("C:\\Users\\STUDENT\\Desktop\\R")
data <- read_xlsx("egmdata.xlsx",1)
First ten row of Data.
data %>%
tbl_df
## # A tibble: 1,368 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~
## <chr> <chr> <dbl> <dbl>
## 1 06.0~ Aegon Emeklilik~ 42826 139193651
## 2 06.0~ Allianz Hayat v~ 118867 1840701596
## 3 06.0~ Allianz Yaşam v~ 726684 7387398235
## 4 06.0~ Anadolu Hayat E~ 1119572 9975066501
## 5 06.0~ Asya Emeklilik ~ 129389 337754079
## 6 06.0~ Avivasa Emeklil~ 876894 10550032358
## 7 06.0~ Axa Hayat ve Em~ 33327 210804109
## 8 06.0~ BNP Paribas Car~ 187289 1545869615
## 9 06.0~ Cigna Finans Em~ 122997 564291355
## 10 06.0~ Fiba Emeklilik ~ 38754 247746061
## # ... with 1,358 more rows, and 11 more variables: gov_contribution <dbl>,
## # contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## # n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
We could see the relations with ggplot2 graphics. x coordinate is number of participants of employers and y coordinate is fund size of participants and color is contribution level of employers.
ggplot(data = data, aes(x = n_of_participants, y = fund_size_participants, color = contribution)) + geom_point()
## Warning: Removed 1 rows containing missing values (geom_point).
#Listing companies which have better proposals Lets see which company is best for fund and other proposals. We see “Allianz Hayat ve Emeklilik” is the best company for retirement probabilities.
data %>%
mutate(propTotal = size_total / n_total, propFund = fund_size_participants / n_of_participants) %>%
select(pension_fund_company,propTotal,propFund)%>%
filter(propTotal + propFund > 30000) %>%
arrange(desc(propFund))
## # A tibble: 45 x 3
## pension_fund_company propTotal propFund
## <chr> <dbl> <dbl>
## 1 Allianz Hayat ve Emeklilik 13551. 21602.
## 2 Allianz Hayat ve Emeklilik 13503. 21545.
## 3 Allianz Hayat ve Emeklilik 13453. 21420.
## 4 Allianz Hayat ve Emeklilik 13355. 21283.
## 5 Allianz Hayat ve Emeklilik 13405. 21276.
## 6 Allianz Hayat ve Emeklilik 13365. 21248.
## 7 Allianz Hayat ve Emeklilik 13316. 21188.
## 8 Allianz Hayat ve Emeklilik 13163. 21081.
## 9 Allianz Hayat ve Emeklilik 13195. 21078.
## 10 Allianz Hayat ve Emeklilik 13188. 21074.
## # ... with 35 more rows
Here, I declare contracts variable to sum up the contracst columns and summarise them to conclude which companies are the most supported by goverment and seek the correlation between contracts and goverment supply. To see this, I apply below code as you can see and then applied ggplot and set the color as company names.
data %>%
group_by(pension_fund_company) %>%
mutate(contracts = n_of_ind_contracts + n_of_ind_contracts) %>%
select(gov_contribution,contracts) %>%
summarise(govContAvg = mean(gov_contribution),contAvg = mean(contracts)) %>%
arrange(desc(govContAvg)) %>%
ggplot(aes(x = govContAvg, y = contAvg, color = pension_fund_company)) + geom_point()
## Adding missing grouping variables: `pension_fund_company`
## Warning: Removed 1 rows containing missing values (geom_point).
And to be clear, you can see the list of companies
data %>%
group_by(pension_fund_company) %>%
mutate(contracts = n_of_ind_contracts + n_of_ind_contracts) %>%
select(gov_contribution,contracts) %>%
summarise(govContAvg = mean(gov_contribution),contAvg = mean(contracts)) %>%
arrange(desc(govContAvg))
## Adding missing grouping variables: `pension_fund_company`
## # A tibble: 20 x 3
## pension_fund_company govContAvg contAvg
## <chr> <dbl> <dbl>
## 1 Anadolu Hayat Emeklilik 1812891868. 1917969.
## 2 Avivasa Emeklilik ve Hayat 1659169052. 2000998.
## 3 Garanti Emeklilik ve Hayat 1421056644. 2018201.
## 4 Allianz Yaşam ve Emeklilik 986490138. 1268631.
## 5 Vakıf Emeklilik ve Hayat 712214429. 1201556.
## 6 Vakıf Emeklilik 597511296. 1131911.
## 7 Ziraat Hayat ve Emeklilik 492573034. 756586.
## 8 Halk Hayat ve Emeklilik 449886158. 509265.
## 9 NN Hayat ve Emeklilik 335923127. 525506.
## 10 Allianz Hayat ve Emeklilik 272992884. 214963.
## 11 BNP Paribas Cardif Emeklilik 241038796. 295783.
## 12 Metlife Emeklilik ve Hayat 204758206. 300211.
## 13 Katılım Emeklilik ve Hayat 141938310. 270083.
## 14 Cigna Finans Emeklilik ve Hayat 116977057. 198454.
## 15 Asya Emeklilik ve Hayat 75743346. 76840.
## 16 Bereket Emeklilik ve Hayat 70691530. 68554.
## 17 Axa Hayat ve Emeklilik 54726264. 81346.
## 18 Fiba Emeklilik ve Hayat 50442742. 68160.
## 19 Aegon Emeklilik ve Hayat 15441978. 65663.
## 20 Groupama Emeklilik NA NA
So, I summed the number of personas and their applications on retirement companies and summarise it in table. After that, I measure the distinct values on table to see which companies are good and which are worse by the measure of individual impact.
data %>%group_by(pension_fund_company)%>%
mutate(indSum = (as.numeric(n_of_participants) + as.numeric(n_of_pensioners) + as.numeric(n_of_ind_contracts) + as.numeric(n_of_employer_group_certificates))) %>%
select(indSum) %>%
arrange(desc(indSum)) %>%
distinct(pension_fund_company)
## Adding missing grouping variables: `pension_fund_company`
## # A tibble: 20 x 1
## # Groups: pension_fund_company [20]
## pension_fund_company
## <chr>
## 1 Garanti Emeklilik ve Hayat
## 2 Anadolu Hayat Emeklilik
## 3 Avivasa Emeklilik ve Hayat
## 4 Allianz Yaşam ve Emeklilik
## 5 Vakıf Emeklilik ve Hayat
## 6 Vakıf Emeklilik
## 7 Ziraat Hayat ve Emeklilik
## 8 Halk Hayat ve Emeklilik
## 9 NN Hayat ve Emeklilik
## 10 Katılım Emeklilik ve Hayat
## 11 BNP Paribas Cardif Emeklilik
## 12 Metlife Emeklilik ve Hayat
## 13 Allianz Hayat ve Emeklilik
## 14 Cigna Finans Emeklilik ve Hayat
## 15 Asya Emeklilik ve Hayat
## 16 Bereket Emeklilik ve Hayat
## 17 Fiba Emeklilik ve Hayat
## 18 Groupama Emeklilik
## 19 Axa Hayat ve Emeklilik
## 20 Aegon Emeklilik ve Hayat
If you want to see the graph of this table:
data %>%group_by(pension_fund_company)%>%
mutate(indSum = (as.numeric(n_of_participants) + as.numeric(n_of_pensioners) + as.numeric(n_of_ind_contracts) + as.numeric(n_of_employer_group_certificates))) %>%
select(indSum) %>%
arrange(desc(indSum)) %>% ggplot(aes(x = pension_fund_company,y = indSum, color = pension_fund_company)) + geom_point()
## Adding missing grouping variables: `pension_fund_company`
## Warning: Removed 1 rows containing missing values (geom_point).
At last, you can want to see changes in total or other variables. I can simlpy make a graph of size_total to show total changes.
data %>% group_by(pension_fund_company) %>%
select(size_total,n_total) %>%
arrange(size_total) %>%
ggplot(aes(x = n_total, y = size_total,color=pension_fund_company)) + geom_line()
## Adding missing grouping variables: `pension_fund_company`
## Warning: Removed 1 rows containing missing values (geom_path).
If you don’t understand easily, I can show you the table of this chart to see the top values on it.You can see the top values by the means of avg size_total value.
data %>% group_by(pension_fund_company) %>%
select(size_total,n_total) %>%
summarise(sizeAvg = mean(size_total),nAvg = mean(n_total)) %>%
arrange(desc(sizeAvg)) %>%
distinct
## Adding missing grouping variables: `pension_fund_company`
## # A tibble: 20 x 3
## pension_fund_company sizeAvg nAvg
## <chr> <dbl> <dbl>
## 1 Avivasa Emeklilik ve Hayat 9427183177. 1114567.
## 2 Anadolu Hayat Emeklilik 8951986575. 1309971.
## 3 Garanti Emeklilik ve Hayat 7342203249. 1264901.
## 4 Allianz Yaşam ve Emeklilik 6337016594. 932642.
## 5 Vakıf Emeklilik ve Hayat 3964040974. 706774.
## 6 Vakıf Emeklilik 3672775808. 673302.
## 7 Ziraat Hayat ve Emeklilik 2186487302. 635485.
## 8 Halk Hayat ve Emeklilik 2095445535. 541190.
## 9 NN Hayat ve Emeklilik 1800532651. 309378.
## 10 Allianz Hayat ve Emeklilik 1559554275. 129664.
## 11 BNP Paribas Cardif Emeklilik 1370387322. 223224.
## 12 Metlife Emeklilik ve Hayat 1011349369. 203391.
## 13 Katılım Emeklilik ve Hayat 638027668. 233372.
## 14 Cigna Finans Emeklilik ve Hayat 510333188. 121007.
## 15 Fiba Emeklilik ve Hayat 334790910. 48915.
## 16 Asya Emeklilik ve Hayat 288930339. 132885
## 17 Bereket Emeklilik ve Hayat 259709813. 119469.
## 18 Axa Hayat ve Emeklilik 227035040. 42409.
## 19 Aegon Emeklilik ve Hayat 102665246. 47594.
## 20 Groupama Emeklilik NA NA