title: “RMarkdown-Homework-EGM (Pension Monitoring Center) Data Analysis-tidyverse,shiny” author: “Leyla Yigit” date: “31 Nov 2018” output: html_document — —
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.8
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(readxl)
library(dplyr )
library(tidyverse )
library(lubridate )
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
#install.packages("tidyverse")
#install.packages("lubridate")
#install.packages("shiny")
#Download Raw Data
# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-Leyla.Yigit/blob/master/AssignmentWeek4/egm_example_data.xlsx?raw=true",mode="wb",destfile=tmp)
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
# Remove the temp file
file.remove(tmp)
## [1] TRUE
head(raw_data)
## # A tibble: 6 x 15
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10 X__11
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 06.0~ Axa ~ 3.33e4 2.11e8 4.33e7 1.94e8 23 38686 856 522 4.01e4
## 2 06.0~ BNP ~ 1.87e5 1.55e9 2.02e8 1.30e9 1196 149094 48359 29668 2.27e5
## 3 06.0~ Cign~ 1.23e5 5.64e8 1.04e8 5.01e8 57 107774 23016 2304 1.33e5
## 4 06.0~ Fiba~ 3.88e4 2.48e8 3.46e7 2.16e8 103 33841 8088 1075 4.30e4
## 5 06.0~ Gara~ 1.13e6 8.42e9 1.17e9 6.86e9 5669 963726 1918~ 58214 1.21e6
## 6 06.0~ Grou~ 6.21e4 8.09e8 8.31e7 6.17e8 1963 61411 8351 1012 7.08e4
## # ... with 4 more variables: X__12 <dbl>, X__13 <dbl>, X__14 <dbl>,
## # X__15 <dbl>
colnames(raw_data) <- c("date","pension_fund_company","n_of_participants", "fund_size_participants", "gov_contribution", "contribution", "n_of_pensioners", "n_of_ind_contracts", "n_of_group_ind_contracts", "n_of_employer_group_certificates", "n_total","size_of_ind_contracts", "size_of_group_ind_contracts", "size_of_employer_group_certificates" ,"size_total")
head(raw_data)
## # A tibble: 6 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 06.0~ Axa Hayat ve Em~ 33327 210804109 43342953
## 2 06.0~ BNP Paribas Car~ 187289 1545869615 202292735
## 3 06.0~ Cigna Finans Em~ 122997 564291355 103628903
## 4 06.0~ Fiba Emeklilik ~ 38754 247746061 34576421
## 5 06.0~ Garanti Emeklil~ 1134051 8416458527 1169252346
## 6 06.0~ Groupama Emekli~ 62080 808664284 83060092
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
#replace nulls
egm_data <- raw_data %>%mutate_if(is.numeric,funs(ifelse(is.na(.),0,.)))
print(egm_data,width=Inf)
## # A tibble: 1,362 x 15
## date pension_fund_company n_of_participants
## <chr> <chr> <dbl>
## 1 06.01.2017 Axa Hayat ve Emeklilik 33327
## 2 06.01.2017 BNP Paribas Cardif Emeklilik 187289
## 3 06.01.2017 Cigna Finans Emeklilik ve Hayat 122997
## 4 06.01.2017 Fiba Emeklilik ve Hayat 38754
## 5 06.01.2017 Garanti Emeklilik ve Hayat 1134051
## 6 06.01.2017 Groupama Emeklilik 62080
## 7 06.01.2017 Halk Hayat ve Emeklilik 445422
## 8 06.01.2017 Katilim Emeklilik ve Hayat 162207
## 9 06.01.2017 Metlife Emeklilik ve Hayat 179609
## 10 06.01.2017 NN Hayat ve Emeklilik 268929
## fund_size_participants gov_contribution contribution n_of_pensioners
## <dbl> <dbl> <dbl> <chr>
## 1 210804109 43342953 193638362 23
## 2 1545869615 202292735 1299990793 1196
## 3 564291355 103628903 501174178 57
## 4 247746061 34576421 216460096 103
## 5 8416458527 1169252346 6858941516 5669
## 6 808664284 83060092 616889142 1963
## 7 1948976004 349943882 1778929612 111
## 8 533523277 105023045 508788631 20
## 9 1015462865 167726855 908695745 164
## 10 2078148069 278835177 1691654417 2337
## n_of_ind_contracts n_of_group_ind_contracts
## <dbl> <chr>
## 1 38686 856
## 2 149094 48359
## 3 107774 23016
## 4 33841 8088
## 5 963726 191898
## 6 61411 8351
## 7 214731 238995
## 8 109836 46228
## 9 143625 35857
## 10 261658 39099
## n_of_employer_group_certificates n_total size_of_ind_contracts
## <chr> <dbl> <dbl>
## 1 522 40064 179784075
## 2 29668 227121 1011515864
## 3 2304 133094 410000280
## 4 1075 43004 177009961
## 5 58214 1213838 5088145866
## 6 1012 70774 540569297
## 7 49978 503704 739849661
## 8 49067 205131 349524378
## 9 18318 197800 626504799
## 10 11007 311764 1410370190
## size_of_group_ind_contracts size_of_employer_group_certificates
## <dbl> <dbl>
## 1 7791701 3542753
## 2 216811175 56924248
## 3 78595897 3171981
## 4 30913194 5888490
## 5 1436618301 223937666
## 6 66914330 2532006
## 7 928353692 99514882
## 8 112523243 38497518
## 9 160814973 111520785
## 10 229308207 27057100
## size_total
## <dbl>
## 1 191118529
## 2 1285251288
## 3 491768158
## 4 213811645
## 5 6748701834
## 6 610015633
## 7 1767718235
## 8 500545140
## 9 898840557
## 10 1666735497
## # ... with 1,352 more rows
saveRDS(egm_data,file="egm_data.rds")
egm_data %>% arrange(date) %>% head
## # A tibble: 6 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 01.0~ Aegon Emeklilik~ 41576 149113833 15998576
## 2 01.0~ Allianz Hayat v~ 112346 2118537355 285834417
## 3 01.0~ Allianz Yasam v~ 777792 8664935956 1035285127
## 4 01.0~ Anadolu Hayat E~ 1130984 11697637621 1888805342
## 5 01.0~ Avivasa Emeklil~ 886288 12329050750 1733463562
## 6 01.0~ Axa Hayat ve Em~ 34721 277870267 57163496
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
library(lubridate)
#egm_data$Date <- month(date, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME"))
#month(date)
egm_data %>% arrange(date) %>% head
## # A tibble: 6 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 01.0~ Aegon Emeklilik~ 41576 149113833 15998576
## 2 01.0~ Allianz Hayat v~ 112346 2118537355 285834417
## 3 01.0~ Allianz Yasam v~ 777792 8664935956 1035285127
## 4 01.0~ Anadolu Hayat E~ 1130984 11697637621 1888805342
## 5 01.0~ Avivasa Emeklil~ 886288 12329050750 1733463562
## 6 01.0~ Axa Hayat ve Em~ 34721 277870267 57163496
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
#add a new column as total_contrübution
egm_sum_participants<-egm_data %>%
group_by(pension_fund_company) %>% summarise(sum_n_of_participants = sum(n_of_participants))
egm_sum_participants
## # A tibble: 20 x 2
## pension_fund_company sum_n_of_participants
## <chr> <dbl>
## 1 Aegon Emeklilik ve Hayat 3122545
## 2 Allianz Hayat ve Emeklilik 8443863
## 3 Allianz Yasam ve Emeklilik 58208883
## 4 Anadolu Hayat Emeklilik 85178418
## 5 Asya Emeklilik ve Hayat 759778
## 6 Avivasa Emeklilik ve Hayat 66254824
## 7 Axa Hayat ve Emeklilik 2637494
## 8 Bereket Emeklilik ve Hayat 7902561
## 9 BNP Paribas Cardif Emeklilik 14040648
## 10 Cigna Finans Emeklilik ve Hayat 8541193
## 11 Fiba Emeklilik ve Hayat 3178155
## 12 Garanti Emeklilik ve Hayat 90095257
## 13 Groupama Emeklilik 4378774
## 14 Halk Hayat ve Emeklilik 36328312
## 15 Katilim Emeklilik ve Hayat 13724745
## 16 Metlife Emeklilik ve Hayat 14065319
## 17 NN Hayat ve Emeklilik 20256740
## 18 Vakif Emeklilik 16470117
## 19 Vakif Emeklilik ve Hayat 19994777
## 20 Ziraat Hayat ve Emeklilik 42384783
You can also embed plots, for example:
egm_sum_participants
## # A tibble: 20 x 2
## pension_fund_company sum_n_of_participants
## <chr> <dbl>
## 1 Aegon Emeklilik ve Hayat 3122545
## 2 Allianz Hayat ve Emeklilik 8443863
## 3 Allianz Yasam ve Emeklilik 58208883
## 4 Anadolu Hayat Emeklilik 85178418
## 5 Asya Emeklilik ve Hayat 759778
## 6 Avivasa Emeklilik ve Hayat 66254824
## 7 Axa Hayat ve Emeklilik 2637494
## 8 Bereket Emeklilik ve Hayat 7902561
## 9 BNP Paribas Cardif Emeklilik 14040648
## 10 Cigna Finans Emeklilik ve Hayat 8541193
## 11 Fiba Emeklilik ve Hayat 3178155
## 12 Garanti Emeklilik ve Hayat 90095257
## 13 Groupama Emeklilik 4378774
## 14 Halk Hayat ve Emeklilik 36328312
## 15 Katilim Emeklilik ve Hayat 13724745
## 16 Metlife Emeklilik ve Hayat 14065319
## 17 NN Hayat ve Emeklilik 20256740
## 18 Vakif Emeklilik 16470117
## 19 Vakif Emeklilik ve Hayat 19994777
## 20 Ziraat Hayat ve Emeklilik 42384783
egm_sum_participants %>%
ggplot(data = ., aes(x = sum_n_of_participants , y =pension_fund_company , color = sum_n_of_participants)) +
labs(y = "company", x="sum of participants", fill="Brands") +
geom_col(alpha = 0.3, color = "tomato")+
theme(plot.margin = margin(2,.8,2,.9, "cm"),
plot.background = element_rect(fill = "grey"))
ggplot(data=egm_sum_participants ,aes(x =sum_n_of_participants ,y =pension_fund_company, color = pension_fund_company)) + geom_area()
#daily total contrübution
#install.packages("ggplot2")
date_sum <- egm_data %>%
group_by(( date)) %>%
mutate(sum_of = sum(n_of_participants))
date_sum
## # A tibble: 1,362 x 17
## # Groups: (date) [76]
## date pension_fund_co~ n_of_participan~ fund_size_parti~
## <chr> <chr> <dbl> <dbl>
## 1 06.0~ Axa Hayat ve Em~ 33327 210804109
## 2 06.0~ BNP Paribas Car~ 187289 1545869615
## 3 06.0~ Cigna Finans Em~ 122997 564291355
## 4 06.0~ Fiba Emeklilik ~ 38754 247746061
## 5 06.0~ Garanti Emeklil~ 1134051 8416458527
## 6 06.0~ Groupama Emekli~ 62080 808664284
## 7 06.0~ Halk Hayat ve E~ 445422 1948976004
## 8 06.0~ Katilim Emeklil~ 162207 533523277
## 9 06.0~ Metlife Emeklil~ 179609 1015462865
## 10 06.0~ NN Hayat ve Eme~ 268929 2078148069
## # ... with 1,352 more rows, and 13 more variables: gov_contribution <dbl>,
## # contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## # n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>,
## # `(date)` <chr>, sum_of <dbl>
ggplot(data=egm_data, aes(x=pension_fund_company, y=fund_size_participants, fill=pension_fund_company)) +
geom_bar(stat="identity")+
theme_classic() + xlab("Firms") +
theme(axis.text.x = element_text(angle = 90)) + ylab("Median Number of Participants")
egm_data
## # A tibble: 1,362 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~
## <chr> <chr> <dbl> <dbl>
## 1 06.0~ Axa Hayat ve Em~ 33327 210804109
## 2 06.0~ BNP Paribas Car~ 187289 1545869615
## 3 06.0~ Cigna Finans Em~ 122997 564291355
## 4 06.0~ Fiba Emeklilik ~ 38754 247746061
## 5 06.0~ Garanti Emeklil~ 1134051 8416458527
## 6 06.0~ Groupama Emekli~ 62080 808664284
## 7 06.0~ Halk Hayat ve E~ 445422 1948976004
## 8 06.0~ Katilim Emeklil~ 162207 533523277
## 9 06.0~ Metlife Emeklil~ 179609 1015462865
## 10 06.0~ NN Hayat ve Eme~ 268929 2078148069
## # ... with 1,352 more rows, and 11 more variables: gov_contribution <dbl>,
## # contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## # n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
# Example 1
set.seed(1234)
x <- rnorm(200)
# Histogram
hp<-qplot(x =x, fill=..count.., geom="histogram")
hp
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Sequential color scheme
hp+scale_fill_gradient(low="blue", high="red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
egm_data
## # A tibble: 1,362 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~
## <chr> <chr> <dbl> <dbl>
## 1 06.0~ Axa Hayat ve Em~ 33327 210804109
## 2 06.0~ BNP Paribas Car~ 187289 1545869615
## 3 06.0~ Cigna Finans Em~ 122997 564291355
## 4 06.0~ Fiba Emeklilik ~ 38754 247746061
## 5 06.0~ Garanti Emeklil~ 1134051 8416458527
## 6 06.0~ Groupama Emekli~ 62080 808664284
## 7 06.0~ Halk Hayat ve E~ 445422 1948976004
## 8 06.0~ Katilim Emeklil~ 162207 533523277
## 9 06.0~ Metlife Emeklil~ 179609 1015462865
## 10 06.0~ NN Hayat ve Eme~ 268929 2078148069
## # ... with 1,352 more rows, and 11 more variables: gov_contribution <dbl>,
## # contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## # n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
egm_market_share<-egm_data %>%
group_by(pension_fund_company) %>% summarise(market_share = sum(n_of_ind_contracts))
egm_market_share
## # A tibble: 20 x 2
## pension_fund_company market_share
## <chr> <dbl>
## 1 Aegon Emeklilik ve Hayat 2461185
## 2 Allianz Hayat ve Emeklilik 8055384
## 3 Allianz Yasam ve Emeklilik 47580021
## 4 Anadolu Hayat Emeklilik 71944036
## 5 Asya Emeklilik ve Hayat 229781
## 6 Avivasa Emeklilik ve Hayat 75049480
## 7 Axa Hayat ve Emeklilik 3091142
## 8 Bereket Emeklilik ve Hayat 2365119
## 9 BNP Paribas Cardif Emeklilik 11239752
## 10 Cigna Finans Emeklilik ve Hayat 7541258
## 11 Fiba Emeklilik ve Hayat 2590090
## 12 Garanti Emeklilik ve Hayat 76691648
## 13 Groupama Emeklilik 4307738
## 14 Halk Hayat ve Emeklilik 19352067
## 15 Katilim Emeklilik ve Hayat 10263172
## 16 Metlife Emeklilik ve Hayat 11408003
## 17 NN Hayat ve Emeklilik 19969222
## 18 Vakif Emeklilik 19808450
## 19 Vakif Emeklilik ve Hayat 24631904
## 20 Ziraat Hayat ve Emeklilik 28750271
theme_set(theme_classic())
# Source: Frequency table
df <- as.data.frame(table(egm_market_share$pension_fund_company))
colnames(df) <- c("class", "freq")
pie <- ggplot(df, aes(x = "", y=freq, fill = factor(class))) +
geom_bar(width = 1, stat = "identity") +
theme(axis.line = element_blank(),
plot.title = element_text(hjust=0.5)) +
labs(fill="class",
x=NULL,
y=NULL,
title="Pie Chart of class",
caption="egm")+
coord_polar(theta = "y")
pie + coord_polar(theta = "y", start=0)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# Source: Categorical variable.
# mpg$class
pie <- ggplot(mpg, aes(x = "", fill = factor(class))) +
geom_bar(width = 1) +
theme(axis.line = element_blank(),
plot.title = element_text(hjust=0.5)) +
labs(fill="class",
x=NULL,
y=NULL,
title="Pie Chart of class",
caption="egm")
pie + coord_polar(theta = "y", start=0)