title: “RMarkdown-Homework-EGM (Pension Monitoring Center) Data Analysis-tidyverse,shiny” author: “Leyla Yigit” date: “31 Nov 2018” output: html_document — —

library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.8
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(readxl)
library(dplyr )
library(tidyverse )
library(lubridate )
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
#install.packages("tidyverse")
#install.packages("lubridate")
#install.packages("shiny")
#Download Raw Data
# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-Leyla.Yigit/blob/master/AssignmentWeek4/egm_example_data.xlsx?raw=true",mode="wb",destfile=tmp)
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
# Remove the temp file
file.remove(tmp)
## [1] TRUE
head(raw_data)
## # A tibble: 6 x 15
##   X__1  X__2    X__3   X__4   X__5   X__6 X__7    X__8 X__9  X__10  X__11
##   <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <dbl> <chr> <chr>  <dbl>
## 1 06.0~ Axa ~ 3.33e4 2.11e8 4.33e7 1.94e8 23     38686 856   522   4.01e4
## 2 06.0~ BNP ~ 1.87e5 1.55e9 2.02e8 1.30e9 1196  149094 48359 29668 2.27e5
## 3 06.0~ Cign~ 1.23e5 5.64e8 1.04e8 5.01e8 57    107774 23016 2304  1.33e5
## 4 06.0~ Fiba~ 3.88e4 2.48e8 3.46e7 2.16e8 103    33841 8088  1075  4.30e4
## 5 06.0~ Gara~ 1.13e6 8.42e9 1.17e9 6.86e9 5669  963726 1918~ 58214 1.21e6
## 6 06.0~ Grou~ 6.21e4 8.09e8 8.31e7 6.17e8 1963   61411 8351  1012  7.08e4
## # ... with 4 more variables: X__12 <dbl>, X__13 <dbl>, X__14 <dbl>,
## #   X__15 <dbl>
colnames(raw_data) <- c("date","pension_fund_company","n_of_participants",  "fund_size_participants",   "gov_contribution", "contribution", "n_of_pensioners",  "n_of_ind_contracts",   "n_of_group_ind_contracts", "n_of_employer_group_certificates", "n_total","size_of_ind_contracts",  "size_of_group_ind_contracts",  "size_of_employer_group_certificates"   ,"size_total")
head(raw_data)
## # A tibble: 6 x 15
##   date  pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
##   <chr> <chr>                       <dbl>            <dbl>            <dbl>
## 1 06.0~ Axa Hayat ve Em~            33327        210804109         43342953
## 2 06.0~ BNP Paribas Car~           187289       1545869615        202292735
## 3 06.0~ Cigna Finans Em~           122997        564291355        103628903
## 4 06.0~ Fiba Emeklilik ~            38754        247746061         34576421
## 5 06.0~ Garanti Emeklil~          1134051       8416458527       1169252346
## 6 06.0~ Groupama Emekli~            62080        808664284         83060092
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## #   n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## #   n_of_employer_group_certificates <chr>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>
#replace nulls
egm_data <- raw_data %>%mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) 
print(egm_data,width=Inf)
## # A tibble: 1,362 x 15
##    date       pension_fund_company            n_of_participants
##    <chr>      <chr>                                       <dbl>
##  1 06.01.2017 Axa Hayat ve Emeklilik                      33327
##  2 06.01.2017 BNP Paribas Cardif Emeklilik               187289
##  3 06.01.2017 Cigna Finans Emeklilik ve Hayat            122997
##  4 06.01.2017 Fiba Emeklilik ve Hayat                     38754
##  5 06.01.2017 Garanti Emeklilik ve Hayat                1134051
##  6 06.01.2017 Groupama Emeklilik                          62080
##  7 06.01.2017 Halk Hayat ve Emeklilik                    445422
##  8 06.01.2017 Katilim Emeklilik ve Hayat                 162207
##  9 06.01.2017 Metlife Emeklilik ve Hayat                 179609
## 10 06.01.2017 NN Hayat ve Emeklilik                      268929
##    fund_size_participants gov_contribution contribution n_of_pensioners
##                     <dbl>            <dbl>        <dbl> <chr>          
##  1              210804109         43342953    193638362 23             
##  2             1545869615        202292735   1299990793 1196           
##  3              564291355        103628903    501174178 57             
##  4              247746061         34576421    216460096 103            
##  5             8416458527       1169252346   6858941516 5669           
##  6              808664284         83060092    616889142 1963           
##  7             1948976004        349943882   1778929612 111            
##  8              533523277        105023045    508788631 20             
##  9             1015462865        167726855    908695745 164            
## 10             2078148069        278835177   1691654417 2337           
##    n_of_ind_contracts n_of_group_ind_contracts
##                 <dbl> <chr>                   
##  1              38686 856                     
##  2             149094 48359                   
##  3             107774 23016                   
##  4              33841 8088                    
##  5             963726 191898                  
##  6              61411 8351                    
##  7             214731 238995                  
##  8             109836 46228                   
##  9             143625 35857                   
## 10             261658 39099                   
##    n_of_employer_group_certificates n_total size_of_ind_contracts
##    <chr>                              <dbl>                 <dbl>
##  1 522                                40064             179784075
##  2 29668                             227121            1011515864
##  3 2304                              133094             410000280
##  4 1075                               43004             177009961
##  5 58214                            1213838            5088145866
##  6 1012                               70774             540569297
##  7 49978                             503704             739849661
##  8 49067                             205131             349524378
##  9 18318                             197800             626504799
## 10 11007                             311764            1410370190
##    size_of_group_ind_contracts size_of_employer_group_certificates
##                          <dbl>                               <dbl>
##  1                     7791701                             3542753
##  2                   216811175                            56924248
##  3                    78595897                             3171981
##  4                    30913194                             5888490
##  5                  1436618301                           223937666
##  6                    66914330                             2532006
##  7                   928353692                            99514882
##  8                   112523243                            38497518
##  9                   160814973                           111520785
## 10                   229308207                            27057100
##    size_total
##         <dbl>
##  1  191118529
##  2 1285251288
##  3  491768158
##  4  213811645
##  5 6748701834
##  6  610015633
##  7 1767718235
##  8  500545140
##  9  898840557
## 10 1666735497
## # ... with 1,352 more rows
saveRDS(egm_data,file="egm_data.rds")
egm_data %>% arrange(date) %>% head
## # A tibble: 6 x 15
##   date  pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
##   <chr> <chr>                       <dbl>            <dbl>            <dbl>
## 1 01.0~ Aegon Emeklilik~            41576        149113833         15998576
## 2 01.0~ Allianz Hayat v~           112346       2118537355        285834417
## 3 01.0~ Allianz Yasam v~           777792       8664935956       1035285127
## 4 01.0~ Anadolu Hayat E~          1130984      11697637621       1888805342
## 5 01.0~ Avivasa Emeklil~           886288      12329050750       1733463562
## 6 01.0~ Axa Hayat ve Em~            34721        277870267         57163496
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## #   n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## #   n_of_employer_group_certificates <chr>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>
library(lubridate)
#egm_data$Date <- month(date, label = FALSE, abbr = TRUE, locale = Sys.getlocale("LC_TIME"))
#month(date)
egm_data %>% arrange(date) %>% head
## # A tibble: 6 x 15
##   date  pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
##   <chr> <chr>                       <dbl>            <dbl>            <dbl>
## 1 01.0~ Aegon Emeklilik~            41576        149113833         15998576
## 2 01.0~ Allianz Hayat v~           112346       2118537355        285834417
## 3 01.0~ Allianz Yasam v~           777792       8664935956       1035285127
## 4 01.0~ Anadolu Hayat E~          1130984      11697637621       1888805342
## 5 01.0~ Avivasa Emeklil~           886288      12329050750       1733463562
## 6 01.0~ Axa Hayat ve Em~            34721        277870267         57163496
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## #   n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## #   n_of_employer_group_certificates <chr>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>
#add a new column as total_contrübution
egm_sum_participants<-egm_data %>% 
    group_by(pension_fund_company) %>% summarise(sum_n_of_participants = sum(n_of_participants)) 
    

egm_sum_participants
## # A tibble: 20 x 2
##    pension_fund_company            sum_n_of_participants
##    <chr>                                           <dbl>
##  1 Aegon Emeklilik ve Hayat                      3122545
##  2 Allianz Hayat ve Emeklilik                    8443863
##  3 Allianz Yasam ve Emeklilik                   58208883
##  4 Anadolu Hayat Emeklilik                      85178418
##  5 Asya Emeklilik ve Hayat                        759778
##  6 Avivasa Emeklilik ve Hayat                   66254824
##  7 Axa Hayat ve Emeklilik                        2637494
##  8 Bereket Emeklilik ve Hayat                    7902561
##  9 BNP Paribas Cardif Emeklilik                 14040648
## 10 Cigna Finans Emeklilik ve Hayat               8541193
## 11 Fiba Emeklilik ve Hayat                       3178155
## 12 Garanti Emeklilik ve Hayat                   90095257
## 13 Groupama Emeklilik                            4378774
## 14 Halk Hayat ve Emeklilik                      36328312
## 15 Katilim Emeklilik ve Hayat                   13724745
## 16 Metlife Emeklilik ve Hayat                   14065319
## 17 NN Hayat ve Emeklilik                        20256740
## 18 Vakif Emeklilik                              16470117
## 19 Vakif Emeklilik ve Hayat                     19994777
## 20 Ziraat Hayat ve Emeklilik                    42384783

Including Plots

You can also embed plots, for example:

egm_sum_participants
## # A tibble: 20 x 2
##    pension_fund_company            sum_n_of_participants
##    <chr>                                           <dbl>
##  1 Aegon Emeklilik ve Hayat                      3122545
##  2 Allianz Hayat ve Emeklilik                    8443863
##  3 Allianz Yasam ve Emeklilik                   58208883
##  4 Anadolu Hayat Emeklilik                      85178418
##  5 Asya Emeklilik ve Hayat                        759778
##  6 Avivasa Emeklilik ve Hayat                   66254824
##  7 Axa Hayat ve Emeklilik                        2637494
##  8 Bereket Emeklilik ve Hayat                    7902561
##  9 BNP Paribas Cardif Emeklilik                 14040648
## 10 Cigna Finans Emeklilik ve Hayat               8541193
## 11 Fiba Emeklilik ve Hayat                       3178155
## 12 Garanti Emeklilik ve Hayat                   90095257
## 13 Groupama Emeklilik                            4378774
## 14 Halk Hayat ve Emeklilik                      36328312
## 15 Katilim Emeklilik ve Hayat                   13724745
## 16 Metlife Emeklilik ve Hayat                   14065319
## 17 NN Hayat ve Emeklilik                        20256740
## 18 Vakif Emeklilik                              16470117
## 19 Vakif Emeklilik ve Hayat                     19994777
## 20 Ziraat Hayat ve Emeklilik                    42384783
egm_sum_participants %>% 
  ggplot(data = ., aes(x = sum_n_of_participants , y =pension_fund_company , color = sum_n_of_participants)) + 
  labs(y = "company", x="sum of participants", fill="Brands") + 
  geom_col(alpha = 0.3, color = "tomato")+
    theme(plot.margin = margin(2,.8,2,.9, "cm"),
        plot.background = element_rect(fill = "grey"))

ggplot(data=egm_sum_participants ,aes(x =sum_n_of_participants  ,y =pension_fund_company, color = pension_fund_company)) +  geom_area()

#daily total contrübution
 #install.packages("ggplot2")

date_sum <- egm_data %>% 
  group_by(( date)) %>% 
  mutate(sum_of = sum(n_of_participants))
    
date_sum    
## # A tibble: 1,362 x 17
## # Groups:   (date) [76]
##    date  pension_fund_co~ n_of_participan~ fund_size_parti~
##    <chr> <chr>                       <dbl>            <dbl>
##  1 06.0~ Axa Hayat ve Em~            33327        210804109
##  2 06.0~ BNP Paribas Car~           187289       1545869615
##  3 06.0~ Cigna Finans Em~           122997        564291355
##  4 06.0~ Fiba Emeklilik ~            38754        247746061
##  5 06.0~ Garanti Emeklil~          1134051       8416458527
##  6 06.0~ Groupama Emekli~            62080        808664284
##  7 06.0~ Halk Hayat ve E~           445422       1948976004
##  8 06.0~ Katilim Emeklil~           162207        533523277
##  9 06.0~ Metlife Emeklil~           179609       1015462865
## 10 06.0~ NN Hayat ve Eme~           268929       2078148069
## # ... with 1,352 more rows, and 13 more variables: gov_contribution <dbl>,
## #   contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## #   n_of_group_ind_contracts <chr>,
## #   n_of_employer_group_certificates <chr>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>,
## #   `(date)` <chr>, sum_of <dbl>
ggplot(data=egm_data, aes(x=pension_fund_company, y=fund_size_participants, fill=pension_fund_company)) +
  geom_bar(stat="identity")+ 
  theme_classic() + xlab("Firms") + 
  theme(axis.text.x = element_text(angle = 90)) + ylab("Median Number of Participants")

egm_data
## # A tibble: 1,362 x 15
##    date  pension_fund_co~ n_of_participan~ fund_size_parti~
##    <chr> <chr>                       <dbl>            <dbl>
##  1 06.0~ Axa Hayat ve Em~            33327        210804109
##  2 06.0~ BNP Paribas Car~           187289       1545869615
##  3 06.0~ Cigna Finans Em~           122997        564291355
##  4 06.0~ Fiba Emeklilik ~            38754        247746061
##  5 06.0~ Garanti Emeklil~          1134051       8416458527
##  6 06.0~ Groupama Emekli~            62080        808664284
##  7 06.0~ Halk Hayat ve E~           445422       1948976004
##  8 06.0~ Katilim Emeklil~           162207        533523277
##  9 06.0~ Metlife Emeklil~           179609       1015462865
## 10 06.0~ NN Hayat ve Eme~           268929       2078148069
## # ... with 1,352 more rows, and 11 more variables: gov_contribution <dbl>,
## #   contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## #   n_of_group_ind_contracts <chr>,
## #   n_of_employer_group_certificates <chr>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>
# Example 1
set.seed(1234)
x <- rnorm(200)
# Histogram
hp<-qplot(x =x, fill=..count.., geom="histogram") 
hp
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Sequential color scheme
hp+scale_fill_gradient(low="blue", high="red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

egm_data
## # A tibble: 1,362 x 15
##    date  pension_fund_co~ n_of_participan~ fund_size_parti~
##    <chr> <chr>                       <dbl>            <dbl>
##  1 06.0~ Axa Hayat ve Em~            33327        210804109
##  2 06.0~ BNP Paribas Car~           187289       1545869615
##  3 06.0~ Cigna Finans Em~           122997        564291355
##  4 06.0~ Fiba Emeklilik ~            38754        247746061
##  5 06.0~ Garanti Emeklil~          1134051       8416458527
##  6 06.0~ Groupama Emekli~            62080        808664284
##  7 06.0~ Halk Hayat ve E~           445422       1948976004
##  8 06.0~ Katilim Emeklil~           162207        533523277
##  9 06.0~ Metlife Emeklil~           179609       1015462865
## 10 06.0~ NN Hayat ve Eme~           268929       2078148069
## # ... with 1,352 more rows, and 11 more variables: gov_contribution <dbl>,
## #   contribution <dbl>, n_of_pensioners <chr>, n_of_ind_contracts <dbl>,
## #   n_of_group_ind_contracts <chr>,
## #   n_of_employer_group_certificates <chr>, n_total <dbl>,
## #   size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## #   size_of_employer_group_certificates <dbl>, size_total <dbl>
egm_market_share<-egm_data %>% 
    group_by(pension_fund_company) %>% summarise(market_share = sum(n_of_ind_contracts)) 

egm_market_share
## # A tibble: 20 x 2
##    pension_fund_company            market_share
##    <chr>                                  <dbl>
##  1 Aegon Emeklilik ve Hayat             2461185
##  2 Allianz Hayat ve Emeklilik           8055384
##  3 Allianz Yasam ve Emeklilik          47580021
##  4 Anadolu Hayat Emeklilik             71944036
##  5 Asya Emeklilik ve Hayat               229781
##  6 Avivasa Emeklilik ve Hayat          75049480
##  7 Axa Hayat ve Emeklilik               3091142
##  8 Bereket Emeklilik ve Hayat           2365119
##  9 BNP Paribas Cardif Emeklilik        11239752
## 10 Cigna Finans Emeklilik ve Hayat      7541258
## 11 Fiba Emeklilik ve Hayat              2590090
## 12 Garanti Emeklilik ve Hayat          76691648
## 13 Groupama Emeklilik                   4307738
## 14 Halk Hayat ve Emeklilik             19352067
## 15 Katilim Emeklilik ve Hayat          10263172
## 16 Metlife Emeklilik ve Hayat          11408003
## 17 NN Hayat ve Emeklilik               19969222
## 18 Vakif Emeklilik                     19808450
## 19 Vakif Emeklilik ve Hayat            24631904
## 20 Ziraat Hayat ve Emeklilik           28750271
theme_set(theme_classic())

# Source: Frequency table
df <- as.data.frame(table(egm_market_share$pension_fund_company))
colnames(df) <- c("class", "freq")
pie <- ggplot(df, aes(x = "", y=freq, fill = factor(class))) + 
  geom_bar(width = 1, stat = "identity") +
  theme(axis.line = element_blank(), 
        plot.title = element_text(hjust=0.5)) + 
  labs(fill="class", 
       x=NULL, 
       y=NULL, 
       title="Pie Chart of class", 
       caption="egm")+
   coord_polar(theta = "y")

pie + coord_polar(theta = "y", start=0)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

# Source: Categorical variable.
# mpg$class
pie <- ggplot(mpg, aes(x = "", fill = factor(class))) + 
  geom_bar(width = 1) +
  theme(axis.line = element_blank(), 
        plot.title = element_text(hjust=0.5)) + 
  labs(fill="class", 
       x=NULL, 
       y=NULL, 
       title="Pie Chart of class", 
       caption="egm")
  
pie + coord_polar(theta = "y", start=0)