I downloaded the raw data from my GitHub repository.
# Fetch the workbook into a throwaway .xlsx file, load it into `raw_data`,
# then delete the temporary copy.
tmp <- tempfile(fileext = ".xlsx")
download.file(
  url = "https://github.com/MEF-BDA503/pj18-mkerimacar/blob/master/egm_example_data.xlsx?raw=true",
  destfile = tmp,
  mode = "wb" # binary transfer; an .xlsx is a binary file
)
# The first row of the sheet supplies the column names.
raw_data <- readxl::read_excel(path = tmp, col_names = TRUE)
# Left un-assigned on purpose so the TRUE/FALSE result is printed.
file.remove(tmp)
## [1] TRUE
# Preview the first six rows of the imported data.
head(raw_data, n = 6L)
## # A tibble: 6 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 06.0~ Aegon Emeklilik~ 42826 139193651 14409322
## 2 06.0~ Allianz Hayat v~ 118867 1840701596 231156574
## 3 06.0~ Allianz Yaşam v~ 726684 7387398235 815356276
## 4 06.0~ Anadolu Hayat E~ 1119572 9975066501 1484745501
## 5 06.0~ Asya Emeklilik ~ 129389 337754079 77689275
## 6 06.0~ Avivasa Emeklil~ 876894 10550032358 1355045933
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
# Preview the last six rows of the imported data.
tail(raw_data, n = 6L)
## # A tibble: 6 x 15
## date pension_fund_co~ n_of_participan~ fund_size_parti~ gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 30.0~ Halk Hayat ve E~ 511848 2845016752 527923905
## 2 30.0~ Katılım Emeklil~ 205994 907989162 182110867
## 3 30.0~ Metlife Emeklil~ 187483 1368676067 233517732
## 4 30.0~ NN Hayat ve Eme~ 265725 2607702561 382046370
## 5 30.0~ Vakıf Emeklilik~ 502994 5375415236 757466988
## 6 30.0~ Ziraat Hayat ve~ 598703 3105479818 590220958
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
I think the median number of participants per firm is a good metric for comparing how the firms are performing.
# Attach each firm's median participant count to every one of its rows.
# na.rm = TRUE: without it a single missing n_of_participants value makes
# the whole firm's median NA, and the firm then has no bar in the plot
# (the plot's "Removed ... rows containing missing values" warning suggests
# such gaps exist in this data).
# ungroup(): keep the grouping from leaking into later steps.
raw_data <- raw_data %>%
  group_by(pension_fund_company) %>%
  mutate(medians = median(n_of_participants, na.rm = TRUE)) %>%
  ungroup()
The bar plot below shows each firm's median number of participants.
# Bar chart of the median number of participants, one bar per firm.
# `medians` is repeated on every row belonging to a firm, and identity bars
# are stacked, so plotting raw_data directly would draw each bar at
# (median x number of rows for that firm). Collapse to a single row per firm
# first so the bar height is the median itself.
raw_data %>%
  ungroup() %>%
  distinct(pension_fund_company, medians) %>%
  ggplot(
    aes(
      x = pension_fund_company,
      y = medians,
      fill = pension_fund_company
    )
  ) +
  geom_col() +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset it.
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("Firms") +
  ylab("Median Number of Participants")
## Warning: Removed 76 rows containing missing values (position_stack).