R Markdown
# Download the example workbook to a temporary file and load it as `raw_data`.
tmp <- tempfile(fileext = ".xlsx")
# An .xlsx workbook is a binary file, so it must be written in binary mode:
# with the default text mode the download is corrupted on Windows.
download.file(
  "https://github.com/MEF-BDA503/pj18-elmasriomer/blob/master/egm_example_data.xlsx?raw=true",
  destfile = tmp,
  mode = "wb"
)
# Read the workbook with readxl::read_excel(). Adjust `skip` and `col_names`
# if the layout of the raw file differs.
raw_data <- readxl::read_excel(tmp, skip = 0, col_names = TRUE)
# Remove the temp file now that its contents are in memory.
file.remove(tmp)
## [1] TRUE
# Preview the first six rows of the imported data.
head(raw_data, n = 6L)
## # A tibble: 6 x 15
## date pension_fund_co… n_of_participan… fund_size_parti… gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 06.0… Aegon Emeklilik… 42826 139193651 14409322
## 2 06.0… Allianz Hayat v… 118867 1840701596 231156574
## 3 06.0… Allianz Yaşam v… 726684 7387398235 815356276
## 4 06.0… Anadolu Hayat E… 1119572 9975066501 1484745501
## 5 06.0… Asya Emeklilik … 129389 337754079 77689275
## 6 06.0… Avivasa Emeklil… 876894 10550032358 1355045933
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
# Preview the last six rows of the imported data.
tail(raw_data, n = 6L)
## # A tibble: 6 x 15
## date pension_fund_co… n_of_participan… fund_size_parti… gov_contribution
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 30.0… Halk Hayat ve E… 511848 2845016752 527923905
## 2 30.0… Katılım Emeklil… 205994 907989162 182110867
## 3 30.0… Metlife Emeklil… 187483 1368676067 233517732
## 4 30.0… NN Hayat ve Eme… 265725 2607702561 382046370
## 5 30.0… Vakıf Emeklilik… 502994 5375415236 757466988
## 6 30.0… Ziraat Hayat ve… 598703 3105479818 590220958
## # ... with 10 more variables: contribution <dbl>, n_of_pensioners <chr>,
## # n_of_ind_contracts <dbl>, n_of_group_ind_contracts <chr>,
## # n_of_employer_group_certificates <chr>, n_total <dbl>,
## # size_of_ind_contracts <dbl>, size_of_group_ind_contracts <dbl>,
## # size_of_employer_group_certificates <dbl>, size_total <dbl>
# Participant counts reported for Aegon, sorted from the highest count down.
# filter() keeps only rows where the comparison is TRUE, so rows with a missing
# company name are dropped instead of coming back as all-NA rows (which is what
# logical `[` indexing does). Columns are picked by name, not by position.
k <- raw_data %>%
  filter(pension_fund_company == "Aegon Emeklilik ve Hayat") %>%
  select(date, pension_fund_company, n_of_participants) %>%
  arrange(desc(n_of_participants))
print(k)
## # A tibble: 76 x 3
## date pension_fund_company n_of_participants
## <chr> <chr> <dbl>
## 1 06.01.2017 Aegon Emeklilik ve Hayat 42826
## 2 13.01.2017 Aegon Emeklilik ve Hayat 42790
## 3 31.01.2017 Aegon Emeklilik ve Hayat 42737
## 4 20.01.2017 Aegon Emeklilik ve Hayat 42733
## 5 03.02.2017 Aegon Emeklilik ve Hayat 42722
## 6 27.01.2017 Aegon Emeklilik ve Hayat 42686
## 7 10.02.2017 Aegon Emeklilik ve Hayat 42619
## 8 17.02.2017 Aegon Emeklilik ve Hayat 42559
## 9 28.02.2017 Aegon Emeklilik ve Hayat 42506
## 10 24.02.2017 Aegon Emeklilik ve Hayat 42483
## # ... with 66 more rows
#head(k)
#ggplot(data = k, aes(x = 1:nrow(k), y = n_of_participants)) + geom_line()
#ggplot(data = raw_data, aes(x = n_of_participants, y = fund_size_participants)) +
# geom_point()
# Largest participant count reported by each pension fund company.
# na.rm = TRUE ignores missing counts; without it a single NA makes the whole
# company's maximum NA. Re-knit to refresh the printed table after this change.
raw_data %>%
  group_by(pension_fund_company) %>%
  summarise(max_n_of_participants = max(n_of_participants, na.rm = TRUE))
## # A tibble: 20 x 2
## pension_fund_company max_n_of_participants
## <chr> <dbl>
## 1 Aegon Emeklilik ve Hayat 42826
## 2 Allianz Hayat ve Emeklilik 118867
## 3 Allianz Yaşam ve Emeklilik 780983
## 4 Anadolu Hayat Emeklilik 1152670
## 5 Asya Emeklilik ve Hayat 129389
## 6 Avivasa Emeklilik ve Hayat 886986
## 7 Axa Hayat ve Emeklilik 35893
## 8 Bereket Emeklilik ve Hayat 124327
## 9 BNP Paribas Cardif Emeklilik 187408
## 10 Cigna Finans Emeklilik ve Hayat 122997
## 11 Fiba Emeklilik ve Hayat 96807
## 12 Garanti Emeklilik ve Hayat 1211803
## 13 Groupama Emeklilik NA
## 14 Halk Hayat ve Emeklilik 511848
## 15 Katılım Emeklilik ve Hayat 205994
## 16 Metlife Emeklilik ve Hayat 188893
## 17 NN Hayat ve Emeklilik 268929
## 18 Vakıf Emeklilik 474386
## 19 Vakıf Emeklilik ve Hayat 503330
## 20 Ziraat Hayat ve Emeklilik 598703
# Number of participants over time, one line per pension fund company.
# dmy() is vectorised and already returns a Date, so the whole column is parsed
# in one call; no rowwise() pass or as_date() wrapper is needed.
raw_data %>%
  mutate(date = lubridate::dmy(date)) %>%
  ggplot(aes(x = date, y = n_of_participants, color = pension_fund_company)) +
  geom_line()
## Warning: Removed 1 rows containing missing values (geom_path).
#%>% ggplot(data = k, aes(x = date, y = n_of_participants)) + geom_line()
# Total fund size over time, one line per pension fund company.
# dmy() is vectorised and already returns a Date, so the whole column is parsed
# in one call; no rowwise() pass or as_date() wrapper is needed.
raw_data %>%
  mutate(date = lubridate::dmy(date)) %>%
  ggplot(aes(x = date, y = size_total, color = pension_fund_company)) +
  geom_line()
## Warning: Removed 1 rows containing missing values (geom_path).