knitr::opts_chunk$set(echo = TRUE) ## Downloaded These Packages
#install.packages("readxl")
#install.packages("tidyverse")
library(readxl)
library(tidyverse)
## -- Attaching packages ------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-aydemirbusra/blob/master/busra%20aydemir2016mayis.xlsx?raw=true",destfile=tmp)
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
# Remove the temp file
file.remove(tmp)
## [1] TRUE
# Remove the last two rows because they are irrelevant (total and empty rows)
raw_data <- raw_data %>% slice(-c(49,50))
# Let's see our raw data
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 68 68 NA NA 0 0 68 68
## 2 ASTON MARTIN NA 4 4 NA NA 0 0 4 4
## 3 AUDI NA 2352 2352 NA NA 0 0 2352 2352
## 4 BENTLEY NA 1 1 NA NA 0 0 1 1
## 5 BMW NA 3174 3174 NA NA 0 0 3174 3174
## 6 CHERY NA 22 22 NA NA 0 0 22 22
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
car_data_may_16 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2016,month=5)
print(car_data_may_16,width=Inf)
## # A tibble: 48 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 68 68 0 0 0
## 2 ASTON MARTIN 0 4 4 0 0 0
## 3 AUDI 0 2352 2352 0 0 0
## 4 BENTLEY 0 1 1 0 0 0
## 5 BMW 0 3174 3174 0 0 0
## 6 CHERY 0 22 22 0 0 0
## 7 CITROEN 0 1929 1929 99 398 497
## 8 DACIA 0 4179 4179 0 572 572
## 9 DS 0 24 24 0 0 0
## 10 FERRARI 0 3 3 0 0 0
## total_dom total_imp total_total year month
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 68 68 2016 5
## 2 0 4 4 2016 5
## 3 0 2352 2352 2016 5
## 4 0 1 1 2016 5
## 5 0 3174 3174 2016 5
## 6 0 22 22 2016 5
## 7 99 2327 2426 2016 5
## 8 0 4751 4751 2016 5
## 9 0 24 24 2016 5
## 10 0 3 3 2016 5
## # ... with 38 more rows
saveRDS(car_data_may_16,file="data_may_16.rds")
car_data_may_16 %>%
filter(auto_total > 0 & comm_total > 0) %>%
select(brand_name,total_total) %>%
arrange(desc(total_total))
## # A tibble: 14 x 2
## brand_name total_total
## <chr> <dbl>
## 1 RENAULT 12741
## 2 VOLKSWAGEN 12436
## 3 FORD 10051
## 4 FIAT 9636
## 5 TOYOTA 6067
## 6 HYUNDAI 4752
## 7 DACIA 4751
## 8 MERCEDES-BENZ 3764
## 9 PEUGEOT 3022
## 10 NISSAN 2638
## 11 CITROEN 2426
## 12 KIA 1443
## 13 MITSUBISHI 327
## 14 SSANGYONG 107