library(dplyr)
library(tidyverse)
library(scales)
library(readxl)
First, car sales excel for November 2017 is downloaded from “Otomotiv Distibütörleri Derneği” website. webSite. File is renamed to “odd_retail_sales_2017_11.xlsx”" and uploaded to github github link
Below the excel file is downloaded from github to a local data frame (raw_data) and refined for analysis
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/pj18-elmasriomer/blob/master/odd_retail_sales_2017_11.XLSX?raw=true",destfile=tmp,mode="wb")
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
file.remove(tmp)
## [1] TRUE
raw_data <- raw_data %>% slice(-c(46,47))
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 26 26 NA NA 0 0 26 26
## 2 ASTON MARTIN NA 1 1 NA NA 0 0 1 1
## 3 AUDI NA 2326 2326 NA NA 0 0 2326 2326
## 4 BENTLEY NA 0 0 NA NA 0 0 0 0
## 5 BMW NA 2399 2399 NA NA 0 0 2399 2399
## 6 CHERY NA NA 0 NA NA 0 0 0 0
In order to make easy to read, we have named column names.
#column names for raw data
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
#replace NA values with 0 and add year/month cols
car_data_nov_17 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.)))%>% mutate(year=2017,month=11)
print(car_data_nov_17,width=Inf)
## # A tibble: 45 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 26 26 0 0 0
## 2 ASTON MARTIN 0 1 1 0 0 0
## 3 AUDI 0 2326 2326 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 2399 2399 0 0 0
## 6 CHERY 0 0 0 0 0 0
## 7 CITROEN 0 745 745 0 294 294
## 8 DACIA 0 4238 4238 0 486 486
## 9 DS 0 0 0 0 0 0
## 10 FERRARI 0 2 2 0 0 0
## total_dom total_imp total_total year month
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 26 26 2017 11
## 2 0 1 1 2017 11
## 3 0 2326 2326 2017 11
## 4 0 0 0 2017 11
## 5 0 2399 2399 2017 11
## 6 0 0 0 2017 11
## 7 0 1039 1039 2017 11
## 8 0 4724 4724 2017 11
## 9 0 0 0 2017 11
## 10 0 2 2 2017 11
## # ... with 35 more rows
saveRDS(car_data_nov_17,file="C:/Users/USER/Documents/Data_Analytics/odd_retail_sales_2017_11.rds")
car_data_nov_17 %>%
filter(auto_total > 0 & comm_total > 0) %>%
select(brand_name,total_total) %>%
arrange(desc(total_total))
## # A tibble: 14 x 2
## brand_name total_total
## <chr> <dbl>
## 1 RENAULT 13984
## 2 VOLKSWAGEN 12937
## 3 FORD 12733
## 4 FIAT 12066
## 5 HYUNDAI 5205
## 6 PEUGEOT 4753
## 7 DACIA 4724
## 8 TOYOTA 4223
## 9 NISSAN 4184
## 10 MERCEDES-BENZ 3112
## 11 KIA 1270
## 12 CITROEN 1039
## 13 MITSUBISHI 306
## 14 SSANGYONG 42
car_data_nov_17 %>%
filter(auto_total > 0 & comm_total > 0) %>%
select(brand_name,total_total) %>%
arrange(desc(total_total)) %>%
mutate(percentage = percent(total_total/sum(total_total))) %>% slice(1:10)
## # A tibble: 10 x 3
## brand_name total_total percentage
## <chr> <dbl> <chr>
## 1 RENAULT 13984 17.4%
## 2 VOLKSWAGEN 12937 16.1%
## 3 FORD 12733 15.8%
## 4 FIAT 12066 15.0%
## 5 HYUNDAI 5205 6.5%
## 6 PEUGEOT 4753 5.9%
## 7 DACIA 4724 5.9%
## 8 TOYOTA 4223 5.2%
## 9 NISSAN 4184 5.2%
## 10 MERCEDES-BENZ 3112 3.9%
car_data_nov_17 %>%
select(brand_name, auto_total, comm_total, total_total) %>%
mutate(average_total=mean(total_total)) %>%
filter(total_total > average_total) %>%
arrange(desc(total_total))
## # A tibble: 15 x 5
## brand_name auto_total comm_total total_total average_total
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 RENAULT 11975 2009 13984 2241.
## 2 VOLKSWAGEN 10228 2709 12937 2241.
## 3 FORD 4476 8257 12733 2241.
## 4 FIAT 5358 6708 12066 2241.
## 5 OPEL 6069 0 6069 2241.
## 6 HYUNDAI 4939 266 5205 2241.
## 7 PEUGEOT 3258 1495 4753 2241.
## 8 DACIA 4238 486 4724 2241.
## 9 TOYOTA 3762 461 4223 2241.
## 10 NISSAN 3964 220 4184 2241.
## 11 MERCEDES-BENZ 2409 703 3112 2241.
## 12 SKODA 2574 0 2574 2241.
## 13 BMW 2399 0 2399 2241.
## 14 AUDI 2326 0 2326 2241.
## 15 HONDA 2283 0 2283 2241.
filtered_df <- car_data_nov_17 %>% dplyr::filter(substr(brand_name,1,1) == "M")
select(filtered_df, brand_name, total_total) %>%
arrange(desc(total_total)) %>%
print(filtered_df)
## # A tibble: 5 x 2
## brand_name total_total
## <chr> <dbl>
## 1 MERCEDES-BENZ 3112
## 2 MITSUBISHI 306
## 3 MINI 229
## 4 MAZDA 131
## 5 MASERATI 17