library(dplyr)
library(tidyverse)
library(scales)
First, car sales excel for November 2016 is downloaded from “Otomotiv Distibütörleri Derneği” website. webSite. File is renamed to “odd_retail_sales_2016_11.xlsx”" and uploaded to github github link
Below the excel file is downloaded from github to a local data frame (raw_data) and refined for analysis
tmp5<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/pj18-TarikOzcelik81/blob/master/odd_retail_sales_2016_11.xlsx?raw=true",destfile=tmp5,mode="wb")
raw_data<-readxl::read_excel(tmp5,skip=7,col_names=FALSE)
file.remove(tmp5)
## [1] TRUE
raw_data <- raw_data %>% slice(-c(49,50))
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 53 53 NA NA 0 0 53 53
## 2 ASTON MARTIN NA 2 2 NA NA 0 0 2 2
## 3 AUDI NA 3189 3189 NA NA 0 0 3189 3189
## 4 BENTLEY NA 2 2 NA NA 0 0 2 2
## 5 BMW NA 4611 4611 NA NA 0 0 4611 4611
## 6 CHERY NA 0 0 NA NA 0 0 0 0
Below raw data is made better for human eye and prepared fur cumulative analysis
#column names for raw data
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
#replace NA values with 0 and add year/month cols
car_data_nov_16 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.)))%>% mutate(year=2018,month=9)
print(car_data_nov_16,width=Inf)
## # A tibble: 48 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 53 53 0 0 0
## 2 ASTON MARTIN 0 2 2 0 0 0
## 3 AUDI 0 3189 3189 0 0 0
## 4 BENTLEY 0 2 2 0 0 0
## 5 BMW 0 4611 4611 0 0 0
## 6 CHERY 0 0 0 0 0 0
## 7 CITROEN 0 2017 2017 274 711 985
## 8 DACIA 0 4745 4745 0 747 747
## 9 DS 0 42 42 0 0 0
## 10 FERRARI 0 2 2 0 0 0
## total_dom total_imp total_total year month
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 53 53 2018 9
## 2 0 2 2 2018 9
## 3 0 3189 3189 2018 9
## 4 0 2 2 2018 9
## 5 0 4611 4611 2018 9
## 6 0 0 0 2018 9
## 7 274 2728 3002 2018 9
## 8 0 5492 5492 2018 9
## 9 0 42 42 2018 9
## 10 0 2 2 2018 9
## # ... with 38 more rows
saveRDS(car_data_nov_16,file="C:/Users/to59290/Documents/odd_car_sales_data_nov_16.rds")
total sales and percentage by brand name
car_data_nov_16 %>%
filter(auto_total > 0 & comm_total > 0) %>%
select(brand_name,total_total) %>%
arrange(desc(total_total)) %>%
mutate(percentage = percent(total_total/sum(total_total)))
## # A tibble: 14 x 3
## brand_name total_total percentage
## <chr> <dbl> <chr>
## 1 VOLKSWAGEN 14359 15.2%
## 2 FIAT 13229 14.0%
## 3 RENAULT 12760 13.5%
## 4 FORD 12500 13.2%
## 5 TOYOTA 8323 8.8%
## 6 MERCEDES-BENZ 6933 7.3%
## 7 HYUNDAI 5829 6.2%
## 8 DACIA 5492 5.8%
## 9 NISSAN 4429 4.7%
## 10 PEUGEOT 4220 4.5%
## 11 CITROEN 3002 3.2%
## 12 KIA 2789 3.0%
## 13 MITSUBISHI 463 0.5%
## 14 SSANGYONG 77 0.1%
top 10 sales by number and percentage for auto domestic, auto import, commercial domestic, commercial import
#first we find long format
car_data_nov_16_long <- car_data_nov_16 %>%
select(brand_name, auto_dom , auto_imp, comm_dom,comm_imp ) %>%
gather(key=sales_sta,value=sales_num, -brand_name)
top_10_sales <- car_data_nov_16_long %>%
arrange(desc(sales_num)) %>%
mutate(percentage = percent(sales_num/sum(sales_num))) %>%
slice(1:10)
top_10_sales
## # A tibble: 10 x 4
## brand_name sales_sta sales_num percentage
## <chr> <chr> <dbl> <chr>
## 1 VOLKSWAGEN auto_imp 10587 8.66%
## 2 FORD comm_dom 7187 5.88%
## 3 OPEL auto_imp 7129 5.83%
## 4 RENAULT auto_dom 6905 5.65%
## 5 FIAT auto_dom 6638 5.43%
## 6 MERCEDES-BENZ auto_imp 5858 4.79%
## 7 TOYOTA auto_dom 5762 4.71%
## 8 FIAT comm_dom 5595 4.57%
## 9 DACIA auto_imp 4745 3.88%
## 10 BMW auto_imp 4611 3.77%
top 5 sale numbers for each auto domestic, auto import, commercial domestic, commercial import
auto_imp_v <- car_data_nov_16_long %>% filter(sales_sta=="auto_imp") %>% arrange(desc(sales_num)) %>% select(sales_num) %>% slice(1:5)
auto_dom_v <- car_data_nov_16_long %>% filter(sales_sta=="auto_dom") %>% arrange(desc(sales_num)) %>% select(sales_num) %>% slice(1:5)
comm_imp_v <- car_data_nov_16_long %>% filter(sales_sta=="comm_imp") %>% arrange(desc(sales_num)) %>% select(sales_num) %>% slice(1:5)
comm_dom_v <- car_data_nov_16_long %>% filter(sales_sta=="comm_dom") %>% arrange(desc(sales_num)) %>% select(sales_num) %>% slice(1:5)
data_f <- data.frame(auto_dom_v,auto_imp_v,comm_dom_v,comm_imp_v)
colnames(data_f) <- c("top auto dom","top auto imp","top comm dom","top comm imp")
data_f
## top auto dom top auto imp top comm dom top comm imp
## 1 6905 10587 7187 3772
## 2 6638 7129 5595 1929
## 3 5762 5858 385 1075
## 4 2665 4745 274 831
## 5 2121 4611 182 747