ODD 2016-2018 Car Sales Analysis

Tarık Özçelik

25.11.2018

library(dplyr)
library(tidyverse)
library(ggplot2)
library(scales)

get .rds file

Below, the .rds file is read into a data frame (total_sales), either downloaded from GitHub or loaded from a local copy, and then cleaned for analysis.

# Data source: the aggregated car-sales .rds file.
# Remote alternative (GitHub), kept for reference:
# total_sales <- readRDS(url("https://github.com/MEF-BDA503/pj18-muharremcakir81/blob/master/Week4/car_data_aggregate.rds?raw=true"))
# Default: read the local copy from the working directory.
total_sales <- readRDS("car_data_aggregate.rds")

# Drop the rows whose brand_name starts with "ODD" or "TOPLAM";
# they are treated as irrelevant for this analysis.
total_sales <- total_sales %>%
  filter(
    !startsWith(brand_name, "ODD"),
    !startsWith(brand_name, "TOPLAM")
  )

# Inspect column names, types and the first few values.
glimpse(total_sales)
## Observations: 1,477
## Variables: 12
## $ brand_name  <chr> "ALFA ROMEO", "ASTON MARTIN", "AUDI", "BENTLEY", "...
## $ auto_dom    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 632, 91, 1471, 460, 0, ...
## $ auto_imp    <dbl> 13, 2, 350, 0, 158, 134, 1141, 9, 3, 57, 705, 502,...
## $ auto_total  <dbl> 13, 2, 350, 0, 158, 134, 1141, 9, 3, 689, 796, 197...
## $ comm_dom    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 789, 1421, 0, 0, 0, 59,...
## $ comm_imp    <dbl> 0, 0, 0, 0, 0, 197, 319, 0, 0, 199, 139, 0, 39, 0,...
## $ comm_total  <dbl> 0, 0, 0, 0, 0, 197, 319, 0, 0, 988, 1560, 0, 39, 0...
## $ total_dom   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1421, 1512, 1471, 460, ...
## $ total_imp   <dbl> 13, 2, 350, 0, 158, 331, 1460, 9, 3, 256, 844, 502...
## $ total_total <dbl> 13, 2, 350, 0, 158, 331, 1460, 9, 3, 1677, 2356, 1...
## $ year        <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 20...
## $ month       <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,...

total auto sales by year and month

# Monthly sum of auto_total over all rows, ordered chronologically.
# ungroup() is applied because summarise() only removes the last grouping
# level, which would otherwise leave the result grouped by year.
sales_by_month <- total_sales %>%
  group_by(year, month) %>%
  summarise(total_auto = sum(auto_total)) %>%
  ungroup() %>%
  arrange(year, month) %>%
  # Year-month columns used for the date-sorted line chart:
  #   Date         - zoo "yearmon" value built from year and month
  #   DateFormated - "YYYY-MM" text label (sorts chronologically as text)
  mutate(
    Date = zoo::as.yearmon(paste(year, month), "%Y %m"),
    DateFormated = format(Date, "%Y-%m")
  )

# Line chart of total auto sales per month.
# group = 1 joins all points into a single line although the x axis is a
# discrete text label; points and line are coloured by month number and the
# legend is hidden.
ggplot(
  sales_by_month,
  aes(x = DateFormated, y = total_auto, group = 1, colour = month)
) +
  geom_line() +
  geom_point() +
  xlab("date") +
  ylab("total auto sales") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.0, size = 10)
  )

Top 10 brands by total auto sales, in descending order

# Sum auto_total per brand, sort from largest to smallest, and keep the
# first ten rows.
brand_total_top_sales <- total_sales %>%
  group_by(brand_name) %>%
  summarise(brand_total_auto = sum(auto_total)) %>%
  arrange(desc(brand_total_auto)) %>%
  head(10)

# Bar chart of the top brands, one bar per brand.
# reorder() on the negated total orders the x axis by descending sales.
ggplot(
  brand_total_top_sales,
  aes(
    x = reorder(brand_name, -brand_total_auto),
    y = brand_total_auto,
    fill = brand_name
  )
) +
  geom_col() +
  labs(x = "brand name", y = "total auto sales") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.0, size = 10)
  )

Monthly sales of the top 3 brands

# Monthly auto_total for three selected brands, with year-month columns
# for plotting.
# NOTE(review): the brand list is hard-coded - presumably the three
# best-selling brands; confirm and update if the data changes.
# Unused alternative kept from the original:
# brand=total_sales$brand_name[(which.max(total_sales$auto_total))]
month_champion_of_top3 <- total_sales %>%
  filter(brand_name %in% c("RENAULT", "VOLKSWAGEN", "FIAT")) %>%
  select(brand_name, auto_total, year, month) %>%
  # Date is a zoo "yearmon" value; DateFormated is its "YYYY-MM" text label.
  mutate(
    Date = zoo::as.yearmon(paste(year, month), "%Y %m"),
    DateFormated = format(Date, "%Y-%m")
  )

# One line per brand showing its auto sales for each month; the legend is
# placed below the chart and the x labels are rotated to stay readable.
ggplot(
  month_champion_of_top3,
  aes(
    x = DateFormated,
    y = auto_total,
    group = brand_name,
    colour = brand_name
  )
) +
  geom_line() +
  geom_point() +
  xlab("date") +
  ylab("total auto sales") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.0, size = 10)
  )