ODD Sales Assignment

From Raw to Civilized Data

Find the data

First, car sales excel for November 2016 is downloaded from “Otomotiv Distibütörleri Derneği” website. webSite. File is renamed to “odd_retail_sales_2016_11.xlsx”" and uploaded to github github link

Download Raw Data

Below the excel file is downloaded from github to a local data frame (raw_data) and refined for analysis

tmp5<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/pj18-TarikOzcelik81/blob/master/odd_retail_sales_2016_11.xlsx?raw=true",destfile=tmp5,mode="wb")
raw_data<-readxl::read_excel(tmp5,skip=7,col_names=FALSE)
file.remove(tmp5)

## [1] TRUE

raw_data <- raw_data %>% slice(-c(49,50))
head(raw_data)

## # A tibble: 6 x 10
##   X__1          X__2  X__3  X__4  X__5  X__6  X__7  X__8  X__9 X__10
##   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO      NA    53    53    NA    NA     0     0    53    53
## 2 ASTON MARTIN    NA     2     2    NA    NA     0     0     2     2
## 3 AUDI            NA  3189  3189    NA    NA     0     0  3189  3189
## 4 BENTLEY         NA     2     2    NA    NA     0     0     2     2
## 5 BMW             NA  4611  4611    NA    NA     0     0  4611  4611
## 6 CHERY           NA     0     0    NA    NA     0     0     0     0

Civilize Data

Below raw data is made better for human eye and prepared fur cumulative analysis

#column names for raw data
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
#replace NA values with 0 and add year/month cols
car_data_nov_16 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.)))%>% mutate(year=2018,month=9)
print(car_data_nov_16,width=Inf)

## # A tibble: 48 x 12
##    brand_name   auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>           <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO          0       53         53        0        0          0
##  2 ASTON MARTIN        0        2          2        0        0          0
##  3 AUDI                0     3189       3189        0        0          0
##  4 BENTLEY             0        2          2        0        0          0
##  5 BMW                 0     4611       4611        0        0          0
##  6 CHERY               0        0          0        0        0          0
##  7 CITROEN             0     2017       2017      274      711        985
##  8 DACIA               0     4745       4745        0      747        747
##  9 DS                  0       42         42        0        0          0
## 10 FERRARI             0        2          2        0        0          0
##    total_dom total_imp total_total  year month
##        <dbl>     <dbl>       <dbl> <dbl> <dbl>
##  1         0        53          53  2018     9
##  2         0         2           2  2018     9
##  3         0      3189        3189  2018     9
##  4         0         2           2  2018     9
##  5         0      4611        4611  2018     9
##  6         0         0           0  2018     9
##  7       274      2728        3002  2018     9
##  8         0      5492        5492  2018     9
##  9         0        42          42  2018     9
## 10         0         2           2  2018     9
## # ... with 38 more rows

Save Data to Local

saveRDS(car_data_nov_16,file="C:/Users/to59290/Documents/odd_car_sales_data_nov_16.rds")

Further Analysis with Data

total sales and percentage by brand name

car_data_nov_16 %>% 
  filter(auto_total > 0 & comm_total > 0) %>%
  select(brand_name,total_total) %>%
  arrange(desc(total_total))  %>%
  mutate(percentage = percent(total_total/sum(total_total)))

## # A tibble: 14 x 3
##    brand_name    total_total percentage
##    <chr>               <dbl> <chr>     
##  1 VOLKSWAGEN          14359 15.2%     
##  2 FIAT                13229 14.0%     
##  3 RENAULT             12760 13.5%     
##  4 FORD                12500 13.2%     
##  5 TOYOTA               8323 8.8%      
##  6 MERCEDES-BENZ        6933 7.3%      
##  7 HYUNDAI              5829 6.2%      
##  8 DACIA                5492 5.8%      
##  9 NISSAN               4429 4.7%      
## 10 PEUGEOT              4220 4.5%      
## 11 CITROEN              3002 3.2%      
## 12 KIA                  2789 3.0%      
## 13 MITSUBISHI            463 0.5%      
## 14 SSANGYONG              77 0.1%

top 10 sales by number and percentage for auto domestic, auto import, commercial domestic, commercial import

#first we find long format
car_data_nov_16_long <- car_data_nov_16 %>%
  select(brand_name, auto_dom , auto_imp, comm_dom,comm_imp ) %>%
  gather(key=sales_sta,value=sales_num, -brand_name) 
top_10_sales <- car_data_nov_16_long %>%
  arrange(desc(sales_num))  %>% 
  mutate(percentage = percent(sales_num/sum(sales_num)))   %>% 
  slice(1:10)
top_10_sales

## # A tibble: 10 x 4
##    brand_name    sales_sta sales_num percentage
##    <chr>         <chr>         <dbl> <chr>     
##  1 VOLKSWAGEN    auto_imp      10587 8.66%     
##  2 FORD          comm_dom       7187 5.88%     
##  3 OPEL          auto_imp       7129 5.83%     
##  4 RENAULT       auto_dom       6905 5.65%     
##  5 FIAT          auto_dom       6638 5.43%     
##  6 MERCEDES-BENZ auto_imp       5858 4.79%     
##  7 TOYOTA        auto_dom       5762 4.71%     
##  8 FIAT          comm_dom       5595 4.57%     
##  9 DACIA         auto_imp       4745 3.88%     
## 10 BMW           auto_imp       4611 3.77%

top 5 sale numbers for each auto domestic, auto import, commercial domestic, commercial import

auto_imp_v <- car_data_nov_16_long  %>% filter(sales_sta=="auto_imp")  %>% arrange(desc(sales_num))  %>% select(sales_num) %>% slice(1:5) 
auto_dom_v <- car_data_nov_16_long  %>% filter(sales_sta=="auto_dom")  %>% arrange(desc(sales_num))  %>% select(sales_num) %>% slice(1:5)
comm_imp_v <- car_data_nov_16_long  %>% filter(sales_sta=="comm_imp")  %>% arrange(desc(sales_num))  %>% select(sales_num) %>% slice(1:5)
comm_dom_v <- car_data_nov_16_long  %>% filter(sales_sta=="comm_dom")  %>% arrange(desc(sales_num))  %>% select(sales_num) %>% slice(1:5)
data_f <- data.frame(auto_dom_v,auto_imp_v,comm_dom_v,comm_imp_v)
colnames(data_f) <- c("top auto dom","top auto imp","top comm dom","top comm imp")
data_f

##   top auto dom top auto imp top comm dom top comm imp
## 1         6905        10587         7187         3772
## 2         6638         7129         5595         1929
## 3         5762         5858          385         1075
## 4         2665         4745          274          831
## 5         2121         4611          182          747