ODD Sales Assignment

From Raw to Civilized Data

Find the data

First, car sales excel for November 2017 is downloaded from “Otomotiv Distibütörleri Derneği” website. webSite. File is renamed to “odd_retail_sales_2017_11.xlsx”" and uploaded to github github link

Download Raw Data

Below the excel file is downloaded from github to a local data frame (raw_data) and refined for analysis

tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/pj18-elmasriomer/blob/master/odd_retail_sales_2017_11.XLSX?raw=true",destfile=tmp,mode="wb")
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
file.remove(tmp)

## [1] TRUE

raw_data <- raw_data %>% slice(-c(46,47))
head(raw_data)

## # A tibble: 6 x 10
##   X__1          X__2  X__3  X__4  X__5  X__6  X__7  X__8  X__9 X__10
##   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO      NA    26    26    NA    NA     0     0    26    26
## 2 ASTON MARTIN    NA     1     1    NA    NA     0     0     1     1
## 3 AUDI            NA  2326  2326    NA    NA     0     0  2326  2326
## 4 BENTLEY         NA     0     0    NA    NA     0     0     0     0
## 5 BMW             NA  2399  2399    NA    NA     0     0  2399  2399
## 6 CHERY           NA    NA     0    NA    NA     0     0     0     0

Civilize Data

In order to make easy to read, we have named column names.

#column names for raw data
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
#replace NA values with 0 and add year/month cols
car_data_nov_17 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.)))%>% mutate(year=2017,month=11)
print(car_data_nov_17,width=Inf)

## # A tibble: 45 x 12
##    brand_name   auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>           <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO          0       26         26        0        0          0
##  2 ASTON MARTIN        0        1          1        0        0          0
##  3 AUDI                0     2326       2326        0        0          0
##  4 BENTLEY             0        0          0        0        0          0
##  5 BMW                 0     2399       2399        0        0          0
##  6 CHERY               0        0          0        0        0          0
##  7 CITROEN             0      745        745        0      294        294
##  8 DACIA               0     4238       4238        0      486        486
##  9 DS                  0        0          0        0        0          0
## 10 FERRARI             0        2          2        0        0          0
##    total_dom total_imp total_total  year month
##        <dbl>     <dbl>       <dbl> <dbl> <dbl>
##  1         0        26          26  2017    11
##  2         0         1           1  2017    11
##  3         0      2326        2326  2017    11
##  4         0         0           0  2017    11
##  5         0      2399        2399  2017    11
##  6         0         0           0  2017    11
##  7         0      1039        1039  2017    11
##  8         0      4724        4724  2017    11
##  9         0         0           0  2017    11
## 10         0         2           2  2017    11
## # ... with 35 more rows

Saving Data to Local Documentary

saveRDS(car_data_nov_17,file="C:/Users/USER/Documents/Data_Analytics/odd_retail_sales_2017_11.rds")

Analysis with Data

Default code to see the both of automobile and commercial vehicle sales over 0 valuewith brandnames as descending in the total sales

car_data_nov_17 %>% 
  filter(auto_total > 0 & comm_total > 0) %>%
  select(brand_name,total_total) %>%
  arrange(desc(total_total))

## # A tibble: 14 x 2
##    brand_name    total_total
##    <chr>               <dbl>
##  1 RENAULT             13984
##  2 VOLKSWAGEN          12937
##  3 FORD                12733
##  4 FIAT                12066
##  5 HYUNDAI              5205
##  6 PEUGEOT              4753
##  7 DACIA                4724
##  8 TOYOTA               4223
##  9 NISSAN               4184
## 10 MERCEDES-BENZ        3112
## 11 KIA                  1270
## 12 CITROEN              1039
## 13 MITSUBISHI            306
## 14 SSANGYONG              42

Total sales of both of automobile and commercial vehicles where these are over 0 value and percentage by brand name

car_data_nov_17 %>% 
  filter(auto_total > 0 & comm_total > 0) %>%
  select(brand_name,total_total) %>%
  arrange(desc(total_total))  %>%
  mutate(percentage = percent(total_total/sum(total_total))) %>% slice(1:10)

## # A tibble: 10 x 3
##    brand_name    total_total percentage
##    <chr>               <dbl> <chr>     
##  1 RENAULT             13984 17.4%     
##  2 VOLKSWAGEN          12937 16.1%     
##  3 FORD                12733 15.8%     
##  4 FIAT                12066 15.0%     
##  5 HYUNDAI              5205 6.5%      
##  6 PEUGEOT              4753 5.9%      
##  7 DACIA                4724 5.9%      
##  8 TOYOTA               4223 5.2%      
##  9 NISSAN               4184 5.2%      
## 10 MERCEDES-BENZ        3112 3.9%

Now, I would like to see a list which total sales is greater than average total sales value in all brands.

car_data_nov_17 %>% 
  select(brand_name, auto_total, comm_total, total_total) %>%
  mutate(average_total=mean(total_total)) %>%  
  filter(total_total > average_total)  %>%
  arrange(desc(total_total))

## # A tibble: 15 x 5
##    brand_name    auto_total comm_total total_total average_total
##    <chr>              <dbl>      <dbl>       <dbl>         <dbl>
##  1 RENAULT            11975       2009       13984         2241.
##  2 VOLKSWAGEN         10228       2709       12937         2241.
##  3 FORD                4476       8257       12733         2241.
##  4 FIAT                5358       6708       12066         2241.
##  5 OPEL                6069          0        6069         2241.
##  6 HYUNDAI             4939        266        5205         2241.
##  7 PEUGEOT             3258       1495        4753         2241.
##  8 DACIA               4238        486        4724         2241.
##  9 TOYOTA              3762        461        4223         2241.
## 10 NISSAN              3964        220        4184         2241.
## 11 MERCEDES-BENZ       2409        703        3112         2241.
## 12 SKODA               2574          0        2574         2241.
## 13 BMW                 2399          0        2399         2241.
## 14 AUDI                2326          0        2326         2241.
## 15 HONDA               2283          0        2283         2241.

Also I would like to see a vehicle list which their brand names start with “M” letter with total of automotive and commercial sales

  filtered_df <- car_data_nov_17 %>% dplyr::filter(substr(brand_name,1,1) == "M")
  select(filtered_df, brand_name, total_total) %>%
  arrange(desc(total_total))  %>%
  print(filtered_df)

## # A tibble: 5 x 2
##   brand_name    total_total
##   <chr>               <dbl>
## 1 MERCEDES-BENZ        3112
## 2 MITSUBISHI            306
## 3 MINI                  229
## 4 MAZDA                 131
## 5 MASERATI               17