Initial Analysis

raw_data <- readRDS("C:/Users/ezegokce/Desktop/car_data_aggregate.rds")

head(raw_data)
## # A tibble: 6 x 12
##   brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##   <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
## 1 ALFA ROMEO        0       13         13        0        0          0
## 2 ASTON MAR~        0        2          2        0        0          0
## 3 AUDI              0      350        350        0        0          0
## 4 BENTLEY           0        0          0        0        0          0
## 5 BMW               0      158        158        0        0          0
## 6 CITROEN           0      134        134        0      197        197
## # ... with 5 more variables: total_dom <dbl>, total_imp <dbl>,
## #   total_total <dbl>, year <dbl>, month <dbl>
tail(raw_data)
## # A tibble: 6 x 12
##   brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##   <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
## 1 SSANGYONG         0       19         19        0        3          3
## 2 TATA              0        0          0        0        9          9
## 3 TOYOTA         1298      149       1447        0       34         34
## 4 VOLKSWAGEN        0     2792       2792        0     1736       1736
## 5 VOLVO             0      187        187        0        0          0
## 6 TOPLAM:        7375    15983      23358     4815     4540       9355
## # ... with 5 more variables: total_dom <dbl>, total_imp <dbl>,
## #   total_total <dbl>, year <dbl>, month <dbl>
#raw_data$brand_name <- str_replace(raw_data$brand_name,"ASTON MARTÄ°N","ASTON MARTIN") ## correction on strings
dim(raw_data) # find out the index of latest row number
## [1] 1490   12
raw_data <- raw_data %>% slice(-c(1490))# delete the last row , since it shows the total value
tail(raw_data)
## # A tibble: 6 x 12
##   brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##   <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
## 1 SMART             0        0          0        0        0          0
## 2 SSANGYONG         0       19         19        0        3          3
## 3 TATA              0        0          0        0        9          9
## 4 TOYOTA         1298      149       1447        0       34         34
## 5 VOLKSWAGEN        0     2792       2792        0     1736       1736
## 6 VOLVO             0      187        187        0        0          0
## # ... with 5 more variables: total_dom <dbl>, total_imp <dbl>,
## #   total_total <dbl>, year <dbl>, month <dbl>
raw_data %>%  ##  Have a look at the brand names and remove dublicated brand names
filter(total_total>0 & comm_total>0) %>%
distinct(sort(brand_name))
## # A tibble: 19 x 1
##    `sort(brand_name)`
##    <chr>             
##  1 CITROEN           
##  2 DACIA             
##  3 FIAT              
##  4 FORD              
##  5 HYUNDAI           
##  6 ISUZU             
##  7 IVECO             
##  8 KARSAN            
##  9 KIA               
## 10 MERCEDES-BENZ     
## 11 MITSUBISHI        
## 12 NISSAN            
## 13 PEUGEOT           
## 14 RENAULT           
## 15 SSANGYONG         
## 16 TATA              
## 17 TOPLAM:           
## 18 TOYOTA            
## 19 VOLKSWAGEN
raw_data$Date <- zoo::as.yearmon(paste(raw_data$year, raw_data$month), "%Y %m") ## combining year and month
raw_data$Date2 <- format(raw_data$Date,"%Y-%m")

raw_data
## # A tibble: 1,489 x 14
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO        0       13         13        0        0          0
##  2 ASTON MAR~        0        2          2        0        0          0
##  3 AUDI              0      350        350        0        0          0
##  4 BENTLEY           0        0          0        0        0          0
##  5 BMW               0      158        158        0        0          0
##  6 CITROEN           0      134        134        0      197        197
##  7 DACIA             0     1141       1141        0      319        319
##  8 DS                0        9          9        0        0          0
##  9 FERRARI           0        3          3        0        0          0
## 10 FIAT            632       57        689      789      199        988
## # ... with 1,479 more rows, and 7 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## #   yearmon>, Date2 <chr>
summarise(group_by(raw_data, year),total=sum(total_total)) ## shows the total count of sales for each year.
## # A tibble: 3 x 2
##    year   total
##   <dbl>   <dbl>
## 1  2016 1224231
## 2  2017 1168944
## 3  2018  672371
raw_data %>% ## shows  total sales for each brand in monthly breakdown
filter(total_total>0) %>%
select(Date,total_total)%>%
arrange(desc(total_total))
## # A tibble: 1,328 x 2
##    Date          total_total
##    <S3: yearmon>       <dbl>
##  1 Jun 2016            91540
##  2 May 2017            85422
##  3 Mar 2016            82948
##  4 Mar 2018            76345
##  5 Apr 2017            75988
##  6 Sep 2017            71352
##  7 Sep 2016            67593
##  8 Jun 2018            51037
##  9 Feb 2018            47009
## 10 Jan 2017            35323
## # ... with 1,318 more rows
#raw_data %>%
#summarise(Date,mean=mean(total_total))  # total count of sales in the given period.

date_based_totals <- raw_data %>%  # grouping by three criterias
group_by(year, month,total_total)  



print(date_based_totals)
## # A tibble: 1,489 x 14
## # Groups:   year, month, total_total [1,286]
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO        0       13         13        0        0          0
##  2 ASTON MAR~        0        2          2        0        0          0
##  3 AUDI              0      350        350        0        0          0
##  4 BENTLEY           0        0          0        0        0          0
##  5 BMW               0      158        158        0        0          0
##  6 CITROEN           0      134        134        0      197        197
##  7 DACIA             0     1141       1141        0      319        319
##  8 DS                0        9          9        0        0          0
##  9 FERRARI           0        3          3        0        0          0
## 10 FIAT            632       57        689      789      199        988
## # ... with 1,479 more rows, and 7 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## #   yearmon>, Date2 <chr>
mutate(raw_data, ratio=auto_total/total_total) # insert a mutate command
## # A tibble: 1,489 x 15
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO        0       13         13        0        0          0
##  2 ASTON MAR~        0        2          2        0        0          0
##  3 AUDI              0      350        350        0        0          0
##  4 BENTLEY           0        0          0        0        0          0
##  5 BMW               0      158        158        0        0          0
##  6 CITROEN           0      134        134        0      197        197
##  7 DACIA             0     1141       1141        0      319        319
##  8 DS                0        9          9        0        0          0
##  9 FERRARI           0        3          3        0        0          0
## 10 FIAT            632       57        689      789      199        988
## # ... with 1,479 more rows, and 8 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## #   yearmon>, Date2 <chr>, ratio <dbl>
raw_data
## # A tibble: 1,489 x 14
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO        0       13         13        0        0          0
##  2 ASTON MAR~        0        2          2        0        0          0
##  3 AUDI              0      350        350        0        0          0
##  4 BENTLEY           0        0          0        0        0          0
##  5 BMW               0      158        158        0        0          0
##  6 CITROEN           0      134        134        0      197        197
##  7 DACIA             0     1141       1141        0      319        319
##  8 DS                0        9          9        0        0          0
##  9 FERRARI           0        3          3        0        0          0
## 10 FIAT            632       57        689      789      199        988
## # ... with 1,479 more rows, and 7 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## #   yearmon>, Date2 <chr>

Including Plots

You can also embed plots, for example:

## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.