raw_data <- readRDS("C:/Users/ezegokce/Desktop/car_data_aggregate.rds")
head(raw_data)
## # A tibble: 6 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 13 13 0 0 0
## 2 ASTON MAR~ 0 2 2 0 0 0
## 3 AUDI 0 350 350 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 158 158 0 0 0
## 6 CITROEN 0 134 134 0 197 197
## # ... with 5 more variables: total_dom <dbl>, total_imp <dbl>,
## # total_total <dbl>, year <dbl>, month <dbl>
tail(raw_data)
## # A tibble: 6 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SSANGYONG 0 19 19 0 3 3
## 2 TATA 0 0 0 0 9 9
## 3 TOYOTA 1298 149 1447 0 34 34
## 4 VOLKSWAGEN 0 2792 2792 0 1736 1736
## 5 VOLVO 0 187 187 0 0 0
## 6 TOPLAM: 7375 15983 23358 4815 4540 9355
## # ... with 5 more variables: total_dom <dbl>, total_imp <dbl>,
## # total_total <dbl>, year <dbl>, month <dbl>
#raw_data$brand_name <- str_replace(raw_data$brand_name,"ASTON MARTÄ°N","ASTON MARTIN") ## correction on strings
dim(raw_data) # find out the index of latest row number
## [1] 1490 12
raw_data <- raw_data %>% slice(-c(1490))# delete the last row , since it shows the total value
tail(raw_data)
## # A tibble: 6 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SMART 0 0 0 0 0 0
## 2 SSANGYONG 0 19 19 0 3 3
## 3 TATA 0 0 0 0 9 9
## 4 TOYOTA 1298 149 1447 0 34 34
## 5 VOLKSWAGEN 0 2792 2792 0 1736 1736
## 6 VOLVO 0 187 187 0 0 0
## # ... with 5 more variables: total_dom <dbl>, total_imp <dbl>,
## # total_total <dbl>, year <dbl>, month <dbl>
raw_data %>% ## Have a look at the brand names and remove dublicated brand names
filter(total_total>0 & comm_total>0) %>%
distinct(sort(brand_name))
## # A tibble: 19 x 1
## `sort(brand_name)`
## <chr>
## 1 CITROEN
## 2 DACIA
## 3 FIAT
## 4 FORD
## 5 HYUNDAI
## 6 ISUZU
## 7 IVECO
## 8 KARSAN
## 9 KIA
## 10 MERCEDES-BENZ
## 11 MITSUBISHI
## 12 NISSAN
## 13 PEUGEOT
## 14 RENAULT
## 15 SSANGYONG
## 16 TATA
## 17 TOPLAM:
## 18 TOYOTA
## 19 VOLKSWAGEN
raw_data$Date <- zoo::as.yearmon(paste(raw_data$year, raw_data$month), "%Y %m") ## combining year and month
raw_data$Date2 <- format(raw_data$Date,"%Y-%m")
raw_data
## # A tibble: 1,489 x 14
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 13 13 0 0 0
## 2 ASTON MAR~ 0 2 2 0 0 0
## 3 AUDI 0 350 350 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 158 158 0 0 0
## 6 CITROEN 0 134 134 0 197 197
## 7 DACIA 0 1141 1141 0 319 319
## 8 DS 0 9 9 0 0 0
## 9 FERRARI 0 3 3 0 0 0
## 10 FIAT 632 57 689 789 199 988
## # ... with 1,479 more rows, and 7 more variables: total_dom <dbl>,
## # total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## # yearmon>, Date2 <chr>
summarise(group_by(raw_data, year),total=sum(total_total)) ## shows the total count of sales for each year.
## # A tibble: 3 x 2
## year total
## <dbl> <dbl>
## 1 2016 1224231
## 2 2017 1168944
## 3 2018 672371
raw_data %>% ## shows total sales for each brand in monthly breakdown
filter(total_total>0) %>%
select(Date,total_total)%>%
arrange(desc(total_total))
## # A tibble: 1,328 x 2
## Date total_total
## <S3: yearmon> <dbl>
## 1 Jun 2016 91540
## 2 May 2017 85422
## 3 Mar 2016 82948
## 4 Mar 2018 76345
## 5 Apr 2017 75988
## 6 Sep 2017 71352
## 7 Sep 2016 67593
## 8 Jun 2018 51037
## 9 Feb 2018 47009
## 10 Jan 2017 35323
## # ... with 1,318 more rows
#raw_data %>%
#summarise(Date,mean=mean(total_total)) # total count of sales in the given period.
date_based_totals <- raw_data %>% # grouping by three criterias
group_by(year, month,total_total)
print(date_based_totals)
## # A tibble: 1,489 x 14
## # Groups: year, month, total_total [1,286]
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 13 13 0 0 0
## 2 ASTON MAR~ 0 2 2 0 0 0
## 3 AUDI 0 350 350 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 158 158 0 0 0
## 6 CITROEN 0 134 134 0 197 197
## 7 DACIA 0 1141 1141 0 319 319
## 8 DS 0 9 9 0 0 0
## 9 FERRARI 0 3 3 0 0 0
## 10 FIAT 632 57 689 789 199 988
## # ... with 1,479 more rows, and 7 more variables: total_dom <dbl>,
## # total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## # yearmon>, Date2 <chr>
mutate(raw_data, ratio=auto_total/total_total) # insert a mutate command
## # A tibble: 1,489 x 15
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 13 13 0 0 0
## 2 ASTON MAR~ 0 2 2 0 0 0
## 3 AUDI 0 350 350 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 158 158 0 0 0
## 6 CITROEN 0 134 134 0 197 197
## 7 DACIA 0 1141 1141 0 319 319
## 8 DS 0 9 9 0 0 0
## 9 FERRARI 0 3 3 0 0 0
## 10 FIAT 632 57 689 789 199 988
## # ... with 1,479 more rows, and 8 more variables: total_dom <dbl>,
## # total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## # yearmon>, Date2 <chr>, ratio <dbl>
raw_data
## # A tibble: 1,489 x 14
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 13 13 0 0 0
## 2 ASTON MAR~ 0 2 2 0 0 0
## 3 AUDI 0 350 350 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 158 158 0 0 0
## 6 CITROEN 0 134 134 0 197 197
## 7 DACIA 0 1141 1141 0 319 319
## 8 DS 0 9 9 0 0 0
## 9 FERRARI 0 3 3 0 0 0
## 10 FIAT 632 57 689 789 199 988
## # ... with 1,479 more rows, and 7 more variables: total_dom <dbl>,
## # total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>, Date <S3:
## # yearmon>, Date2 <chr>
You can also embed plots, for example:
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
Note that the echo = FALSE
parameter was added to the code chunk to prevent printing of the R code that generated the plot.