## Load tidyverse
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------ tidyverse 1.2.1 --
## √ ggplot2 3.0.0 √ purrr 0.2.5
## √ tibble 1.4.2 √ dplyr 0.7.6
## √ tidyr 0.8.1 √ stringr 1.3.1
## √ readr 1.1.1 √ forcats 0.3.0
## -- Conflicts --------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(readr)
library(stringr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
In the second part of the assignment I made two plots. One shows total car sales in Turkey and the other shows the total sales of each brand. After cleaning the data, what strikes me in this analysis is that car sales drop sharply at the beginning of every year, possibly due to the tax increases that take effect at the beginning of each year.
# Read the aggregated car sales data.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where the file exists at exactly this location.
car_data_aggregate <- readRDS("C:\\Users\\Efehan\\Documents\\car_data_aggregate.rds")

# Merge year and month into a zoo "yearmon" column (Date) and keep a
# "YYYY-MM" text rendering of it (Date2).
car_data_aggregate$Date <- zoo::as.yearmon(
  paste(car_data_aggregate$year, car_data_aggregate$month),
  "%Y %m"
)
car_data_aggregate$Date2 <- format(car_data_aggregate$Date, "%Y-%m")

# Drop rows that are not brands: the "TOPLAM:" total row and the rows whose
# brand_name carries the ODD disclaimer text.
car_data_aggregate <- car_data_aggregate %>%
  filter(brand_name != "TOPLAM:" & !str_detect(brand_name, "ODD"))

# Unite the Aston Martin spellings under "ASTON MARTIN".
# The variant spelling uses the Turkish dotted capital I (U+0130). Its UTF-8
# bytes show up as the two characters U+00C4 U+00B0 when read as Latin-1, so
# both forms are matched. Writing them as \u escapes keeps the pattern
# independent of the encoding this file is saved in.
car_data_aggregate$brand_name <- str_replace(
  car_data_aggregate$brand_name,
  "ASTON MART(?:\u0130|\u00c4\u00b0)N",
  "ASTON MARTIN"
)
# Total sales in Turkey between 2016 and 2018, stacked per quarter.
# The quarter is derived from the yearmon column Date. Calling as.yearqtr() on
# the "YYYY-MM" text in Date2 reads the month number as a quarter number, so
# only months 01-04 survive (mislabelled as Q1-Q4) and the remaining months
# turn into NA; an earlier render of this plot dropped 942 rows as missing
# values for that reason.
ggplot(car_data_aggregate, aes(x = as.yearqtr(Date), y = total_total)) +
  geom_col(fill = "green") +
  # zoo's scale for yearqtr values, so ggplot2 does not have to guess a scale
  # for the yearqtr class.
  scale_x_yearqtr() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(size = 20)
  )
# Which brand sold the most in Turkey between 2016 and 2018?
# Step 1: collapse the data to one row per brand holding its summed sales.
summary <- summarise(
  group_by(car_data_aggregate, brand_name),
  total_total = sum(total_total)
)

# Step 2: horizontal bar chart with brands ordered by their total and
# comma-formatted labels on the sales axis.
ggplot(
  data = summary,
  mapping = aes(x = reorder(brand_name, total_total), y = total_total)
) +
  geom_col(fill = "green") +
  scale_y_continuous(labels = comma) +
  coord_flip() +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(size = 13),
    axis.text.y = element_text(size = 8)
  )