library(dplyr)
library(tidyverse)
library(ggplot2)
library(scales)
Below, the RDS file (downloaded from GitHub or read from a local copy) is loaded into a data frame (total_sales) and refined for analysis.
# Load the aggregated car sales data ----
# Remote copy on GitHub (disabled; uncomment to fetch it instead of the local file):
#total_sales<- readRDS(url("https://github.com/MEF-BDA503/pj18-muharremcakir81/blob/master/Week4/car_data_aggregate.rds?raw=true"))
# Local copy, resolved relative to the current working directory.
total_sales <- readRDS("car_data_aggregate.rds")
# Drop the irrelevant rows: those whose brand_name begins with "ODD" or "TOPLAM".
total_sales <- total_sales %>%
  filter(
    !startsWith(brand_name, "ODD"),
    !startsWith(brand_name, "TOPLAM")
  )
# Inspect the cleaned data: row/column counts, column types and leading values.
# The `##` lines below are the recorded output of this call.
total_sales %>% glimpse()
## Observations: 1,477
## Variables: 12
## $ brand_name <chr> "ALFA ROMEO", "ASTON MARTIN", "AUDI", "BENTLEY", "...
## $ auto_dom <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 632, 91, 1471, 460, 0, ...
## $ auto_imp <dbl> 13, 2, 350, 0, 158, 134, 1141, 9, 3, 57, 705, 502,...
## $ auto_total <dbl> 13, 2, 350, 0, 158, 134, 1141, 9, 3, 689, 796, 197...
## $ comm_dom <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 789, 1421, 0, 0, 0, 59,...
## $ comm_imp <dbl> 0, 0, 0, 0, 0, 197, 319, 0, 0, 199, 139, 0, 39, 0,...
## $ comm_total <dbl> 0, 0, 0, 0, 0, 197, 319, 0, 0, 988, 1560, 0, 39, 0...
## $ total_dom <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1421, 1512, 1471, 460, ...
## $ total_imp <dbl> 13, 2, 350, 0, 158, 331, 1460, 9, 3, 256, 844, 502...
## $ total_total <dbl> 13, 2, 350, 0, 158, 331, 1460, 9, 3, 1677, 2356, 1...
## $ year <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 20...
## $ month <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,...
# Monthly automobile sales summed over all brands ----
# Sum auto_total per (year, month) and order the rows chronologically.
# summarise() peels off only the innermost grouping level (month), so without
# ungroup() the result would stay grouped by year and later verbs would
# silently operate per year.
# Date / DateFormated are year-month keys for the date-sorted line plot;
# DateFormated is the "%Y-%m" text form of Date.
sales_by_month <- total_sales %>%
  group_by(year, month) %>%
  summarise(total_auto = sum(auto_total)) %>%
  ungroup() %>%
  arrange(year, month) %>%
  mutate(
    Date = zoo::as.yearmon(paste(year, month), "%Y %m"),
    DateFormated = format(Date, "%Y-%m")
  )
# Line chart of total automobile sales per month ----
# DateFormated is text, so the x axis is discrete; group = 1 joins all months
# into a single line. Colour follows the numeric month and the legend is hidden.
ggplot(
  sales_by_month,
  aes(x = DateFormated, y = total_auto, group = 1, colour = month)
) +
  geom_line() +
  geom_point() +
  labs(x = "date", y = "total auto sales") +
  theme(
    legend.position = "none",
    # Vertical tick labels keep the month labels from overlapping.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0, size = 10)
  )
# Ten best-selling brands over the whole data set ----
# Total auto_total per brand, ordered from highest to lowest; the first ten
# rows of that ordering are kept.
brand_total_top_sales <- total_sales %>%
  group_by(brand_name) %>%
  summarise(brand_total_auto = sum(auto_total)) %>%
  arrange(desc(brand_total_auto)) %>%
  slice(seq_len(10))
# Bar chart of the top brands, tallest bar first ----
# reorder() sorts the x axis by descending brand total; each brand gets its own
# fill colour and the (redundant) legend is hidden.
ggplot(
  brand_total_top_sales,
  aes(
    x = reorder(brand_name, -brand_total_auto),
    y = brand_total_auto,
    fill = brand_name
  )
) +
  # geom_col() draws bar heights straight from y (identity stat).
  geom_col() +
  labs(x = "brand name", y = "total auto sales") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0, size = 10)
  )
# Monthly automobile sales of three selected brands ----
# Disabled snippet: brand name of the single row with the highest auto_total.
# brand=total_sales$brand_name[(which.max(total_sales$auto_total))]
# Keep only RENAULT, VOLKSWAGEN and FIAT rows with the columns needed for the
# plot, then add year-month keys (Date, and its "%Y-%m" text form DateFormated).
month_champion_of_top3 <- total_sales %>%
  filter(brand_name %in% c("RENAULT", "VOLKSWAGEN", "FIAT")) %>%
  select(brand_name, auto_total, year, month) %>%
  mutate(
    Date = zoo::as.yearmon(paste(year, month), "%Y %m"),
    DateFormated = format(Date, "%Y-%m")
  )
# Line chart comparing the selected brands month by month ----
# One line (and colour) per brand; the legend sits below the plot.
ggplot(
  month_champion_of_top3,
  aes(x = DateFormated, y = auto_total, group = brand_name, colour = brand_name)
) +
  geom_line() +
  geom_point() +
  theme(
    legend.position = "bottom",
    # Vertical tick labels keep the month labels from overlapping.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0, size = 10)
  ) +
  labs(x = "date", y = "total auto sales")