# Load the aggregated car data, drop the "TOPLAM:" rows and every row whose
# brand name contains "ODD", then rewrite the mis-encoded Aston Martin brand
# name to plain ASCII.
car_agg <- readRDS("C:\\Users\\Kafein\\Documents\\github2\\pj18-Leyla.Yigit\\AssignmentWeek3\\car_data_aggregate.rds") %>%
  filter(brand_name != "TOPLAM:", !str_detect(brand_name, "ODD")) %>%
  mutate(brand_name = str_replace(brand_name, "ASTON MARTÄ°N", "ASTON MARTIN"))
## # A tibble: 1,477 x 12
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO        0       13         13        0        0          0
##  2 ASTON MAR~        0        2          2        0        0          0
##  3 AUDI              0      350        350        0        0          0
##  4 BENTLEY           0        0          0        0        0          0
##  5 BMW               0      158        158        0        0          0
##  6 CITROEN           0      134        134        0      197        197
##  7 DACIA             0     1141       1141        0      319        319
##  8 DS                0        9          9        0        0          0
##  9 FERRARI           0        3          3        0        0          0
## 10 FIAT            632       57        689      789      199        988
## # ... with 1,467 more rows, and 5 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>
#glimpse(car_agg) # gives structure of data

## function (data) 
## {
##     as_tibble(data, .name_repair = "check_unique")
## }
## <bytecode: 0x000000001cc5e670>
## <environment: namespace:dplyr>
## # A tibble: 33 x 1
##    Date      
##    <date>    
##  1 2018-09-30
##  2 2018-08-31
##  3 2018-07-31
##  4 2018-06-30
##  5 2018-05-31
##  6 2018-04-30
##  7 2018-03-31
##  8 2018-02-28
##  9 2018-01-31
## 10 2017-12-31
## # ... with 23 more rows

# Calculate total sales numbers (the summaries below use sum(), not mean())

# Sum of auto_total for each year, sorted by year.
# slice(1:12) keeps at most the first 12 rows of that summary.
yearly_sales <- car_agg %>%
  group_by(year) %>%
  summarise(total_auto = sum(auto_total)) %>%
  arrange(year) %>%
  slice(1:12)
## # A tibble: 3 x 2
##    year total_auto
##   <dbl>      <dbl>
## 1  2016     755368
## 2  2017     678539
## 3  2018     361913
# Sum of auto_total for each Date, sorted by Date; only the first 12 rows of
# the summary are kept.
# NOTE(review): the Date column is not created in the visible part of this
# script -- confirm it is added to car_agg before this point.
monthly_sales <- car_agg %>%
  group_by(Date) %>%
  summarise(total_auto = sum(auto_total)) %>%
  arrange(Date) %>%
  slice(1:12)
## # A tibble: 12 x 2
##    Date       total_auto
##    <date>          <dbl>
##  1 2016-01-31      23278
##  2 2016-02-29      40588
##  3 2016-03-31      63629
##  4 2016-04-30      65618
##  5 2016-05-31      73832
##  6 2016-06-30      70567
##  7 2016-07-31      45566
##  8 2016-08-31      53977
##  9 2016-09-30      50777
## 10 2016-10-31      63709
## 11 2016-11-30      95783
## 12 2016-12-31     108044
# Monthly totals as points with a smoothed trend line, aesthetics specified
# inside the geoms.
# The colour is a fixed value, so it is set outside aes(). Inside aes() the
# string "steelblue" is treated as data: the series gets the default palette
# colour and a spurious legend entry labelled "steelblue".
ggplot(monthly_sales) +
  geom_point(aes(x = Date, y = total_auto), color = "steelblue") +
  geom_smooth(aes(x = Date, y = total_auto), color = "steelblue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# The labs() layer sets the plot's main title and the X and Y axis titles.
# The size and face of those labels are controlled through theme().
# No colour aesthetic is mapped: there is a single series, so a colour legend
# would carry no information.
gg <- ggplot(monthly_sales, aes(x = Date, y = total_auto)) +
  geom_point() +
  labs(title = "Scatterplot", x = "Date", y = "amount")

# Adjust the theme: enlarge the plot title (bold) and the y-axis title.
gg1 <- gg +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    axis.title.y = element_text(size = 25)
  )
print(gg1)  # print the plot

# Colourful version: total_total against Date, one semi-transparent point per
# row of car_agg, coloured by brand.
# ggplot() has no size/height/width parameters, so none are passed here.
ggplot(data = car_agg, aes(x = Date, y = total_total)) +
  geom_point(aes(color = brand_name), alpha = 0.3)

#gg1 + facet_wrap( ~ brand_name, ncol=3)  # columns defined by 'cut'

# Distribution of total_imp per brand as a plain box plot.
# The plot must be complete on its own: a trailing `+` here would try to add
# the next ggplot() call to this one, which is an error.
ggplot(data = car_agg, aes(x = brand_name, y = total_imp)) +
  geom_boxplot()

# By adding points to the box plot, we get a better idea of the number of
# measurements and of their distribution.
ggplot(data = car_agg, aes(x = brand_name, y = total_imp)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(alpha = 0.3, color = "tomato") +
  theme(axis.text.x = element_text(angle = 90)) +  # vertical brand labels
  ylab("Total Cars Imported")

# Count the number of rows in car_agg for each Date.
# count() groups by Date and tallies in one step, producing the columns
# Date and n shown in the printed output below.
count_by_month <- car_agg %>%
  count(Date)

## # A tibble: 33 x 2
##    Date           n
##    <date>     <int>
##  1 2016-01-31    46
##  2 2016-02-29    48
##  3 2016-03-31    46
##  4 2016-04-30    48
##  5 2016-05-31    48
##  6 2016-06-30    46
##  7 2016-07-31    48
##  8 2016-08-31    48
##  9 2016-09-30    46
## 10 2016-10-31    47
## # ... with 23 more rows
# Base plot of the per-date counts; geoms are added to `p` afterwards.
p <- ggplot(count_by_month, aes(x = Date, y = n))
# Bar chart of the per-date counts, with each count printed in white just
# below the top of its bar (vjust > 1 pushes the label downwards).
# geom_col() already draws the bars from the y values, so no additional
# geom_bar(stat = "identity") layer is needed.
p +
  geom_col() +
  geom_text(
    aes(label = n),
    vjust = 1.6, color = "white", size = 3,
    position = position_dodge(0.10)
  ) +
  labs(y = "The Sales of Each Brands", x = "Years")

#p + geom_line() + geom_point()
#p + geom_line() + geom_point(aes(color=Date))
#ggplot line
# Rows of car_agg for the three selected brands.
luxury_data <- car_agg %>%
  filter(brand_name %in% c("BMW", "MERCEDES-BENZ", "AUDI"))

# Line chart of total_total over time, one line per brand.
# The date is built from year and month, using the first day of the month.
# The legend comes from the colour aesthetic, so its title is set with
# `color =` (a `fill =` label would have no effect here).
luxury_data %>%
  mutate(date = as.Date(paste(year, month, 1, sep = "-"))) %>%
  ggplot(data = ., aes(x = date, y = total_total, color = brand_name)) +
  geom_line() +
  geom_point() +
  labs(y = "The Sales of Each Brands", x = "Months", color = "Brands")

# Yearly sums of total_total, auto_total and comm_total; this is the input
# for the pairs plot further down.
car_agg_summary <-
  car_agg %>%
  group_by(year) %>%
  summarise(
    total_total = sum(total_total),
    auto_total = sum(auto_total),
    comm_total = sum(comm_total)
  )

## # A tibble: 3 x 4
##    year total_total auto_total comm_total
##   <dbl>       <dbl>      <dbl>      <dbl>
## 1  2016      982150     755368     226782
## 2  2017      900859     678539     222320
## 3  2018      462904     361913     100991
library(GGally)
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##     nasa
# The ggpairs function from the GGally package plots all pairs of
# scatterplots for several variables; year is dropped so that only the three
# totals are compared.
ggpairs(car_agg_summary %>% select(total_total, auto_total, comm_total))

# Per-brand sums of total_total, auto_total and comm_total over all rows of
# car_agg, sorted with the largest total_total first.
car_agg_summary_brand <-
  car_agg %>%
  group_by(brand_name) %>%
  summarise(
    total_total = sum(total_total),
    auto_total = sum(auto_total),
    comm_total = sum(comm_total)
  ) %>%
  arrange(desc(total_total))

## # A tibble: 49 x 4
##    brand_name    total_total auto_total comm_total
##    <chr>               <dbl>      <dbl>      <dbl>
##  1 RENAULT            318500     279831      38669
##  2 FIAT               275900     142022     133878
##  3 FORD               271157     100172     170985
##  4 VOLKSWAGEN         262041     198371      63670
##  5 HYUNDAI            131666     125446       6220
##  6 DACIA              117978     103206      14772
##  7 OPEL               117122     117122          0
##  8 TOYOTA             106950      95582      11368
##  9 PEUGEOT             98036      74042      23994
## 10 MERCEDES-BENZ       93944      70584      23360
## # ... with 39 more rows
# Scatter plot of auto_total against total_total, one point per brand,
# coloured by auto_total. A second, semi-transparent jittered copy of the
# points is drawn on top in "tomato".
ggplot(
  car_agg_summary_brand,
  aes(x = total_total, y = auto_total, color = auto_total)
) +
  geom_point(size = 3) +
  ggtitle("Auto trend in the total cars number") +
  xlab("total_total") +
  ylab("auto_total") +
  geom_jitter(alpha = 0.3, color = "tomato") +
  theme(
    plot.margin = margin(2, .8, 2, .8, "cm"),
    plot.background = element_rect(fill = "darkgrey")
  )

# Treemap of the per-brand summary: tile area is total_total, and each tile
# is filled and labelled by brand name. The legend sits below the plot.
# NOTE(review): geom_treemap()/geom_treemap_text() presumably come from the
# treemapify package -- confirm it is attached earlier in the script.
ggplot(
  car_agg_summary_brand,
  aes(area = total_total, fill = brand_name, label = brand_name)
) +
  theme(legend.position = "bottom") +
  geom_treemap() +
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "centre",
    grow = TRUE
  )