R Markdown

# Load the aggregated car data, drop the rows whose brand_name is "TOPLAM:"
# or contains "ODD", and normalise the spelling of one brand name.
# NOTE(review): the absolute Windows path below only resolves on the machine
# it was written on - consider a project-relative path.
car_agg <- readRDS("C:\\Users\\Kafein\\Documents\\github2\\pj18-Leyla.Yigit\\AssignmentWeek3\\car_data_aggregate.rds")
car_agg <- car_agg %>%
  filter(brand_name != "TOPLAM:" & !str_detect(brand_name, "ODD"))
# NOTE(review): the search pattern looks mis-encoded (presumably a dotted
# capital I was intended) - confirm that it really matches the raw data.
car_agg$brand_name <- str_replace(car_agg$brand_name, "ASTON MARTÄ°N", "ASTON MARTIN")
# tbl_df() is deprecated; as_tibble() is the call it delegates to.
# The result is only printed, car_agg itself is not converted.
as_tibble(car_agg)
## # A tibble: 1,477 x 12
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO        0       13         13        0        0          0
##  2 ASTON MAR~        0        2          2        0        0          0
##  3 AUDI              0      350        350        0        0          0
##  4 BENTLEY           0        0          0        0        0          0
##  5 BMW               0      158        158        0        0          0
##  6 CITROEN           0      134        134        0      197        197
##  7 DACIA             0     1141       1141        0      319        319
##  8 DS                0        9          9        0        0          0
##  9 FERRARI           0        3          3        0        0          0
## 10 FIAT            632       57        689      789      199        988
## # ... with 1,467 more rows, and 5 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>
#glimpse(car_agg) # gives structure of data

Including Plots

You can also embed plots, for example:

## function (data) 
## {
##     as_tibble(data, .name_repair = "check_unique")
## }
## <bytecode: 0x000000001cc5e670>
## <environment: namespace:dplyr>
## # A tibble: 33 x 1
##    Date      
##    <date>    
##  1 2018-09-30
##  2 2018-08-31
##  3 2018-07-31
##  4 2018-06-30
##  5 2018-05-31
##  6 2018-04-30
##  7 2018-03-31
##  8 2018-02-28
##  9 2018-01-31
## 10 2017-12-31
## # ... with 23 more rows

# Sum of auto_total per year, ordered by year.
# slice() keeps at most the first 12 rows of the result.
yearly_sales <- car_agg %>%
  group_by(year) %>%
  summarise(total_auto = sum(auto_total)) %>%
  arrange(year) %>%
  slice(seq_len(12))
yearly_sales
## # A tibble: 3 x 2
##    year total_auto
##   <dbl>      <dbl>
## 1  2016     755368
## 2  2017     678539
## 3  2018     361913
# Sum of auto_total per Date, ordered by Date; only the 12 earliest dates
# are kept.
monthly_sales <- car_agg %>%
  group_by(Date) %>%
  summarise(total_auto = sum(auto_total)) %>%
  arrange(Date) %>%
  slice(seq_len(12))
monthly_sales
## # A tibble: 12 x 2
##    Date       total_auto
##    <date>          <dbl>
##  1 2016-01-31      23278
##  2 2016-02-29      40588
##  3 2016-03-31      63629
##  4 2016-04-30      65618
##  5 2016-05-31      73832
##  6 2016-06-30      70567
##  7 2016-07-31      45566
##  8 2016-08-31      53977
##  9 2016-09-30      50777
## 10 2016-10-31      63709
## 11 2016-11-30      95783
## 12 2016-12-31     108044
# Monthly automobile sales as points with a smoothed trend line.
# The colour is a fixed value, so it is set outside aes(). Inside aes() the
# string "steelblue" is treated as data: the layers get the default hue and
# a one-entry legend labelled "steelblue" instead of the requested colour.
ggplot(monthly_sales, aes(x = Date, y = total_auto)) +
  geom_point(colour = "steelblue") +
  geom_smooth(colour = "steelblue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Scatterplot
# The main title and the axis titles are set with the labs() layer; the size
# and face of that text are controlled through theme().
library(ggplot2)
# The point colour is a constant and therefore set outside aes(). The former
# aes(color = "cut") mapped a literal string as data and only produced a
# meaningless one-entry legend.
gg <- ggplot(monthly_sales, aes(x = Date, y = total_auto)) +
  geom_point(colour = "steelblue") +
  labs(title = "Scatterplot", x = "Date", y = "amount")
print(gg)

# Adjusting the theme: larger plot title, axis titles and axis tick labels.
# The colour scale that titled the legend "Cut of diamonds" was removed: no
# variable is mapped to colour, and the title did not describe this data.
gg1 <- gg +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25)
  )
print(gg1)

library(ggplot2)
# total_total against Date, one colour per brand_name.
# The size/height/width arguments formerly passed to ggplot() were dropped:
# ggplot() does not use extra arguments, so they had no effect on the plot.
ggplot(data = car_agg, aes(x = Date, y = total_total)) +
  geom_point(aes(colour = brand_name), alpha = 0.3)

#gg1 + facet_wrap( ~ brand_name, ncol=3)  # one panel per 'brand_name', laid out in 3 columns
#Boxplot

# Distribution of total_imp for every brand_name, one box per brand.
car_agg %>%
  ggplot(aes(x = brand_name, y = total_imp)) +
  geom_boxplot()

# Overlaying the individual observations on the boxplot gives a better idea
# of how many measurements each brand has and how they are distributed.
# The boxplot's own outlier markers are hidden because geom_jitter() already
# draws every observation; otherwise outliers would appear twice.
ggplot(data = car_agg, aes(x = brand_name, y = total_imp)) +
  geom_boxplot(alpha = 0, outlier.shape = NA) +
  geom_jitter(alpha = 0.3, colour = "tomato") +
  theme(axis.text.x = element_text(angle = 90)) +
  ylab("Total Cars Imported")

# Number of rows recorded for each Date: group the data by Date and count
# the records inside every group.
count_by_month <- car_agg %>%
  group_by(Date) %>%
  summarise(n = n())

count_by_month
## # A tibble: 33 x 2
##    Date           n
##    <date>     <int>
##  1 2016-01-31    46
##  2 2016-02-29    48
##  3 2016-03-31    46
##  4 2016-04-30    48
##  5 2016-05-31    48
##  6 2016-06-30    46
##  7 2016-07-31    48
##  8 2016-08-31    48
##  9 2016-09-30    46
## 10 2016-10-31    47
## # ... with 23 more rows
# Bar chart of the number of rows per Date, with the count printed in white
# near the top of each bar.
# Changes from the previous version:
#   * the stray `color = Date` argument to ggplot() was removed (it sat
#     outside aes() and was never used);
#   * geom_col() and geom_bar(stat = "identity") are the same layer, so the
#     bars were drawn twice - only geom_col() is kept;
#   * no variable is mapped to fill, so the fill scale and the fill legend
#     title had nothing to act on;
#   * the axis titles now describe what is plotted (a row count per month).
p <- ggplot(count_by_month, aes(x = Date, y = n))
p +
  geom_col() +
  geom_text(aes(label = n), vjust = 1.6, colour = "white", size = 3) +
  labs(y = "Number of records", x = "Month") +
  theme_minimal()

#p + geom_line() + geom_point()
#p + geom_line() + geom_point(aes(color=Date))
# Line chart of total_total over time for BMW, MERCEDES-BENZ and AUDI.
luxury_data <- car_agg %>%
  filter(brand_name %in% c("BMW", "MERCEDES-BENZ", "AUDI"))
# A date on the first day of each month is built from the year and month
# columns to serve as the x axis.
# The legend belongs to the colour aesthetic, so its title has to be set via
# `colour`; the former `fill = "Brands"` never reached the legend.
luxury_data %>%
  mutate(date = as.Date(paste(year, month, 1, sep = "-"))) %>%
  ggplot(data = ., aes(x = date, y = total_total, colour = brand_name)) +
  labs(y = "The Sales of Each Brands", x = "Months", colour = "Brands") +
  geom_line() +
  geom_point()

# Per-year sums of total_total, auto_total and comm_total, sorted so that the
# year with the largest total_total comes first. The result is the input for
# the ggpairs() call (GGally package), which plots all pairs of scatterplots
# for several variables.
car_agg_summary <- car_agg %>%
  group_by(year) %>%
  summarise(
    total_total = sum(total_total),
    auto_total = sum(auto_total),
    comm_total = sum(comm_total)
  ) %>%
  arrange(desc(total_total))

car_agg_summary
## # A tibble: 3 x 4
##    year total_total auto_total comm_total
##   <dbl>       <dbl>      <dbl>      <dbl>
## 1  2016      982150     755368     226782
## 2  2017      900859     678539     222320
## 3  2018      462904     361913     100991
#install.packages("GGally")
library("GGally")
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
# Scatterplot matrix of the three yearly sums.
car_agg_summary %>%
  select(total_total, auto_total, comm_total) %>%
  ggpairs()

# Per-brand sums of total_total, auto_total and comm_total, sorted so that
# the brand with the largest total_total comes first.
car_agg_summary_brand <- car_agg %>%
  group_by(brand_name) %>%
  summarise(
    total_total = sum(total_total),
    auto_total = sum(auto_total),
    comm_total = sum(comm_total)
  ) %>%
  arrange(desc(total_total))

car_agg_summary_brand
## # A tibble: 49 x 4
##    brand_name    total_total auto_total comm_total
##    <chr>               <dbl>      <dbl>      <dbl>
##  1 RENAULT            318500     279831      38669
##  2 FIAT               275900     142022     133878
##  3 FORD               271157     100172     170985
##  4 VOLKSWAGEN         262041     198371      63670
##  5 HYUNDAI            131666     125446       6220
##  6 DACIA              117978     103206      14772
##  7 OPEL               117122     117122          0
##  8 TOYOTA             106950      95582      11368
##  9 PEUGEOT             98036      74042      23994
## 10 MERCEDES-BENZ       93944      70584      23360
## # ... with 39 more rows
# auto_total against total_total, one point per brand, coloured by
# auto_total, on a dark grey background with enlarged margins.
# The additional geom_jitter() layer was removed: it redrew every brand a
# second time as a semi-transparent tomato point at a randomly shifted
# position, which tinted the gradient colours and duplicated the data.
ggplot(
  car_agg_summary_brand,
  aes(x = total_total, y = auto_total, colour = auto_total)
) +
  geom_point(size = 3) +
  labs(
    title = "Auto trend in the total cars number",
    x = "total_total",
    y = "auto_total"
  ) +
  theme(
    plot.margin = margin(2, 0.8, 2, 0.8, "cm"),
    plot.background = element_rect(fill = "darkgrey")
  )

library(ggplot2)
#install.packages("treemapify")
library(treemapify)   
# Treemap with one tile per brand: tile area follows total_total, and each
# tile is filled and labelled by brand_name. The legend sits below the plot.
car_agg_summary_brand %>%
  ggplot(aes(area = total_total, fill = brand_name, label = brand_name)) +
  geom_treemap() +
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "centre",
    grow = TRUE
  ) +
  theme(legend.position = "bottom")