Install necessary packages first.

options(repos=structure(c(CRAN="http://cran.r-project.org")))
options(repos="https://cran.rstudio.com" )

# install.packages("tidyverse", repos = "https://cran.r-project.org")
# install.packages("readxl", repos = "https://cran.r-project.org")
# devtools::install_github("tidyverse/dplyr")
# install.packages("magrittr",repos = "https://cran.r-project.org")
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.0.0     <U+221A> purrr   0.2.5
## <U+221A> tibble  1.4.2     <U+221A> dplyr   0.7.7
## <U+221A> tidyr   0.8.1     <U+221A> stringr 1.3.1
## <U+221A> readr   1.1.1     <U+221A> forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract

Coding

tmp <-tempfile(fileext=".xlsx")
#download file from rep1ository to the temporary file
download.file("https://github.com/MEF-BDA503/pj18-gokceezeroglu/blob/master/odd_retail_sales_2016_12.xlsx?raw=true",destfile=tmp, mode="wb")

#read that excel file  using readxl package's read excel function
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)

head(raw_data)
## # A tibble: 6 x 10
##   X__1          X__2  X__3  X__4  X__5  X__6  X__7  X__8  X__9 X__10
##   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO      NA    38    38    NA    NA     0     0    38    38
## 2 ASTON MARTIN    NA     1     1    NA    NA     0     0     1     1
## 3 AUDI            NA  2992  2992    NA    NA     0     0  2992  2992
## 4 BENTLEY         NA    NA     0    NA    NA     0     0     0     0
## 5 BMW             NA  1694  1694    NA    NA     0     0  1694  1694
## 6 CHERY           NA     0     0    NA    NA     0     0     0     0
tail(raw_data)
## # A tibble: 6 x 10
##   X__1        X__2  X__3   X__4  X__5  X__6  X__7  X__8  X__9  X__10
##   <chr>      <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 TATA          NA    NA      0    NA    NA     0     0     0      0
## 2 TOYOTA      5680  1175   6855    NA   948   948  5680  2123   7803
## 3 VOLKSWAGEN    NA 14539  14539    NA  3543  3543     0 18082  18082
## 4 VOLVO         NA   788    788    NA    NA     0     0   788    788
## 5 <NA>          NA    NA     NA    NA    NA    NA    NA    NA     NA
## 6 TOPLAM:    32979 75065 108044 18251 15617 33868 51230 90682 141912
#remove  unrequired lines
raw_data <-raw_data %>% slice(-c(49,50))
#check again
print(tail(raw_data))
## # A tibble: 6 x 10
##   X__1        X__2  X__3  X__4  X__5  X__6  X__7  X__8  X__9 X__10
##   <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SUBARU        NA   220   220    NA    NA     0     0   220   220
## 2 SUZUKI        NA   536   536    NA    NA     0     0   536   536
## 3 TATA          NA    NA     0    NA    NA     0     0     0     0
## 4 TOYOTA      5680  1175  6855    NA   948   948  5680  2123  7803
## 5 VOLKSWAGEN    NA 14539 14539    NA  3543  3543     0 18082 18082
## 6 VOLVO         NA   788   788    NA    NA     0     0   788   788
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
print(raw_data)
## # A tibble: 48 x 10
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO       NA       38         38       NA       NA          0
##  2 ASTON MAR~       NA        1          1       NA       NA          0
##  3 AUDI             NA     2992       2992       NA       NA          0
##  4 BENTLEY          NA       NA          0       NA       NA          0
##  5 BMW              NA     1694       1694       NA       NA          0
##  6 CHERY            NA        0          0       NA       NA          0
##  7 CITROEN          NA     2275       2275      241     1010       1251
##  8 DACIA            NA     6324       6324       NA      873        873
##  9 DS               NA       99         99       NA       NA          0
## 10 FERRARI          NA        2          2       NA       NA          0
## # ... with 38 more rows, and 3 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>

Random Analysis

car_data_dec_16 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2016,month=12)
print(car_data_dec_16,width=Inf)
## # A tibble: 48 x 12
##    brand_name   auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>           <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO          0       38         38        0        0          0
##  2 ASTON MARTIN        0        1          1        0        0          0
##  3 AUDI                0     2992       2992        0        0          0
##  4 BENTLEY             0        0          0        0        0          0
##  5 BMW                 0     1694       1694        0        0          0
##  6 CHERY               0        0          0        0        0          0
##  7 CITROEN             0     2275       2275      241     1010       1251
##  8 DACIA               0     6324       6324        0      873        873
##  9 DS                  0       99         99        0        0          0
## 10 FERRARI             0        2          2        0        0          0
##    total_dom total_imp total_total  year month
##        <dbl>     <dbl>       <dbl> <dbl> <dbl>
##  1         0        38          38  2016    12
##  2         0         1           1  2016    12
##  3         0      2992        2992  2016    12
##  4         0         0           0  2016    12
##  5         0      1694        1694  2016    12
##  6         0         0           0  2016    12
##  7       241      3285        3526  2016    12
##  8         0      7197        7197  2016    12
##  9         0        99          99  2016    12
## 10         0         2           2  2016    12
## # ... with 38 more rows
print(car_data_dec_16)
## # A tibble: 48 x 12
##    brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>         <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO        0       38         38        0        0          0
##  2 ASTON MAR~        0        1          1        0        0          0
##  3 AUDI              0     2992       2992        0        0          0
##  4 BENTLEY           0        0          0        0        0          0
##  5 BMW               0     1694       1694        0        0          0
##  6 CHERY             0        0          0        0        0          0
##  7 CITROEN           0     2275       2275      241     1010       1251
##  8 DACIA             0     6324       6324        0      873        873
##  9 DS                0       99         99        0        0          0
## 10 FERRARI           0        2          2        0        0          0
## # ... with 38 more rows, and 5 more variables: total_dom <dbl>,
## #   total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>
saveRDS(car_data_dec_16,file="C:/Users/ezegokce/Desktop/503/odd_retail_sales_2016_12_2.rds")

car_data_dec_16 %>% 
  filter(auto_total > 0 & comm_total > 0) %>%
  select(brand_name,total_total) %>%
  arrange(desc(total_total))
## # A tibble: 14 x 2
##    brand_name    total_total
##    <chr>               <dbl>
##  1 RENAULT             21078
##  2 VOLKSWAGEN          18082
##  3 FIAT                16698
##  4 FORD                15780
##  5 TOYOTA               7803
##  6 DACIA                7197
##  7 HYUNDAI              7160
##  8 NISSAN               5681
##  9 PEUGEOT              5186
## 10 MERCEDES-BENZ        4226
## 11 CITROEN              3526
## 12 KIA                  1990
## 13 MITSUBISHI            581
## 14 SSANGYONG              45
car_data_dec_16 %>%
  filter(total_total>100) %>%
  select(brand_name,total_total)%>%
   arrange(desc(total_total))
## # A tibble: 30 x 2
##    brand_name total_total
##    <chr>            <dbl>
##  1 RENAULT          21078
##  2 VOLKSWAGEN       18082
##  3 FIAT             16698
##  4 FORD             15780
##  5 OPEL              8707
##  6 TOYOTA            7803
##  7 DACIA             7197
##  8 HYUNDAI           7160
##  9 NISSAN            5681
## 10 PEUGEOT           5186
## # ... with 20 more rows