Install necessary packages first.
options(repos=structure(c(CRAN="http://cran.r-project.org")))
options(repos="https://cran.rstudio.com" )
# install.packages("tidyverse", repos = "https://cran.r-project.org")
# install.packages("readxl", repos = "https://cran.r-project.org")
# devtools::install_github("tidyverse/dplyr")
# install.packages("magrittr",repos = "https://cran.r-project.org")
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.0.0 <U+221A> purrr 0.2.5
## <U+221A> tibble 1.4.2 <U+221A> dplyr 0.7.7
## <U+221A> tidyr 0.8.1 <U+221A> stringr 1.3.1
## <U+221A> readr 1.1.1 <U+221A> forcats 0.3.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
Coding
tmp <-tempfile(fileext=".xlsx")
#download file from rep1ository to the temporary file
download.file("https://github.com/MEF-BDA503/pj18-gokceezeroglu/blob/master/odd_retail_sales_2016_12.xlsx?raw=true",destfile=tmp, mode="wb")
#read that excel file using readxl package's read excel function
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 38 38 NA NA 0 0 38 38
## 2 ASTON MARTIN NA 1 1 NA NA 0 0 1 1
## 3 AUDI NA 2992 2992 NA NA 0 0 2992 2992
## 4 BENTLEY NA NA 0 NA NA 0 0 0 0
## 5 BMW NA 1694 1694 NA NA 0 0 1694 1694
## 6 CHERY NA 0 0 NA NA 0 0 0 0
tail(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TATA NA NA 0 NA NA 0 0 0 0
## 2 TOYOTA 5680 1175 6855 NA 948 948 5680 2123 7803
## 3 VOLKSWAGEN NA 14539 14539 NA 3543 3543 0 18082 18082
## 4 VOLVO NA 788 788 NA NA 0 0 788 788
## 5 <NA> NA NA NA NA NA NA NA NA NA
## 6 TOPLAM: 32979 75065 108044 18251 15617 33868 51230 90682 141912
#remove unrequired lines
raw_data <-raw_data %>% slice(-c(49,50))
#check again
print(tail(raw_data))
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SUBARU NA 220 220 NA NA 0 0 220 220
## 2 SUZUKI NA 536 536 NA NA 0 0 536 536
## 3 TATA NA NA 0 NA NA 0 0 0 0
## 4 TOYOTA 5680 1175 6855 NA 948 948 5680 2123 7803
## 5 VOLKSWAGEN NA 14539 14539 NA 3543 3543 0 18082 18082
## 6 VOLVO NA 788 788 NA NA 0 0 788 788
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
print(raw_data)
## # A tibble: 48 x 10
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 38 38 NA NA 0
## 2 ASTON MAR~ NA 1 1 NA NA 0
## 3 AUDI NA 2992 2992 NA NA 0
## 4 BENTLEY NA NA 0 NA NA 0
## 5 BMW NA 1694 1694 NA NA 0
## 6 CHERY NA 0 0 NA NA 0
## 7 CITROEN NA 2275 2275 241 1010 1251
## 8 DACIA NA 6324 6324 NA 873 873
## 9 DS NA 99 99 NA NA 0
## 10 FERRARI NA 2 2 NA NA 0
## # ... with 38 more rows, and 3 more variables: total_dom <dbl>,
## # total_imp <dbl>, total_total <dbl>
Random Analysis
car_data_dec_16 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2016,month=12)
print(car_data_dec_16,width=Inf)
## # A tibble: 48 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 38 38 0 0 0
## 2 ASTON MARTIN 0 1 1 0 0 0
## 3 AUDI 0 2992 2992 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 1694 1694 0 0 0
## 6 CHERY 0 0 0 0 0 0
## 7 CITROEN 0 2275 2275 241 1010 1251
## 8 DACIA 0 6324 6324 0 873 873
## 9 DS 0 99 99 0 0 0
## 10 FERRARI 0 2 2 0 0 0
## total_dom total_imp total_total year month
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 38 38 2016 12
## 2 0 1 1 2016 12
## 3 0 2992 2992 2016 12
## 4 0 0 0 2016 12
## 5 0 1694 1694 2016 12
## 6 0 0 0 2016 12
## 7 241 3285 3526 2016 12
## 8 0 7197 7197 2016 12
## 9 0 99 99 2016 12
## 10 0 2 2 2016 12
## # ... with 38 more rows
print(car_data_dec_16)
## # A tibble: 48 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 38 38 0 0 0
## 2 ASTON MAR~ 0 1 1 0 0 0
## 3 AUDI 0 2992 2992 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 1694 1694 0 0 0
## 6 CHERY 0 0 0 0 0 0
## 7 CITROEN 0 2275 2275 241 1010 1251
## 8 DACIA 0 6324 6324 0 873 873
## 9 DS 0 99 99 0 0 0
## 10 FERRARI 0 2 2 0 0 0
## # ... with 38 more rows, and 5 more variables: total_dom <dbl>,
## # total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>
saveRDS(car_data_dec_16,file="C:/Users/ezegokce/Desktop/503/odd_retail_sales_2016_12_2.rds")
car_data_dec_16 %>%
filter(auto_total > 0 & comm_total > 0) %>%
select(brand_name,total_total) %>%
arrange(desc(total_total))
## # A tibble: 14 x 2
## brand_name total_total
## <chr> <dbl>
## 1 RENAULT 21078
## 2 VOLKSWAGEN 18082
## 3 FIAT 16698
## 4 FORD 15780
## 5 TOYOTA 7803
## 6 DACIA 7197
## 7 HYUNDAI 7160
## 8 NISSAN 5681
## 9 PEUGEOT 5186
## 10 MERCEDES-BENZ 4226
## 11 CITROEN 3526
## 12 KIA 1990
## 13 MITSUBISHI 581
## 14 SSANGYONG 45
car_data_dec_16 %>%
filter(total_total>100) %>%
select(brand_name,total_total)%>%
arrange(desc(total_total))
## # A tibble: 30 x 2
## brand_name total_total
## <chr> <dbl>
## 1 RENAULT 21078
## 2 VOLKSWAGEN 18082
## 3 FIAT 16698
## 4 FORD 15780
## 5 OPEL 8707
## 6 TOYOTA 7803
## 7 DACIA 7197
## 8 HYUNDAI 7160
## 9 NISSAN 5681
## 10 PEUGEOT 5186
## # ... with 20 more rows