Install and load the readxl & tidyverse packages
#install.packages("readxl")
#install.packages("tidyverse")
library(readxl)
library(tidyverse)
## -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.1.0 <U+221A> purrr 0.2.5
## <U+221A> tibble 1.4.2 <U+221A> dplyr 0.7.7
## <U+221A> tidyr 0.8.2 <U+221A> stringr 1.3.1
## <U+221A> readr 1.1.1 <U+221A> forcats 0.3.0
## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/pj18-ozenm/blob/master/odd_otomobil_satislar_201607.xlsx?raw=true",mode = 'wb',destfile=tmp)
odd_201607<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
file.remove(tmp)
## [1] TRUE
View(odd_201607)
head(odd_201607)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 33 33 NA NA 0 0 33 33
## 2 ASTON MARTIN NA 2 2 NA NA 0 0 2 2
## 3 AUDI NA 905 905 NA NA 0 0 905 905
## 4 BENTLEY NA 1 1 NA NA 0 0 1 1
## 5 BMW NA 1856 1856 NA NA 0 0 1856 1856
## 6 CHERY NA 22 22 NA NA 0 0 22 22
tail(odd_201607)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TATA NA NA 0 NA NA 0 0 0 0
## 2 TOYOTA 2141 754 2895 NA 310 310 2141 1064 3205
## 3 VOLKSWAGEN NA 7239 7239 NA 1845 1845 0 9084 9084
## 4 VOLVO NA 130 130 NA NA 0 0 130 130
## 5 <NA> NA NA NA NA NA NA NA NA NA
## 6 TOPLAM: 9411 36155 45566 6969 5998 12967 16380 42153 58533
odd_201607 <- odd_201607 %>% slice(-c(49,50))
#colnames(odd_201607) <- c("brand_name","domstc_auto_cnt","imprt_auto_cnt","total_auto_cnt","domstc_comm_vehcl_cnt","imprt_comm_vehcl_cnt","total_comm_vehcl_cnt","domstc_total_cnt","imprt_total_cnt","total_cnt")
colnames(odd_201607) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
car_data_jul_16 <- odd_201607 %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2016,month=7)
print(car_data_jul_16,width=Inf)
## # A tibble: 48 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 33 33 0 0 0
## 2 ASTON MARTIN 0 2 2 0 0 0
## 3 AUDI 0 905 905 0 0 0
## 4 BENTLEY 0 1 1 0 0 0
## 5 BMW 0 1856 1856 0 0 0
## 6 CHERY 0 22 22 0 0 0
## 7 CITROEN 0 1221 1221 100 477 577
## 8 DACIA 0 2680 2680 0 320 320
## 9 DS 0 19 19 0 0 0
## 10 FERRARI 0 0 0 0 0 0
## total_dom total_imp total_total year month
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 33 33 2016 7
## 2 0 2 2 2016 7
## 3 0 905 905 2016 7
## 4 0 1 1 2016 7
## 5 0 1856 1856 2016 7
## 6 0 22 22 2016 7
## 7 100 1698 1798 2016 7
## 8 0 3000 3000 2016 7
## 9 0 19 19 2016 7
## 10 0 0 0 2016 7
## # ... with 38 more rows
saveRDS(car_data_jul_16,file="odd_car_sales_data_jul_16.rds")
car_data_jul_16 %>%
select(brand_name,auto_dom) %>%
arrange(desc(auto_dom)) %>%
filter(auto_dom>0)
## # A tibble: 6 x 2
## brand_name auto_dom
## <chr> <dbl>
## 1 FIAT 2678
## 2 TOYOTA 2141
## 3 RENAULT 2006
## 4 HYUNDAI 1968
## 5 HONDA 376
## 6 FORD 242
car_data_jul_16 %>%
filter(auto_total > 0 | comm_total > 0) %>%
select(brand_name,auto_total,comm_total,total_total) %>%
arrange(desc(total_total))
## # A tibble: 41 x 4
## brand_name auto_total comm_total total_total
## <chr> <dbl> <dbl> <dbl>
## 1 VOLKSWAGEN 7239 1845 9084
## 2 FORD 2267 4103 6370
## 3 RENAULT 5513 761 6274
## 4 FIAT 2849 3080 5929
## 5 OPEL 3495 0 3495
## 6 HYUNDAI 3254 153 3407
## 7 TOYOTA 2895 310 3205
## 8 DACIA 2680 320 3000
## 9 PEUGEOT 1497 619 2116
## 10 MERCEDES-BENZ 1651 253 1904
## # ... with 31 more rows