#Create a temporary file
tmp=tempfile(fileext=".xlsx")
#Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-istema/blob/master/files/odd_retail_sales_2017_06.xlsx?raw=true",
destfile=tmp,mode='wb')
#Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
# Remove the temp file
file.remove(tmp)
## [1] TRUE
# Remove the last two rows because they are irrelevant (total and empty rows)
raw_data <- raw_data %>% slice(-c(46,47))
# Let's see our raw data
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 24 24 NA NA 0 0 24 24
## 2 ASTON MARTIN NA 2 2 NA NA 0 0 2 2
## 3 AUDI NA 2177 2177 NA NA 0 0 2177 2177
## 4 BENTLEY NA 1 1 NA NA 0 0 1 1
## 5 BMW NA 1409 1409 NA NA 0 0 1409 1409
## 6 CHERY NA NA 0 NA NA 0 0 0 0
# Change the column names in the data.
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
# Replace NA values with 0 and label the time period with year and month.
car_data_jun_17 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2017,month=6)
print(car_data_jun_17,width=Inf)
## # A tibble: 45 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 24 24 0 0 0
## 2 ASTON MARTIN 0 2 2 0 0 0
## 3 AUDI 0 2177 2177 0 0 0
## 4 BENTLEY 0 1 1 0 0 0
## 5 BMW 0 1409 1409 0 0 0
## 6 CHERY 0 0 0 0 0 0
## 7 CITROEN 0 1473 1473 1 1176 1177
## 8 DACIA 0 2933 2933 0 650 650
## 9 DS 0 1 1 0 0 0
## 10 FERRARI 0 1 1 0 0 0
## total_dom total_imp total_total year month
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 24 24 2017 6
## 2 0 2 2 2017 6
## 3 0 2177 2177 2017 6
## 4 0 1 1 2017 6
## 5 0 1409 1409 2017 6
## 6 0 0 0 2017 6
## 7 1 2649 2650 2017 6
## 8 0 3583 3583 2017 6
## 9 0 1 1 2017 6
## 10 0 1 1 2017 6
## # ... with 35 more rows
saveRDS(car_data_jun_17,file="C:/Users/win7pro/Documents/GitHub/pj18-istema/files/odd_car_sales_data_jun_17.rds")
i.list of domestic sales for auto & commercial vehicles
#list of domestic sales for auto & commercial vehicles
car_data_jun_17 %>%
filter(auto_dom > 0 | comm_dom > 0) %>%
select(brand_name,total_dom) %>%
arrange(desc(total_dom))
## # A tibble: 10 x 2
## brand_name total_dom
## <chr> <dbl>
## 1 FIAT 10165
## 2 RENAULT 7905
## 3 FORD 5535
## 4 TOYOTA 3331
## 5 HYUNDAI 2209
## 6 HONDA 1581
## 7 ISUZU 141
## 8 KARSAN 119
## 9 MITSUBISHI 15
## 10 CITROEN 1
ii.list of import sales for brands auto & commercial vehicles based on the majority of commercial or auto sales.
car_data_jun_17 %>%
filter(auto_imp > 0 & comm_imp > 0) %>%
select(brand_name,auto_imp,comm_imp,total_imp) %>%
transmute(brand_name,total_imp,
auto_comm = ifelse(auto_imp > comm_imp,"auto","comm")) %>%
arrange(desc(total_imp))
## # A tibble: 14 x 3
## brand_name total_imp auto_comm
## <chr> <dbl> <chr>
## 1 VOLKSWAGEN 9431 auto
## 2 RENAULT 4328 auto
## 3 FORD 3645 auto
## 4 DACIA 3583 auto
## 5 PEUGEOT 3525 auto
## 6 MERCEDES-BENZ 3080 auto
## 7 CITROEN 2649 auto
## 8 NISSAN 2581 auto
## 9 HYUNDAI 2422 auto
## 10 KIA 1153 auto
## 11 TOYOTA 918 auto
## 12 FIAT 525 comm
## 13 MITSUBISHI 185 comm
## 14 SSANGYONG 117 auto