My file contains ODD data from January 2017
# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-baturusta/blob/master/files/odd_retail_sales_2017_01.xlsx?raw=true"
,destfile=tmp, mode = "wb")
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp, col_names= FALSE, skip = 7)
# Remove the temp file
file.remove(tmp)
## [1] TRUE
# Remove the last two rows because they are irrelevant (total and empty rows)
raw_data <- raw_data %>% slice(-c(48,49))
# Let's see our raw data
tail(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SUZUKI NA 79 79 NA NA 0 0 79 79
## 2 TOYOTA 1488 439 1927 NA 356 356 1488 795 2283
## 3 VOLKSWAGEN NA 3059 3059 NA 1255 1255 0 4314 4314
## 4 VOLVO NA 155 155 NA NA 0 0 155 155
## 5 <NA> NA NA NA NA NA NA NA NA NA
## 6 TOPLAM: 8793 16896 25689 4990 4644 9634 13783 21540 35323
In this step, I reintroduce column names and replace NA values with 0.
# Use the same column names in your data.
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
# Now we replace NA values with 0 and label the time period with year and month, so when we merge the data we won't be confused.
car_data_jan_17 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2017,month=1)
print(car_data_jan_17, width=Inf)
## # A tibble: 47 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 17 17 0 0 0
## 2 ASTON MARTIN 0 1 1 0 0 0
## 3 AUDI 0 625 625 0 0 0
## 4 BENTLEY 0 1 1 0 0 0
## 5 BMW 0 570 570 0 0 0
## 6 CHERY 0 0 0 0 0 0
## 7 CITROEN 0 515 515 3 438 441
## 8 DACIA 0 1471 1471 0 235 235
## 9 DS 0 9 9 0 0 0
## 10 FERRARI 0 0 0 0 0 0
## total_dom total_imp total_total year month
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 17 17 2017 1
## 2 0 1 1 2017 1
## 3 0 625 625 2017 1
## 4 0 1 1 2017 1
## 5 0 570 570 2017 1
## 6 0 0 0 2017 1
## 7 3 953 956 2017 1
## 8 0 1706 1706 2017 1
## 9 0 9 9 2017 1
## 10 0 0 0 2017 1
## # ... with 37 more rows
Saving our data to an RDS file.
saveRDS(car_data_jan_17, file="~/odd_car_sales_data_jan_17.rds")
Let’s see the ranking of brands that have sold both imported and domestic vehicles and see the ratio.
#First, I filter the data and exclude TOLAM: row. Then show the result by adding a ratio column.
car_data_jan_17 %>%
filter(auto_dom > 0 & auto_imp > 0, brand_name != "TOPLAM:") %>%
select(brand_name, auto_dom, auto_imp) %>%
mutate(auto_ratio = auto_imp / auto_dom) %>%
arrange(auto_ratio)
## # A tibble: 6 x 4
## brand_name auto_dom auto_imp auto_ratio
## <chr> <dbl> <dbl> <dbl>
## 1 FIAT 1515 106 0.0700
## 2 HONDA 1111 206 0.185
## 3 RENAULT 3511 844 0.240
## 4 TOYOTA 1488 439 0.295
## 5 HYUNDAI 810 1413 1.74
## 6 FORD 358 1175 3.28