ODD_Homework_Batur

Download Raw Data

My file contains ODD data from January 2017

# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-baturusta/blob/master/files/odd_retail_sales_2017_01.xlsx?raw=true"
              ,destfile=tmp, mode = "wb")
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp, col_names= FALSE, skip = 7)
# Remove the temp file
file.remove(tmp)

## [1] TRUE

# Remove the last two rows because they are irrelevant (total and empty rows)
raw_data <- raw_data %>% slice(-c(48,49))

# Let's see our raw data
tail(raw_data)

## # A tibble: 6 x 10
##   X__1        X__2  X__3  X__4  X__5  X__6  X__7  X__8  X__9 X__10
##   <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SUZUKI        NA    79    79    NA    NA     0     0    79    79
## 2 TOYOTA      1488   439  1927    NA   356   356  1488   795  2283
## 3 VOLKSWAGEN    NA  3059  3059    NA  1255  1255     0  4314  4314
## 4 VOLVO         NA   155   155    NA    NA     0     0   155   155
## 5 <NA>          NA    NA    NA    NA    NA    NA    NA    NA    NA
## 6 TOPLAM:     8793 16896 25689  4990  4644  9634 13783 21540 35323

Make Data Civilized

In this step, I reintroduce column names and replace NA values with 0.

# Use the same column names in your data.
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
# Now we replace NA values with 0 and label the time period with year and month, so when we merge the data we won't be confused.
car_data_jan_17 <- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2017,month=1)

print(car_data_jan_17, width=Inf)

## # A tibble: 47 x 12
##    brand_name   auto_dom auto_imp auto_total comm_dom comm_imp comm_total
##    <chr>           <dbl>    <dbl>      <dbl>    <dbl>    <dbl>      <dbl>
##  1 ALFA ROMEO          0       17         17        0        0          0
##  2 ASTON MARTIN        0        1          1        0        0          0
##  3 AUDI                0      625        625        0        0          0
##  4 BENTLEY             0        1          1        0        0          0
##  5 BMW                 0      570        570        0        0          0
##  6 CHERY               0        0          0        0        0          0
##  7 CITROEN             0      515        515        3      438        441
##  8 DACIA               0     1471       1471        0      235        235
##  9 DS                  0        9          9        0        0          0
## 10 FERRARI             0        0          0        0        0          0
##    total_dom total_imp total_total  year month
##        <dbl>     <dbl>       <dbl> <dbl> <dbl>
##  1         0        17          17  2017     1
##  2         0         1           1  2017     1
##  3         0       625         625  2017     1
##  4         0         1           1  2017     1
##  5         0       570         570  2017     1
##  6         0         0           0  2017     1
##  7         3       953         956  2017     1
##  8         0      1706        1706  2017     1
##  9         0         9           9  2017     1
## 10         0         0           0  2017     1
## # ... with 37 more rows

Save Your Civilized Data

Saving our data to an RDS file.

saveRDS(car_data_jan_17, file="~/odd_car_sales_data_jan_17.rds")

Some Analysis

Let’s see the ranking of brands that have sold both imported and domestic vehicles and see the ratio.

#First, I filter the data and exclude TOLAM: row. Then show the result by adding a ratio column.
car_data_jan_17 %>%
  filter(auto_dom > 0 & auto_imp > 0, brand_name != "TOPLAM:") %>%
  select(brand_name, auto_dom, auto_imp) %>%
  mutate(auto_ratio = auto_imp / auto_dom) %>%
  arrange(auto_ratio)

## # A tibble: 6 x 4
##   brand_name auto_dom auto_imp auto_ratio
##   <chr>         <dbl>    <dbl>      <dbl>
## 1 FIAT           1515      106     0.0700
## 2 HONDA          1111      206     0.185 
## 3 RENAULT        3511      844     0.240 
## 4 TOYOTA         1488      439     0.295 
## 5 HYUNDAI         810     1413     1.74  
## 6 FORD            358     1175     3.28

ODD_Homework_Batur_Usta

Batur Usta

28 Ekim 2018

Download Raw Data

Make Data Civilized

Save Your Civilized Data

Some Analysis