The raw data (excel file) has been uploaded to my github repository. First we download that file and put the data in a temp file. Then we can read that excel file into R and finally we will remove the temp file.
# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-KadirKemal/blob/master/Week2/Odd_Retail_Sales_2017_08.xlsx?raw=true",destfile=tmp)
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
# Remove the temp file
file.remove(tmp)
## [1] TRUE
You can also embed plots, for example:
# Let's see our raw data
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 31 31 NA NA 0 0 31 31
## 2 ASTON MARTIN NA 3 3 NA NA 0 0 3 3
## 3 AUDI NA 903 903 NA NA 0 0 903 903
## 4 BENTLEY NA NA 0 NA NA 0 0 0 0
## 5 BMW NA 2230 2230 NA NA 0 0 2230 2230
## 6 CHERY NA 1 1 NA NA 0 0 1 1
lastRowIndex = length(raw_data$X__1)-2
raw_data = raw_data[1:lastRowIndex, c(1:10)]
# Let's see our raw data
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 31 31 NA NA 0 0 31 31
## 2 ASTON MARTIN NA 3 3 NA NA 0 0 3 3
## 3 AUDI NA 903 903 NA NA 0 0 903 903
## 4 BENTLEY NA NA 0 NA NA 0 0 0 0
## 5 BMW NA 2230 2230 NA NA 0 0 2230 2230
## 6 CHERY NA 1 1 NA NA 0 0 1 1
tail(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SUBARU NA 103 103 NA NA 0 0 103 103
## 2 SUZUKI NA 370 370 NA NA 0 0 370 370
## 3 TATA NA NA 0 NA NA 0 0 0 0
## 4 TOYOTA 2374 1104 3478 NA 549 549 2374 1653 4027
## 5 VOLKSWAGEN NA 8438 8438 NA 2729 2729 0 11167 11167
## 6 VOLVO NA 203 203 NA NA 0 0 203 203
colnames(raw_data) <- c("brand_name","auto_dom","auto_imp","auto_total","comm_dom","comm_imp","comm_total","total_dom","total_imp","total_total")
head(raw_data)
## # A tibble: 6 x 10
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO NA 31 31 NA NA 0
## 2 ASTON MAR… NA 3 3 NA NA 0
## 3 AUDI NA 903 903 NA NA 0
## 4 BENTLEY NA NA 0 NA NA 0
## 5 BMW NA 2230 2230 NA NA 0
## 6 CHERY NA 1 1 NA NA 0
## # ... with 3 more variables: total_dom <dbl>, total_imp <dbl>,
## # total_total <dbl>
raw_data[is.na(raw_data)] = 0
head(raw_data)
## # A tibble: 6 x 10
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 31 31 0 0 0
## 2 ASTON MAR… 0 3 3 0 0 0
## 3 AUDI 0 903 903 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 2230 2230 0 0 0
## 6 CHERY 0 1 1 0 0 0
## # ... with 3 more variables: total_dom <dbl>, total_imp <dbl>,
## # total_total <dbl>
car_data_agu_16 = raw_data
car_data_agu_16 %>% mutate(year=2016,month=8)
## # A tibble: 48 x 12
## brand_name auto_dom auto_imp auto_total comm_dom comm_imp comm_total
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALFA ROMEO 0 31 31 0 0 0
## 2 ASTON MAR… 0 3 3 0 0 0
## 3 AUDI 0 903 903 0 0 0
## 4 BENTLEY 0 0 0 0 0 0
## 5 BMW 0 2230 2230 0 0 0
## 6 CHERY 0 1 1 0 0 0
## 7 CITROEN 0 1080 1080 131 592 723
## 8 DACIA 0 3120 3120 0 389 389
## 9 DS 0 34 34 0 0 0
## 10 FERRARI 0 0 0 0 0 0
## # ... with 38 more rows, and 5 more variables: total_dom <dbl>,
## # total_imp <dbl>, total_total <dbl>, year <dbl>, month <dbl>
saveRDS(car_data_agu_16,file="~/Documents/GitHub/pj18-KadirKemal/Week2/odd_car_sales_data_agu_16.rds")
car_data_agu_16[car_data_agu_16$auto_total > 0 & car_data_agu_16$comm_total > 0,c(1,4,7)]
## # A tibble: 14 x 3
## brand_name auto_total comm_total
## <chr> <dbl> <dbl>
## 1 CITROEN 1080 723
## 2 DACIA 3120 389
## 3 FIAT 3325 4395
## 4 FORD 2679 4936
## 5 HYUNDAI 3382 240
## 6 KIA 1378 354
## 7 MERCEDES-BENZ 2456 485
## 8 MITSUBISHI 61 387
## 9 NISSAN 2183 91
## 10 PEUGEOT 1601 973
## 11 RENAULT 6589 864
## 12 SSANGYONG 55 22
## 13 TOYOTA 3478 549
## 14 VOLKSWAGEN 8438 2729
mean(car_data_agu_16$auto_total)
## [1] 1124.521
summary(car_data_agu_16)
## brand_name auto_dom auto_imp auto_total
## Length:48 Min. : 0.0 Min. : 0.0 Min. : 0.0
## Class :character 1st Qu.: 0.0 1st Qu.: 3.0 1st Qu.: 3.0
## Mode :character Median : 0.0 Median : 93.5 Median : 93.5
## Mean : 212.7 Mean : 911.8 Mean :1124.5
## 3rd Qu.: 0.0 3rd Qu.:1433.8 3rd Qu.:2003.0
## Max. :3063.0 Max. :8438.0 Max. :8438.0
## comm_dom comm_imp comm_total total_dom
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 0.0 Median : 0.0 Median : 0.0 Median : 0.0
## Mean : 186.8 Mean : 179.4 Mean : 366.2 Mean : 399.5
## 3rd Qu.: 0.0 3rd Qu.: 166.5 3rd Qu.: 193.5 3rd Qu.: 0.0
## Max. :4449.0 Max. :2729.0 Max. :4936.0 Max. :7014.0
## total_imp total_total
## Min. : 0.0 Min. : 0.00
## 1st Qu.: 5.0 1st Qu.: 5.75
## Median : 132.5 Median : 147.00
## Mean : 1091.2 Mean : 1490.75
## 3rd Qu.: 1784.8 3rd Qu.: 2241.00
## Max. :11167.0 Max. :11167.00