# Load the package to the session
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.8
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
#Download Raw Data
# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-Leyla.Yigit/blob/master/AssignmentWeek2/odd_retail_sales_2016_10.xlsx?raw=true",mode="wb",destfile=tmp)
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
# Remove the temp file
file.remove(tmp)
## [1] TRUE
plot(pressure)
raw_data <- raw_data %>% slice(-c(48,49))
Note that the echo = FALSE
parameter was added to the code chunk to prevent printing of the R code that generated the plot.
head(raw_data)
## # A tibble: 6 x 10
## X__1 X__2 X__3 X__4 X__5 X__6 X__7 X__8 X__9 X__10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ASTON MARTIN NA 1 1 NA NA 0 0 1 1
## 2 AUDI NA 2448 2448 NA NA 0 0 2448 2448
## 3 BENTLEY NA 1 1 NA NA 0 0 1 1
## 4 BMW NA 2205 2205 NA NA 0 0 2205 2205
## 5 CHERY NA 0 0 NA NA 0 0 0 0
## 6 CITROEN NA 1381 1381 126 636 762 126 2017 2143
#In order to make the data standardized and workable we need to define column names and remove NA values for this example.
#Please use the same column names in your examples also.
# Use the same column names in your data.
colnames(raw_data) <- c("MARKA","oto_yerli","oto_ithal","oto_toplam","ticari_yerli","ticari_ithal","ticari_toplam","toplam_yerli","toplam_ithal","toplam_toplam")
# Now we replace NA values with 0 and label the time period with year and month, so when we merge the data we won't be confused.
car_data_oct_16<- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2016,month=10)
print(car_data_oct_16,width=Inf)
## # A tibble: 47 x 12
## MARKA oto_yerli oto_ithal oto_toplam ticari_yerli ticari_ithal
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ASTON MARTIN 0 1 1 0 0
## 2 AUDI 0 2448 2448 0 0
## 3 BENTLEY 0 1 1 0 0
## 4 BMW 0 2205 2205 0 0
## 5 CHERY 0 0 0 0 0
## 6 CITROEN 0 1381 1381 126 636
## 7 DACIA 0 3989 3989 0 249
## 8 DS 0 17 17 0 0
## 9 FERRARI 0 1 1 0 0
## 10 FIAT 4390 207 4597 4036 495
## ticari_toplam toplam_yerli toplam_ithal toplam_toplam year month
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 1 1 2016 10
## 2 0 0 2448 2448 2016 10
## 3 0 0 1 1 2016 10
## 4 0 0 2205 2205 2016 10
## 5 0 0 0 0 2016 10
## 6 762 126 2017 2143 2016 10
## 7 249 0 4238 4238 2016 10
## 8 0 0 17 17 2016 10
## 9 0 0 1 1 2016 10
## 10 4531 8426 702 9128 2016 10
## # ... with 37 more rows
#One of the best methods is to save your data to an RDS or RData file.
#The difference is RDS can hold only one object but RData can hold many. Since we have only one data frame here we will go with RDS.
# Save an object to a file
saveRDS(car_data_oct_16, file = "car_data_oct_16.rds")
car_data_oct_16 %>%
filter(oto_toplam > 0 & ticari_toplam > 0) %>%
select(MARKA,toplam_toplam) %>%
arrange(desc(toplam_toplam))
## # A tibble: 14 x 2
## MARKA toplam_toplam
## <chr> <dbl>
## 1 VOLKSWAGEN 12229
## 2 FIAT 9128
## 3 RENAULT 8882
## 4 FORD 8840
## 5 TOYOTA 4436
## 6 HYUNDAI 4313
## 7 DACIA 4238
## 8 PEUGEOT 3171
## 9 MERCEDES-BENZ 3029
## 10 NISSAN 2452
## 11 CITROEN 2143
## 12 KIA 1447
## 13 MITSUBISHI 286
## 14 SSANGYONG 94
** Analysis Result: VOLKSWAGEN has the highest number of sales; on the other hand SSANGYONG has the lowest sales number **
#Calculate the mean of sales numbers
car_data_oct_16 %>%
summarise(sales_num_mean=mean(toplam_toplam),dom_sales_num_maen=mean(toplam_yerli),imp_sales_num_mean=mean(toplam_ithal),com_sales_num_mean=mean(ticari_ithal),oto_sales_num_mean=mean(oto_ithal))
## # A tibble: 1 x 5
## sales_num_mean dom_sales_num_m~ imp_sales_num_m~ com_sales_num_m~
## <dbl> <dbl> <dbl> <dbl>
## 1 1765. 530. 1235. 203.
## # ... with 1 more variable: oto_sales_num_mean <dbl>
** Analysis Result: As a result,in 2016,10 customers prefer import cars more than domestic ones.From this import cars, sales of indiviual cars is higher than commercial car sales **
car_data_oct_16 %>%
filter(MARKA=='VOLKSWAGEN' ) %>%
select(MARKA,(toplam_toplam),(toplam_yerli) ,(toplam_ithal))%>%
arrange (desc(sum(toplam_toplam)))
## # A tibble: 1 x 4
## MARKA toplam_toplam toplam_yerli toplam_ithal
## <chr> <dbl> <dbl> <dbl>
## 1 VOLKSWAGEN 12229 0 12229
** Analysis Result:VOLKSWAGEN is an import car and has 12229 number of sales 2016,10 **
car_data_oct_16 %>%
filter(toplam_toplam==0) %>%
select(MARKA,toplam_toplam) %>%
arrange( MARKA)
## # A tibble: 7 x 2
## MARKA toplam_toplam
## <chr> <dbl>
## 1 CHERY 0
## 2 GEELY 0
## 3 LAMBORGHINI 0
## 4 LANCIA 0
## 5 OTOKAR 0
## 6 PROTON 0
## 7 TATA 0
** Analysis Result:There are 7 car brand have 0 number of sales in 2016,10 **
car_data_oct_16 %>%
group_by(MARKA) %>%
filter(sum(toplam_toplam)>2000) %>%
select(MARKA,toplam_ithal,toplam_yerli,toplam_toplam) %>%
arrange(desc(toplam_toplam))
## # A tibble: 16 x 4
## # Groups: MARKA [16]
## MARKA toplam_ithal toplam_yerli toplam_toplam
## <chr> <dbl> <dbl> <dbl>
## 1 VOLKSWAGEN 12229 0 12229
## 2 FIAT 702 8426 9128
## 3 RENAULT 4557 4325 8882
## 4 FORD 2977 5863 8840
## 5 OPEL 4630 0 4630
## 6 TOYOTA 1820 2616 4436
## 7 HYUNDAI 2468 1845 4313
## 8 DACIA 4238 0 4238
## 9 PEUGEOT 3056 115 3171
## 10 MERCEDES-BENZ 3029 0 3029
## 11 SKODA 2685 0 2685
## 12 NISSAN 2452 0 2452
## 13 AUDI 2448 0 2448
## 14 BMW 2205 0 2205
## 15 CITROEN 2017 126 2143
## 16 SEAT 2121 0 2121
car_data_oct_16_comp <-
car_data_oct_16 %>%
# group_by(MARKA)%>%
#aggregate( by=list(toplam_yerli,toplam_ithal,toplam_toplam), FUN=sum)
summarise(dip_toplam=sum(toplam_toplam),ticari_toplam=sum(ticari_toplam),oto_toplam=sum(oto_toplam))
#mutate(dip_toplam = sum(toplam_toplam))
car_data_oct_16_comp
## # A tibble: 1 x 3
## dip_toplam ticari_toplam oto_toplam
## <dbl> <dbl> <dbl>
## 1 82963 19254 63709
Analysis Result:Indıvidual car type is more preffred by customers than commercial ones.