RMarkdown-Homework-ODDAnalysis

# Load the package to the session
library(tidyverse)

## -- Attaching packages --------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.8
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0

## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(readxl)

#Download Raw Data
# Create a temporary file
tmp<-tempfile(fileext=".xlsx")
# Download file from repository to the temp file
download.file("https://github.com/MEF-BDA503/pj18-Leyla.Yigit/blob/master/AssignmentWeek2/odd_retail_sales_2016_10.xlsx?raw=true",mode="wb",destfile=tmp)
# Read that excel file using readxl package's read_excel function. You might need to adjust the parameters (skip, col_names) according to your raw file's format.
raw_data<-readxl::read_excel(tmp,skip=7,col_names=FALSE)
# Remove the temp file
file.remove(tmp)

## [1] TRUE

Remove the last two rows because they are irrelevant (total and empty rows)

plot(pressure)

raw_data <- raw_data %>% slice(-c(48,49))

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Let’s see our raw data

head(raw_data)

## # A tibble: 6 x 10
##   X__1          X__2  X__3  X__4  X__5  X__6  X__7  X__8  X__9 X__10
##   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ASTON MARTIN    NA     1     1    NA    NA     0     0     1     1
## 2 AUDI            NA  2448  2448    NA    NA     0     0  2448  2448
## 3 BENTLEY         NA     1     1    NA    NA     0     0     1     1
## 4 BMW             NA  2205  2205    NA    NA     0     0  2205  2205
## 5 CHERY           NA     0     0    NA    NA     0     0     0     0
## 6 CITROEN         NA  1381  1381   126   636   762   126  2017  2143

Make Data Civilized

#In order to make the data standardized and workable we need to define column names and remove NA values for this example. 
#Please use the same column names in your examples also.
# Use the same column names in your data.
colnames(raw_data) <- c("MARKA","oto_yerli","oto_ithal","oto_toplam","ticari_yerli","ticari_ithal","ticari_toplam","toplam_yerli","toplam_ithal","toplam_toplam")
# Now we replace NA values with 0 and label the time period with year and month, so when we merge the data we won't be confused.
car_data_oct_16<- raw_data %>% mutate_if(is.numeric,funs(ifelse(is.na(.),0,.))) %>% mutate(year=2016,month=10)

print(car_data_oct_16,width=Inf)

## # A tibble: 47 x 12
##    MARKA        oto_yerli oto_ithal oto_toplam ticari_yerli ticari_ithal
##    <chr>            <dbl>     <dbl>      <dbl>        <dbl>        <dbl>
##  1 ASTON MARTIN         0         1          1            0            0
##  2 AUDI                 0      2448       2448            0            0
##  3 BENTLEY              0         1          1            0            0
##  4 BMW                  0      2205       2205            0            0
##  5 CHERY                0         0          0            0            0
##  6 CITROEN              0      1381       1381          126          636
##  7 DACIA                0      3989       3989            0          249
##  8 DS                   0        17         17            0            0
##  9 FERRARI              0         1          1            0            0
## 10 FIAT              4390       207       4597         4036          495
##    ticari_toplam toplam_yerli toplam_ithal toplam_toplam  year month
##            <dbl>        <dbl>        <dbl>         <dbl> <dbl> <dbl>
##  1             0            0            1             1  2016    10
##  2             0            0         2448          2448  2016    10
##  3             0            0            1             1  2016    10
##  4             0            0         2205          2205  2016    10
##  5             0            0            0             0  2016    10
##  6           762          126         2017          2143  2016    10
##  7           249            0         4238          4238  2016    10
##  8             0            0           17            17  2016    10
##  9             0            0            1             1  2016    10
## 10          4531         8426          702          9128  2016    10
## # ... with 37 more rows

Save Your Civilized Data

#One of the best methods is to save your data to an RDS or RData file. 
#The difference is RDS can hold only one object but RData can hold many. Since we have only one data frame here we will go with RDS.
# Save an object to a file
saveRDS(car_data_oct_16, file = "car_data_oct_16.rds")

ANALYSIS

–1— a list of total sales of brands with both automobile and commercial vehicle sales ordered in decreasing total sales.

car_data_oct_16 %>% 
  filter(oto_toplam > 0 & ticari_toplam > 0) %>%
  select(MARKA,toplam_toplam) %>%
  arrange(desc(toplam_toplam))

## # A tibble: 14 x 2
##    MARKA         toplam_toplam
##    <chr>                 <dbl>
##  1 VOLKSWAGEN            12229
##  2 FIAT                   9128
##  3 RENAULT                8882
##  4 FORD                   8840
##  5 TOYOTA                 4436
##  6 HYUNDAI                4313
##  7 DACIA                  4238
##  8 PEUGEOT                3171
##  9 MERCEDES-BENZ          3029
## 10 NISSAN                 2452
## 11 CITROEN                2143
## 12 KIA                    1447
## 13 MITSUBISHI              286
## 14 SSANGYONG                94

** Analysis Result: VOLKSWAGEN has the highest number of sales; on the other hand SSANGYONG has the lowest sales number **

#Calculate the mean of sales numbers

car_data_oct_16 %>%
  summarise(sales_num_mean=mean(toplam_toplam),dom_sales_num_maen=mean(toplam_yerli),imp_sales_num_mean=mean(toplam_ithal),com_sales_num_mean=mean(ticari_ithal),oto_sales_num_mean=mean(oto_ithal))

## # A tibble: 1 x 5
##   sales_num_mean dom_sales_num_m~ imp_sales_num_m~ com_sales_num_m~
##            <dbl>            <dbl>            <dbl>            <dbl>
## 1          1765.             530.            1235.             203.
## # ... with 1 more variable: oto_sales_num_mean <dbl>

** Analysis Result: As a result,in 2016,10 customers prefer import cars more than domestic ones.From this import cars, sales of indiviual cars is higher than commercial car sales **

–2—Get the VOLKSWAGEN sales numbers, compare commercial and car types.

car_data_oct_16 %>% 
  filter(MARKA=='VOLKSWAGEN' ) %>% 
  select(MARKA,(toplam_toplam),(toplam_yerli)  ,(toplam_ithal))%>%
  arrange (desc(sum(toplam_toplam)))

## # A tibble: 1 x 4
##   MARKA      toplam_toplam toplam_yerli toplam_ithal
##   <chr>              <dbl>        <dbl>        <dbl>
## 1 VOLKSWAGEN         12229            0        12229

** Analysis Result:VOLKSWAGEN is an import car and has 12229 number of sales 2016,10 **

–3—Get the car names sales number is equal to 0 and order car name asc

car_data_oct_16 %>%
  filter(toplam_toplam==0) %>%
  select(MARKA,toplam_toplam) %>%
  arrange( MARKA)

## # A tibble: 7 x 2
##   MARKA       toplam_toplam
##   <chr>               <dbl>
## 1 CHERY                   0
## 2 GEELY                   0
## 3 LAMBORGHINI             0
## 4 LANCIA                  0
## 5 OTOKAR                  0
## 6 PROTON                  0
## 7 TATA                    0

** Analysis Result:There are 7 car brand have 0 number of sales in 2016,10 **

–4—Get the car names and numbers sales number more than 2000 and how many of this number domestic or import and order sales number desc.

car_data_oct_16 %>%
  group_by(MARKA) %>%
  filter(sum(toplam_toplam)>2000) %>%
  select(MARKA,toplam_ithal,toplam_yerli,toplam_toplam) %>%
  arrange(desc(toplam_toplam))

## # A tibble: 16 x 4
## # Groups:   MARKA [16]
##    MARKA         toplam_ithal toplam_yerli toplam_toplam
##    <chr>                <dbl>        <dbl>         <dbl>
##  1 VOLKSWAGEN           12229            0         12229
##  2 FIAT                   702         8426          9128
##  3 RENAULT               4557         4325          8882
##  4 FORD                  2977         5863          8840
##  5 OPEL                  4630            0          4630
##  6 TOYOTA                1820         2616          4436
##  7 HYUNDAI               2468         1845          4313
##  8 DACIA                 4238            0          4238
##  9 PEUGEOT               3056          115          3171
## 10 MERCEDES-BENZ         3029            0          3029
## 11 SKODA                 2685            0          2685
## 12 NISSAN                2452            0          2452
## 13 AUDI                  2448            0          2448
## 14 BMW                   2205            0          2205
## 15 CITROEN               2017          126          2143
## 16 SEAT                  2121            0          2121

–5—Compare cars and commercial cars, which brand has the highest sales. In that highest group, do customers prefer domestic or import

car_data_oct_16_comp <-
  car_data_oct_16 %>%
 # group_by(MARKA)%>%
  #aggregate( by=list(toplam_yerli,toplam_ithal,toplam_toplam), FUN=sum)
  summarise(dip_toplam=sum(toplam_toplam),ticari_toplam=sum(ticari_toplam),oto_toplam=sum(oto_toplam))
  #mutate(dip_toplam = sum(toplam_toplam))

car_data_oct_16_comp

## # A tibble: 1 x 3
##   dip_toplam ticari_toplam oto_toplam
##        <dbl>         <dbl>      <dbl>
## 1      82963         19254      63709

Analysis Result:Indıvidual car type is more preffred by customers than commercial ones.

RMarkdown-Homework-ODDAnalysis_Assignment2

Leyla Yigit

31 Ekim 2018