Source: Tidyverse Recap
First I had a quick review of Travel Weather data and finished all exercises.
#Load package tidyverse
library(tidyverse)
# Begin by loading the dataset
library(readr)
load("D:/Users/tkartalkaya/Desktop/travel_weather.RData")
travel_weather %>% tbl_df()
## # A tibble: 731 x 7
## year month day Amsterdam London NYC Venice
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 8 16 13
## 2 2015 11 2 10 11 15 10
## 3 2015 11 3 9 11 16 9
## 4 2015 11 4 12 11 17 10
## 5 2015 11 5 13 13 18 12
## 6 2015 11 6 16 14 21 13
## 7 2015 11 7 16 14 17 14
## 8 2015 11 8 12 12 11 13
## 9 2015 11 9 13 12 11 11
## 10 2015 11 10 14 14 12 11
## # ... with 721 more rows
glimpse(travel_weather)
## Observations: 731
## Variables: 7
## $ year <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ month <dbl> 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, ...
## $ day <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1...
## $ Amsterdam <dbl> 8, 10, 9, 12, 13, 16, 16, 12, 13, 14, 13, 13, 11, 11...
## $ London <dbl> 8, 11, 11, 11, 13, 14, 14, 12, 12, 14, 13, 12, 10, 1...
## $ NYC <dbl> 16, 15, 16, 17, 18, 21, 17, 11, 11, 12, 12, 13, 11, ...
## $ Venice <dbl> 13, 10, 9, 10, 12, 13, 14, 13, 11, 11, 9, 11, 8, 11,...
1
## [1] 1
travel_weather %>% select(year, month, day, Venice)
## # A tibble: 731 x 4
## year month day Venice
## * <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 13
## 2 2015 11 2 10
## 3 2015 11 3 9
## 4 2015 11 4 10
## 5 2015 11 5 12
## 6 2015 11 6 13
## 7 2015 11 7 14
## 8 2015 11 8 13
## 9 2015 11 9 11
## 10 2015 11 10 11
## # ... with 721 more rows
2
## [1] 2
travel_weather %>% select(Venice:Amsterdam)
## # A tibble: 731 x 4
## Venice NYC London Amsterdam
## * <dbl> <dbl> <dbl> <dbl>
## 1 13 16 8 8
## 2 10 15 11 10
## 3 9 16 11 9
## 4 10 17 11 12
## 5 12 18 13 13
## 6 13 21 14 16
## 7 14 17 14 16
## 8 13 11 12 12
## 9 11 11 12 13
## 10 11 12 14 14
## # ... with 721 more rows
3
## [1] 3
travel_weather %>% select(-NYC, -London)
## # A tibble: 731 x 5
## year month day Amsterdam Venice
## * <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 13
## 2 2015 11 2 10 10
## 3 2015 11 3 9 9
## 4 2015 11 4 12 10
## 5 2015 11 5 13 12
## 6 2015 11 6 16 13
## 7 2015 11 7 16 14
## 8 2015 11 8 12 13
## 9 2015 11 9 13 11
## 10 2015 11 10 14 11
## # ... with 721 more rows
4
## [1] 4
travel_weather %>% rename(`New York` = NYC)
## # A tibble: 731 x 7
## year month day Amsterdam London `New York` Venice
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 8 16 13
## 2 2015 11 2 10 11 15 10
## 3 2015 11 3 9 11 16 9
## 4 2015 11 4 12 11 17 10
## 5 2015 11 5 13 13 18 12
## 6 2015 11 6 16 14 21 13
## 7 2015 11 7 16 14 17 14
## 8 2015 11 8 12 12 11 13
## 9 2015 11 9 13 12 11 11
## 10 2015 11 10 14 14 12 11
## # ... with 721 more rows
1
## [1] 1
travel_weather %>%
arrange(NYC)
## # A tibble: 731 x 7
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016 2 14 2 3 -14 6
## 2 2016 2 13 1 2 -10 4
## 3 2016 1 5 6 8 -7 2
## 4 2017 1 9 6 7 -7 -2
## 5 2016 1 19 -2 0 -6 1
## 6 2016 2 12 2 1 -6 6
## 7 2016 12 16 6 6 -6 4
## 8 2017 1 8 4 9 -6 -2
## 9 2017 1 7 1 8 -5 -3
## 10 2017 3 11 7 10 -5 9
## # ... with 721 more rows
2
## [1] 2
travel_weather %>%
arrange(NYC,desc(Amsterdam))
## # A tibble: 731 x 7
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016 2 14 2 3 -14 6
## 2 2016 2 13 1 2 -10 4
## 3 2016 1 5 6 8 -7 2
## 4 2017 1 9 6 7 -7 -2
## 5 2016 12 16 6 6 -6 4
## 6 2017 1 8 4 9 -6 -2
## 7 2016 2 12 2 1 -6 6
## 8 2016 1 19 -2 0 -6 1
## 9 2017 3 15 9 11 -5 10
## 10 2017 3 11 7 10 -5 9
## # ... with 721 more rows
3
## [1] 3
travel_weather %>%
arrange (desc(year),desc(month),desc(day))
## # A tibble: 731 x 7
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017 10 31 9 9 11 11
## 2 2017 10 30 8 6 12 13
## 3 2017 10 29 11 11 18 9
## 4 2017 10 28 12 10 17 10
## 5 2017 10 27 12 9 13 13
## 6 2017 10 26 13 10 13 13
## 7 2017 10 25 13 14 17 13
## 8 2017 10 24 13 16 21 13
## 9 2017 10 23 13 13 20 13
## 10 2017 10 22 11 11 19 13
## # ... with 721 more rows
4
## [1] 4
travel_weather %>%
arrange(London - Amsterdam)
## # A tibble: 731 x 7
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016 12 25 10 0 6 6
## 2 2015 12 25 9 0 17 4
## 3 2016 5 31 18 11 26 19
## 4 2016 6 1 19 12 24 17
## 5 2016 4 10 10 4 5 16
## 6 2016 6 7 20 14 24 22
## 7 2016 5 6 17 12 11 18
## 8 2016 5 8 21 16 14 17
## 9 2016 5 10 19 14 14 18
## 10 2016 6 3 16 11 19 19
## # ... with 721 more rows
1
## [1] 1
travel_weather %>%
mutate(VAdiff = Venice - Amsterdam)
## # A tibble: 731 x 8
## year month day Amsterdam London NYC Venice VAdiff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 8 16 13 5
## 2 2015 11 2 10 11 15 10 0
## 3 2015 11 3 9 11 16 9 0
## 4 2015 11 4 12 11 17 10 -2
## 5 2015 11 5 13 13 18 12 -1
## 6 2015 11 6 16 14 21 13 -3
## 7 2015 11 7 16 14 17 14 -2
## 8 2015 11 8 12 12 11 13 1
## 9 2015 11 9 13 12 11 11 -2
## 10 2015 11 10 14 14 12 11 -3
## # ... with 721 more rows
2
## [1] 2
travel_weather %>%
mutate(VwarmerA = Venice > Amsterdam)
## # A tibble: 731 x 8
## year month day Amsterdam London NYC Venice VwarmerA
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 2015 11 1 8 8 16 13 TRUE
## 2 2015 11 2 10 11 15 10 FALSE
## 3 2015 11 3 9 11 16 9 FALSE
## 4 2015 11 4 12 11 17 10 FALSE
## 5 2015 11 5 13 13 18 12 FALSE
## 6 2015 11 6 16 14 21 13 FALSE
## 7 2015 11 7 16 14 17 14 FALSE
## 8 2015 11 8 12 12 11 13 TRUE
## 9 2015 11 9 13 12 11 11 FALSE
## 10 2015 11 10 14 14 12 11 FALSE
## # ... with 721 more rows
3
## [1] 3
travel_weather %>%
transmute(year,month,day,
VwarmerA = ifelse(Venice > Amsterdam,'warmer', 'colder'))
## # A tibble: 731 x 4
## year month day VwarmerA
## <dbl> <dbl> <dbl> <chr>
## 1 2015 11 1 warmer
## 2 2015 11 2 colder
## 3 2015 11 3 colder
## 4 2015 11 4 colder
## 5 2015 11 5 colder
## 6 2015 11 6 colder
## 7 2015 11 7 colder
## 8 2015 11 8 warmer
## 9 2015 11 9 colder
## 10 2015 11 10 colder
## # ... with 721 more rows
1
## [1] 1
travel_weather %>%
summarise(Venice_mean=mean(Venice),NYC_mean=mean(NYC))
## # A tibble: 1 x 2
## Venice_mean NYC_mean
## <dbl> <dbl>
## 1 14.31601 14.41313
2
## [1] 2
travel_weather %>%
group_by(month) %>%
summarise(Amsterdam_mean=round(mean(Amsterdam),2))
## # A tibble: 12 x 2
## month Amsterdam_mean
## <dbl> <dbl>
## 1 1 3.00
## 2 2 4.32
## 3 3 6.92
## 4 4 8.43
## 5 5 14.48
## 6 6 17.28
## 7 7 18.02
## 8 8 17.68
## 9 9 15.95
## 10 10 11.68
## 11 11 7.65
## 12 12 6.97
3
## [1] 3
travel_weather %>%
group_by(year,month) %>%
summarise(AwarmerN_n=sum(Amsterdam > NYC))
## # A tibble: 24 x 3
## # Groups: year [?]
## year month AwarmerN_n
## <dbl> <dbl> <int>
## 1 2015 11 11
## 2 2015 12 12
## 3 2016 1 23
## 4 2016 2 16
## 5 2016 3 5
## 6 2016 4 10
## 7 2016 5 8
## 8 2016 6 1
## 9 2016 7 1
## 10 2016 8 0
## # ... with 14 more rows
4
## [1] 4
travel_weather %>%
group_by(year,month) %>%
summarise(London_min=min(London),London_median=median(London),London_max=max(London))
## # A tibble: 24 x 5
## # Groups: year [?]
## year month London_min London_median London_max
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 11 14
## 2 2015 12 0 10 14
## 3 2016 1 0 6 11
## 4 2016 2 1 4 12
## 5 2016 3 2 6 11
## 6 2016 4 4 8 11
## 7 2016 5 8 13 16
## 8 2016 6 11 16 19
## 9 2016 7 14 18 24
## 10 2016 8 14 18 24
## # ... with 14 more rows
travel_weather %>%
transmute(year,month,day,Amsterdam,A_prev=lag(Amsterdam),A_next=lead(Amsterdam),
A_prev_diff=Amsterdam-A_prev,A_next_diff=Amsterdam-A_next)
## # A tibble: 731 x 8
## year month day Amsterdam A_prev A_next A_prev_diff A_next_diff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 NA 10 NA -2
## 2 2015 11 2 10 8 9 2 1
## 3 2015 11 3 9 10 12 -1 -3
## 4 2015 11 4 12 9 13 3 -1
## 5 2015 11 5 13 12 16 1 -3
## 6 2015 11 6 16 13 16 3 0
## 7 2015 11 7 16 16 12 0 4
## 8 2015 11 8 12 16 13 -4 -1
## 9 2015 11 9 13 12 14 1 -1
## 10 2015 11 10 14 13 13 1 1
## # ... with 721 more rows
travel_weather %>%
slice(1:3)
## # A tibble: 3 x 7
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 8 16 13
## 2 2015 11 2 10 11 15 10
## 3 2015 11 3 9 11 16 9
travel_weather %>%
group_by(year) %>%
slice(1:3)
## # A tibble: 9 x 7
## # Groups: year [3]
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 8 16 13
## 2 2015 11 2 10 11 15 10
## 3 2015 11 3 9 11 16 9
## 4 2016 1 1 4 3 3 2
## 5 2016 1 2 6 10 2 0
## 6 2016 1 3 7 8 4 3
## 7 2017 1 1 1 7 7 2
## 8 2017 1 2 3 2 3 1
## 9 2017 1 3 4 2 5 3
#Transform to long format by melting the data
#Though you should not include date columns
travel_weather_long <-
travel_weather %>%
gather(key=City,value=Temperature,-year,-month,-day)
travel_weather_long
## # A tibble: 2,924 x 5
## year month day City Temperature
## <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2015 11 1 Amsterdam 8
## 2 2015 11 2 Amsterdam 10
## 3 2015 11 3 Amsterdam 9
## 4 2015 11 4 Amsterdam 12
## 5 2015 11 5 Amsterdam 13
## 6 2015 11 6 Amsterdam 16
## 7 2015 11 7 Amsterdam 16
## 8 2015 11 8 Amsterdam 12
## 9 2015 11 9 Amsterdam 13
## 10 2015 11 10 Amsterdam 14
## # ... with 2,914 more rows
#Now group by and summarise to get average temperatures for each city and month
travel_weather_long %>%
group_by(month,City) %>%
summarise(temp_avg=round(mean(Temperature))) %>%
#Now spread the months to the columns
spread(month,temp_avg)
## # A tibble: 4 x 13
## City `1` `2` `3` `4` `5` `6` `7` `8` `9` `10`
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Amsterdam 3 4 7 8 14 17 18 18 16 12
## 2 London 4 6 8 9 13 17 18 18 16 12
## 3 NYC 2 4 7 13 17 22 26 25 22 16
## 4 Venice 2 7 11 14 18 22 25 25 20 14
## # ... with 2 more variables: `11` <dbl>, `12` <dbl>
#Method 1
travel_weather %>%
select(Amsterdam:Venice) %>%
summarise_all(funs(round(mean(.))))
## # A tibble: 1 x 4
## Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl>
## 1 11 12 14 14
#Method 2
travel_weather %>%
summarise_at(vars(Amsterdam:Venice),funs(round(mean(.))))
## # A tibble: 1 x 4
## Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl>
## 1 11 12 14 14
#Method 2
travel_weather %>%
mutate_at(vars(Amsterdam,London,Venice),funs(diff_NYC=abs(NYC-.))) %>%
select(-Amsterdam,-London,-Venice)
## # A tibble: 731 x 7
## year month day NYC Amsterdam_diff_NYC London_diff_NYC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 16 8 8
## 2 2015 11 2 15 5 4
## 3 2015 11 3 16 7 5
## 4 2015 11 4 17 5 6
## 5 2015 11 5 18 5 5
## 6 2015 11 6 21 5 7
## 7 2015 11 7 17 1 3
## 8 2015 11 8 11 1 1
## 9 2015 11 9 11 2 1
## 10 2015 11 10 12 2 2
## # ... with 721 more rows, and 1 more variables: Venice_diff_NYC <dbl>
My answers of the final exercices are shown below.
travel_weather %>%
filter( (Amsterdam>London) & (Amsterdam<Venice))
## # A tibble: 165 x 7
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 21 5 3 9 8
## 2 2015 11 22 3 1 9 8
## 3 2016 1 13 4 3 -3 6
## 4 2016 1 16 2 1 8 4
## 5 2016 2 3 5 4 11 8
## 6 2016 2 11 4 3 -4 7
## 7 2016 2 12 2 1 -6 6
## 8 2016 2 23 4 3 3 11
## 9 2016 2 24 2 1 9 10
## 10 2016 2 25 2 1 9 8
## # ... with 155 more rows
travel_weather %>%
filter(NYC>Amsterdam)%>%
group_by(year,month)%>%
summarise_at(vars(Amsterdam,NYC),funs(round(mean(.),1)))%>%
mutate(NYCwA_diff=NYC-Amsterdam)%>%
select(-Amsterdam,-NYC)%>%
arrange(desc(NYCwA_diff))
## # A tibble: 24 x 3
## # Groups: year [3]
## year month NYCwA_diff
## <dbl> <dbl> <dbl>
## 1 2016 8 8.5
## 2 2016 7 8.1
## 3 2017 9 7.9
## 4 2016 4 7.6
## 5 2017 7 7.4
## 6 2017 4 7.4
## 7 2017 8 6.5
## 8 2016 11 6.4
## 9 2016 3 6.3
## 10 2016 6 6.0
## # ... with 14 more rows
travel_weather %>%
gather(key=City,value=Temperature, -year, -month, -day) %>%
group_by(year, month, day) %>%
mutate(max_temp =max(Temperature)) %>%
filter(Temperature==max_temp) %>%
arrange(year,month,day) %>%
select(year, month, day,City,Temperature)
## # A tibble: 834 x 5
## # Groups: year, month, day [731]
## year month day City Temperature
## <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2015 11 1 NYC 16
## 2 2015 11 2 NYC 15
## 3 2015 11 3 NYC 16
## 4 2015 11 4 NYC 17
## 5 2015 11 5 NYC 18
## 6 2015 11 6 NYC 21
## 7 2015 11 7 NYC 17
## 8 2015 11 8 Venice 13
## 9 2015 11 9 Amsterdam 13
## 10 2015 11 10 Amsterdam 14
## # ... with 824 more rows