This exercise is designed as a recap of the introduction to the tidyverse, starting from the very basics.
Solutions to the exercises in this document are shown below.
First, we are going to download the weather data and then load it into the session.
# Load the package to the session
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1 √ purrr 0.2.4
## √ tibble 1.3.4 √ dplyr 0.7.4
## √ tidyr 0.7.2 √ stringr 1.2.0
## √ readr 1.1.1 √ forcats 0.2.0
## -- Conflicts ---------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Fetch the course data set from GitHub into the working directory
download.file(
  url = "https://mef-bda503.github.io/files/travel_weather.RData",
  destfile = "travel_weather.RData"
)
# Restore the objects stored in the downloaded file into the session
load(file = "travel_weather.RData")
Now let’s take a look at our data.
# Print the data as a tibble so only the first rows and the column types are
# shown. as_tibble() replaces tbl_df(), which is deprecated in dplyr.
travel_weather %>%
  as_tibble()
## # A tibble: 731 x 7
## year month day Amsterdam London NYC Venice
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 1 8 8 16 13
## 2 2015 11 2 10 11 15 10
## 3 2015 11 3 9 11 16 9
## 4 2015 11 4 12 11 17 10
## 5 2015 11 5 13 13 18 12
## 6 2015 11 6 16 14 21 13
## 7 2015 11 7 16 14 17 14
## 8 2015 11 8 12 12 11 13
## 9 2015 11 9 13 12 11 11
## 10 2015 11 10 14 14 12 11
## # ... with 721 more rows
# Keep the days on which Amsterdam was warmer than London but colder than
# Venice; comma-separated conditions in filter() are combined with AND.
travel_weather %>%
  filter(
    Amsterdam > London,
    Amsterdam < Venice
  )
## # A tibble: 165 x 7
## year month day Amsterdam London NYC Venice
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015 11 21 5 3 9 8
## 2 2015 11 22 3 1 9 8
## 3 2016 1 13 4 3 -3 6
## 4 2016 1 16 2 1 8 4
## 5 2016 2 3 5 4 11 8
## 6 2016 2 11 4 3 -4 7
## 7 2016 2 12 2 1 -6 6
## 8 2016 2 23 4 3 3 11
## 9 2016 2 24 2 1 9 10
## 10 2016 2 25 2 1 9 8
## # ... with 155 more rows
# Average amount by which NYC is warmer than Amsterdam for each year/month,
# computed only over the days on which NYC was the warmer city, and listed
# from the largest average gap to the smallest.
travel_weather %>%
  filter(NYC > Amsterdam) %>%
  mutate(temp_gap = NYC - Amsterdam) %>%
  group_by(year, month) %>%
  summarise(NYCwA_diff = round(mean(temp_gap), digits = 1)) %>%
  arrange(desc(NYCwA_diff))
## # A tibble: 24 x 3
## # Groups: year [3]
## year month NYCwA_diff
## <dbl> <dbl> <dbl>
## 1 2016 8 8.4
## 2 2016 7 8.1
## 3 2017 9 7.9
## 4 2016 4 7.5
## 5 2017 4 7.4
## 6 2017 7 7.3
## 7 2017 8 6.5
## 8 2016 11 6.4
## 9 2016 3 6.3
## 10 2016 6 6.0
## # ... with 14 more rows
# For every day, find the city (or cities, when several tie) with the highest
# temperature. The data is first reshaped to one row per day and city.
travel_weather %>%
  gather(key = City, value = Temperature, -year, -month, -day) %>%
  group_by(year, month, day) %>%
  # Within each day, keep only the rows that reach that day's maximum
  filter(Temperature == max(Temperature)) %>%
  arrange(year, month, day) %>%
  select(year, month, day, City, Temperature)
## # A tibble: 834 x 5
## # Groups: year, month, day [731]
## year month day City Temperature
## <dbl> <dbl> <dbl> <chr> <dbl>
## 1 2015 11 1 NYC 16
## 2 2015 11 2 NYC 15
## 3 2015 11 3 NYC 16
## 4 2015 11 4 NYC 17
## 5 2015 11 5 NYC 18
## 6 2015 11 6 NYC 21
## 7 2015 11 7 NYC 17
## 8 2015 11 8 Venice 13
## 9 2015 11 9 Amsterdam 13
## 10 2015 11 10 Amsterdam 14
## # ... with 824 more rows