Travel Weather Data

Source: Tidyverse Recap

First I had a quick review of Travel Weather data and finished all exercises.

#Load package tidyverse
library(tidyverse)

# Begin by loading the dataset
library(readr)
load("D:/Users/tkartalkaya/Desktop/travel_weather.RData")

travel_weather %>% tbl_df()
## # A tibble: 731 x 7
##     year month   day Amsterdam London   NYC Venice
##  * <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
##  1  2015    11     1         8      8    16     13
##  2  2015    11     2        10     11    15     10
##  3  2015    11     3         9     11    16      9
##  4  2015    11     4        12     11    17     10
##  5  2015    11     5        13     13    18     12
##  6  2015    11     6        16     14    21     13
##  7  2015    11     7        16     14    17     14
##  8  2015    11     8        12     12    11     13
##  9  2015    11     9        13     12    11     11
## 10  2015    11    10        14     14    12     11
## # ... with 721 more rows
glimpse(travel_weather)
## Observations: 731
## Variables: 7
## $ year      <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ month     <dbl> 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, ...
## $ day       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1...
## $ Amsterdam <dbl> 8, 10, 9, 12, 13, 16, 16, 12, 13, 14, 13, 13, 11, 11...
## $ London    <dbl> 8, 11, 11, 11, 13, 14, 14, 12, 12, 14, 13, 12, 10, 1...
## $ NYC       <dbl> 16, 15, 16, 17, 18, 21, 17, 11, 11, 12, 12, 13, 11, ...
## $ Venice    <dbl> 13, 10, 9, 10, 12, 13, 14, 13, 11, 11, 9, 11, 8, 11,...

select/rename

1
## [1] 1
travel_weather %>% select(year, month, day, Venice)
## # A tibble: 731 x 4
##     year month   day Venice
##  * <dbl> <dbl> <dbl>  <dbl>
##  1  2015    11     1     13
##  2  2015    11     2     10
##  3  2015    11     3      9
##  4  2015    11     4     10
##  5  2015    11     5     12
##  6  2015    11     6     13
##  7  2015    11     7     14
##  8  2015    11     8     13
##  9  2015    11     9     11
## 10  2015    11    10     11
## # ... with 721 more rows
2
## [1] 2
travel_weather %>% select(Venice:Amsterdam)
## # A tibble: 731 x 4
##    Venice   NYC London Amsterdam
##  *  <dbl> <dbl>  <dbl>     <dbl>
##  1     13    16      8         8
##  2     10    15     11        10
##  3      9    16     11         9
##  4     10    17     11        12
##  5     12    18     13        13
##  6     13    21     14        16
##  7     14    17     14        16
##  8     13    11     12        12
##  9     11    11     12        13
## 10     11    12     14        14
## # ... with 721 more rows
3
## [1] 3
travel_weather %>% select(-NYC, -London)
## # A tibble: 731 x 5
##     year month   day Amsterdam Venice
##  * <dbl> <dbl> <dbl>     <dbl>  <dbl>
##  1  2015    11     1         8     13
##  2  2015    11     2        10     10
##  3  2015    11     3         9      9
##  4  2015    11     4        12     10
##  5  2015    11     5        13     12
##  6  2015    11     6        16     13
##  7  2015    11     7        16     14
##  8  2015    11     8        12     13
##  9  2015    11     9        13     11
## 10  2015    11    10        14     11
## # ... with 721 more rows
4
## [1] 4
travel_weather %>% rename(`New York` = NYC)
## # A tibble: 731 x 7
##     year month   day Amsterdam London `New York` Venice
##  * <dbl> <dbl> <dbl>     <dbl>  <dbl>      <dbl>  <dbl>
##  1  2015    11     1         8      8         16     13
##  2  2015    11     2        10     11         15     10
##  3  2015    11     3         9     11         16      9
##  4  2015    11     4        12     11         17     10
##  5  2015    11     5        13     13         18     12
##  6  2015    11     6        16     14         21     13
##  7  2015    11     7        16     14         17     14
##  8  2015    11     8        12     12         11     13
##  9  2015    11     9        13     12         11     11
## 10  2015    11    10        14     14         12     11
## # ... with 721 more rows

arrange

1
## [1] 1
travel_weather %>%
  arrange(NYC)
## # A tibble: 731 x 7
##     year month   day Amsterdam London   NYC Venice
##    <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
##  1  2016     2    14         2      3   -14      6
##  2  2016     2    13         1      2   -10      4
##  3  2016     1     5         6      8    -7      2
##  4  2017     1     9         6      7    -7     -2
##  5  2016     1    19        -2      0    -6      1
##  6  2016     2    12         2      1    -6      6
##  7  2016    12    16         6      6    -6      4
##  8  2017     1     8         4      9    -6     -2
##  9  2017     1     7         1      8    -5     -3
## 10  2017     3    11         7     10    -5      9
## # ... with 721 more rows
2
## [1] 2
travel_weather %>%
  arrange(NYC,desc(Amsterdam))
## # A tibble: 731 x 7
##     year month   day Amsterdam London   NYC Venice
##    <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
##  1  2016     2    14         2      3   -14      6
##  2  2016     2    13         1      2   -10      4
##  3  2016     1     5         6      8    -7      2
##  4  2017     1     9         6      7    -7     -2
##  5  2016    12    16         6      6    -6      4
##  6  2017     1     8         4      9    -6     -2
##  7  2016     2    12         2      1    -6      6
##  8  2016     1    19        -2      0    -6      1
##  9  2017     3    15         9     11    -5     10
## 10  2017     3    11         7     10    -5      9
## # ... with 721 more rows
3
## [1] 3
travel_weather %>%
  arrange (desc(year),desc(month),desc(day))
## # A tibble: 731 x 7
##     year month   day Amsterdam London   NYC Venice
##    <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
##  1  2017    10    31         9      9    11     11
##  2  2017    10    30         8      6    12     13
##  3  2017    10    29        11     11    18      9
##  4  2017    10    28        12     10    17     10
##  5  2017    10    27        12      9    13     13
##  6  2017    10    26        13     10    13     13
##  7  2017    10    25        13     14    17     13
##  8  2017    10    24        13     16    21     13
##  9  2017    10    23        13     13    20     13
## 10  2017    10    22        11     11    19     13
## # ... with 721 more rows
4
## [1] 4
travel_weather %>%
  arrange(London - Amsterdam)
## # A tibble: 731 x 7
##     year month   day Amsterdam London   NYC Venice
##    <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
##  1  2016    12    25        10      0     6      6
##  2  2015    12    25         9      0    17      4
##  3  2016     5    31        18     11    26     19
##  4  2016     6     1        19     12    24     17
##  5  2016     4    10        10      4     5     16
##  6  2016     6     7        20     14    24     22
##  7  2016     5     6        17     12    11     18
##  8  2016     5     8        21     16    14     17
##  9  2016     5    10        19     14    14     18
## 10  2016     6     3        16     11    19     19
## # ... with 721 more rows

mutate/transmute

1
## [1] 1
travel_weather %>%
  mutate(VAdiff = Venice - Amsterdam)
## # A tibble: 731 x 8
##     year month   day Amsterdam London   NYC Venice VAdiff
##    <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>  <dbl>
##  1  2015    11     1         8      8    16     13      5
##  2  2015    11     2        10     11    15     10      0
##  3  2015    11     3         9     11    16      9      0
##  4  2015    11     4        12     11    17     10     -2
##  5  2015    11     5        13     13    18     12     -1
##  6  2015    11     6        16     14    21     13     -3
##  7  2015    11     7        16     14    17     14     -2
##  8  2015    11     8        12     12    11     13      1
##  9  2015    11     9        13     12    11     11     -2
## 10  2015    11    10        14     14    12     11     -3
## # ... with 721 more rows
2
## [1] 2
travel_weather %>%
  mutate(VwarmerA = Venice > Amsterdam)
## # A tibble: 731 x 8
##     year month   day Amsterdam London   NYC Venice VwarmerA
##    <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>    <lgl>
##  1  2015    11     1         8      8    16     13     TRUE
##  2  2015    11     2        10     11    15     10    FALSE
##  3  2015    11     3         9     11    16      9    FALSE
##  4  2015    11     4        12     11    17     10    FALSE
##  5  2015    11     5        13     13    18     12    FALSE
##  6  2015    11     6        16     14    21     13    FALSE
##  7  2015    11     7        16     14    17     14    FALSE
##  8  2015    11     8        12     12    11     13     TRUE
##  9  2015    11     9        13     12    11     11    FALSE
## 10  2015    11    10        14     14    12     11    FALSE
## # ... with 721 more rows
3
## [1] 3
travel_weather %>%
  transmute(year,month,day,
            VwarmerA = ifelse(Venice > Amsterdam,'warmer', 'colder'))
## # A tibble: 731 x 4
##     year month   day VwarmerA
##    <dbl> <dbl> <dbl>    <chr>
##  1  2015    11     1   warmer
##  2  2015    11     2   colder
##  3  2015    11     3   colder
##  4  2015    11     4   colder
##  5  2015    11     5   colder
##  6  2015    11     6   colder
##  7  2015    11     7   colder
##  8  2015    11     8   warmer
##  9  2015    11     9   colder
## 10  2015    11    10   colder
## # ... with 721 more rows

group_by/summarise

1
## [1] 1
travel_weather %>%
  summarise(Venice_mean=mean(Venice),NYC_mean=mean(NYC))
## # A tibble: 1 x 2
##   Venice_mean NYC_mean
##         <dbl>    <dbl>
## 1    14.31601 14.41313
2
## [1] 2
travel_weather %>%
  group_by(month) %>%
  summarise(Amsterdam_mean=round(mean(Amsterdam),2))
## # A tibble: 12 x 2
##    month Amsterdam_mean
##    <dbl>          <dbl>
##  1     1           3.00
##  2     2           4.32
##  3     3           6.92
##  4     4           8.43
##  5     5          14.48
##  6     6          17.28
##  7     7          18.02
##  8     8          17.68
##  9     9          15.95
## 10    10          11.68
## 11    11           7.65
## 12    12           6.97
3
## [1] 3
travel_weather %>%
  group_by(year,month) %>%
  summarise(AwarmerN_n=sum(Amsterdam > NYC))
## # A tibble: 24 x 3
## # Groups:   year [?]
##     year month AwarmerN_n
##    <dbl> <dbl>      <int>
##  1  2015    11         11
##  2  2015    12         12
##  3  2016     1         23
##  4  2016     2         16
##  5  2016     3          5
##  6  2016     4         10
##  7  2016     5          8
##  8  2016     6          1
##  9  2016     7          1
## 10  2016     8          0
## # ... with 14 more rows
4
## [1] 4
travel_weather %>%
  group_by(year,month) %>%
  summarise(London_min=min(London),London_median=median(London),London_max=max(London))
## # A tibble: 24 x 5
## # Groups:   year [?]
##     year month London_min London_median London_max
##    <dbl> <dbl>      <dbl>         <dbl>      <dbl>
##  1  2015    11          1            11         14
##  2  2015    12          0            10         14
##  3  2016     1          0             6         11
##  4  2016     2          1             4         12
##  5  2016     3          2             6         11
##  6  2016     4          4             8         11
##  7  2016     5          8            13         16
##  8  2016     6         11            16         19
##  9  2016     7         14            18         24
## 10  2016     8         14            18         24
## # ... with 14 more rows

Advanced Examples

Lead and Lag

travel_weather %>%
  transmute(year,month,day,Amsterdam,A_prev=lag(Amsterdam),A_next=lead(Amsterdam),
            A_prev_diff=Amsterdam-A_prev,A_next_diff=Amsterdam-A_next)
## # A tibble: 731 x 8
##     year month   day Amsterdam A_prev A_next A_prev_diff A_next_diff
##    <dbl> <dbl> <dbl>     <dbl>  <dbl>  <dbl>       <dbl>       <dbl>
##  1  2015    11     1         8     NA     10          NA          -2
##  2  2015    11     2        10      8      9           2           1
##  3  2015    11     3         9     10     12          -1          -3
##  4  2015    11     4        12      9     13           3          -1
##  5  2015    11     5        13     12     16           1          -3
##  6  2015    11     6        16     13     16           3           0
##  7  2015    11     7        16     16     12           0           4
##  8  2015    11     8        12     16     13          -4          -1
##  9  2015    11     9        13     12     14           1          -1
## 10  2015    11    10        14     13     13           1           1
## # ... with 721 more rows

slice

travel_weather %>%
  slice(1:3)
## # A tibble: 3 x 7
##    year month   day Amsterdam London   NYC Venice
##   <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
## 1  2015    11     1         8      8    16     13
## 2  2015    11     2        10     11    15     10
## 3  2015    11     3         9     11    16      9
travel_weather %>%
  group_by(year) %>%
  slice(1:3)
## # A tibble: 9 x 7
## # Groups:   year [3]
##    year month   day Amsterdam London   NYC Venice
##   <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
## 1  2015    11     1         8      8    16     13
## 2  2015    11     2        10     11    15     10
## 3  2015    11     3         9     11    16      9
## 4  2016     1     1         4      3     3      2
## 5  2016     1     2         6     10     2      0
## 6  2016     1     3         7      8     4      3
## 7  2017     1     1         1      7     7      2
## 8  2017     1     2         3      2     3      1
## 9  2017     1     3         4      2     5      3

Gather and Spread

#Transform to long format by melting the data
#Though you should not include date columns
travel_weather_long <-
  travel_weather %>%
  gather(key=City,value=Temperature,-year,-month,-day)
travel_weather_long
## # A tibble: 2,924 x 5
##     year month   day      City Temperature
##    <dbl> <dbl> <dbl>     <chr>       <dbl>
##  1  2015    11     1 Amsterdam           8
##  2  2015    11     2 Amsterdam          10
##  3  2015    11     3 Amsterdam           9
##  4  2015    11     4 Amsterdam          12
##  5  2015    11     5 Amsterdam          13
##  6  2015    11     6 Amsterdam          16
##  7  2015    11     7 Amsterdam          16
##  8  2015    11     8 Amsterdam          12
##  9  2015    11     9 Amsterdam          13
## 10  2015    11    10 Amsterdam          14
## # ... with 2,914 more rows
#Now group by and summarise to get average temperatures for each city and month
travel_weather_long %>%
  group_by(month,City) %>%
  summarise(temp_avg=round(mean(Temperature))) %>%
  #Now spread the months to the columns
  spread(month,temp_avg)
## # A tibble: 4 x 13
##        City   `1`   `2`   `3`   `4`   `5`   `6`   `7`   `8`   `9`  `10`
## *     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Amsterdam     3     4     7     8    14    17    18    18    16    12
## 2    London     4     6     8     9    13    17    18    18    16    12
## 3       NYC     2     4     7    13    17    22    26    25    22    16
## 4    Venice     2     7    11    14    18    22    25    25    20    14
## # ... with 2 more variables: `11` <dbl>, `12` <dbl>

_all and _at prefixes

#Method 1
travel_weather %>%
  select(Amsterdam:Venice) %>%
  summarise_all(funs(round(mean(.))))
## # A tibble: 1 x 4
##   Amsterdam London   NYC Venice
##       <dbl>  <dbl> <dbl>  <dbl>
## 1        11     12    14     14
#Method 2
travel_weather %>%
  summarise_at(vars(Amsterdam:Venice),funs(round(mean(.))))
## # A tibble: 1 x 4
##   Amsterdam London   NYC Venice
##       <dbl>  <dbl> <dbl>  <dbl>
## 1        11     12    14     14
#Method 2
travel_weather %>%
  mutate_at(vars(Amsterdam,London,Venice),funs(diff_NYC=abs(NYC-.))) %>%
  select(-Amsterdam,-London,-Venice)
## # A tibble: 731 x 7
##     year month   day   NYC Amsterdam_diff_NYC London_diff_NYC
##    <dbl> <dbl> <dbl> <dbl>              <dbl>           <dbl>
##  1  2015    11     1    16                  8               8
##  2  2015    11     2    15                  5               4
##  3  2015    11     3    16                  7               5
##  4  2015    11     4    17                  5               6
##  5  2015    11     5    18                  5               5
##  6  2015    11     6    21                  5               7
##  7  2015    11     7    17                  1               3
##  8  2015    11     8    11                  1               1
##  9  2015    11     9    11                  2               1
## 10  2015    11    10    12                  2               2
## # ... with 721 more rows, and 1 more variables: Venice_diff_NYC <dbl>

Final Exercises

My answers of the final exercices are shown below.

  1. Return the dates which Amsterdam is strictly warmer than London but strictly colder than Venice
travel_weather %>%
  filter(  (Amsterdam>London) & (Amsterdam<Venice))
## # A tibble: 165 x 7
##     year month   day Amsterdam London   NYC Venice
##    <dbl> <dbl> <dbl>     <dbl>  <dbl> <dbl>  <dbl>
##  1  2015    11    21         5      3     9      8
##  2  2015    11    22         3      1     9      8
##  3  2016     1    13         4      3    -3      6
##  4  2016     1    16         2      1     8      4
##  5  2016     2     3         5      4    11      8
##  6  2016     2    11         4      3    -4      7
##  7  2016     2    12         2      1    -6      6
##  8  2016     2    23         4      3     3     11
##  9  2016     2    24         2      1     9     10
## 10  2016     2    25         2      1     9      8
## # ... with 155 more rows
  1. For each month of each year calculate the average difference between NYC and Amsterdam for the days NYC is strictly warmer than Amsterdam, rounded by 1 decimal. Arrange from the highest difference to the lowest.
travel_weather %>%
    filter(NYC>Amsterdam)%>%
    group_by(year,month)%>%
    summarise_at(vars(Amsterdam,NYC),funs(round(mean(.),1)))%>%
    mutate(NYCwA_diff=NYC-Amsterdam)%>%
    select(-Amsterdam,-NYC)%>%
    arrange(desc(NYCwA_diff))
## # A tibble: 24 x 3
## # Groups:   year [3]
##     year month NYCwA_diff
##    <dbl> <dbl>      <dbl>
##  1  2016     8        8.5
##  2  2016     7        8.1
##  3  2017     9        7.9
##  4  2016     4        7.6
##  5  2017     7        7.4
##  6  2017     4        7.4
##  7  2017     8        6.5
##  8  2016    11        6.4
##  9  2016     3        6.3
## 10  2016     6        6.0
## # ... with 14 more rows
  1. Return the warmest city and its temperature of each day.
travel_weather %>%
  gather(key=City,value=Temperature, -year, -month, -day) %>%
  group_by(year, month, day) %>%
  mutate(max_temp =max(Temperature)) %>%
  filter(Temperature==max_temp)  %>%
  arrange(year,month,day) %>%
  select(year, month, day,City,Temperature)
## # A tibble: 834 x 5
## # Groups:   year, month, day [731]
##     year month   day      City Temperature
##    <dbl> <dbl> <dbl>     <chr>       <dbl>
##  1  2015    11     1       NYC          16
##  2  2015    11     2       NYC          15
##  3  2015    11     3       NYC          16
##  4  2015    11     4       NYC          17
##  5  2015    11     5       NYC          18
##  6  2015    11     6       NYC          21
##  7  2015    11     7       NYC          17
##  8  2015    11     8    Venice          13
##  9  2015    11     9 Amsterdam          13
## 10  2015    11    10 Amsterdam          14
## # ... with 824 more rows