NYC Bicycle Data Set

Ahsen Ceren Karasu Dec,2017



About Data Set

This dataset is a daily record of the number of bicycles crossing into or out of Manhattan via one of the East River bridges (that is, excluding Bronx thruways and the non-bikeable Hudson River tunnels) for a stretch of 9 months.

Content

A count of the number of bicycles on each of the bridges in question is provided on a day-by-day basis, along with information on maximum and minimum temperature and precipitation.

Purpose

Cycling in New York City is associated with mixed cycling conditions that include dense urban proximities, relatively flat terrain, congested roadways with “stop-and-go” traffic, and streets with heavy pedestrian activity. The city’s large cycling population includes utility cyclists, such as delivery and messenger services; cycling clubs for recreational cyclists; and, increasingly, commuters.

In this study we will look at cycling density in NYC and the effect of weather on it.

Analysis

#packages:

# dplyr supplies %>%, select(), mutate(), rename() and glimpse() used below.
library(dplyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
library(rmarkdown)
## Warning: package 'rmarkdown' was built under R version 3.4.3
# Import data ----
# The file path is built explicitly instead of calling setwd(): changing the
# working directory is a global side effect that every later relative path
# would silently depend on.
# NOTE(review): the "Â" in the imported temperature column names suggests the
# CSV is UTF-8 but was read under a different encoding -- consider
# fileEncoding = "UTF-8"; the column references further down would then have
# to be updated to the new names.
data_dir <- "C:/Users/pc/Documents/Group Project"
mydata <- read.csv(file.path(data_dir, "nyc_bicy.csv"))
head(mydata)
##   X Date Day High.Temp..Â.F. Low.Temp..Â.F. Precipitation Brooklyn.Bridge
## 1 0    1   1            78.1           66.0          0.01            1704
## 2 1    2   2            55.0           48.9          0.15             827
## 3 2    3   3            39.9           34.0          0.09             526
## 4 3    4   4            44.1           33.1      0.47 (S)             521
## 5 4    5   5            42.1           26.1             0            1416
## 6 5    6   6            45.0           30.0             0            1885
##   Manhattan.Bridge Williamsburg.Bridge Queensboro.Bridge Total
## 1             3126                4115              2552 11497
## 2             1646                2565              1884  6922
## 3             1232                1695              1306  4759
## 4             1067                1440              1307  4335
## 5             2617                3081              2357  9471
## 6             3329                3856              2849 11919
# Compact overview of the imported data, used to check the type of each column.
dplyr::glimpse(mydata)
## Observations: 210
## Variables: 11
## $ X                   <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ...
## $ Date                <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
## $ Day                 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
## $ High.Temp..Â.F.     <dbl> 78.1, 55.0, 39.9, 44.1, 42.1, 45.0, 57.0, ...
## $ Low.Temp..Â.F.      <dbl> 66.0, 48.9, 34.0, 33.1, 26.1, 30.0, 53.1, ...
## $ Precipitation       <fctr> 0.01, 0.15, 0.09, 0.47 (S), 0, 0, 0.09, 0...
## $ Brooklyn.Bridge     <dbl> 1704, 827, 526, 521, 1416, 1885, 1276, 198...
## $ Manhattan.Bridge    <int> 3126, 1646, 1232, 1067, 2617, 3329, 2581, ...
## $ Williamsburg.Bridge <dbl> 4115, 2565, 1695, 1440, 3081, 3856, 3282, ...
## $ Queensboro.Bridge   <dbl> 2552, 1884, 1306, 1307, 2357, 2849, 2457, ...
## $ Total               <int> 11497, 6922, 4759, 4335, 9471, 11919, 9596...
# Same column-type overview again. The output shows that the temperature
# columns are in Fahrenheit and that Date holds the same values as Day.
dplyr::glimpse(mydata)
## Observations: 210
## Variables: 11
## $ X                   <int> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ...
## $ Date                <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
## $ Day                 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
## $ High.Temp..Â.F.     <dbl> 78.1, 55.0, 39.9, 44.1, 42.1, 45.0, 57.0, ...
## $ Low.Temp..Â.F.      <dbl> 66.0, 48.9, 34.0, 33.1, 26.1, 30.0, 53.1, ...
## $ Precipitation       <fctr> 0.01, 0.15, 0.09, 0.47 (S), 0, 0, 0.09, 0...
## $ Brooklyn.Bridge     <dbl> 1704, 827, 526, 521, 1416, 1885, 1276, 198...
## $ Manhattan.Bridge    <int> 3126, 1646, 1232, 1067, 2617, 3329, 2581, ...
## $ Williamsburg.Bridge <dbl> 4115, 2565, 1695, 1440, 3081, 3856, 3282, ...
## $ Queensboro.Bridge   <dbl> 2552, 1884, 1306, 1307, 2357, 2849, 2457, ...
## $ Total               <int> 11497, 6922, 4759, 4335, 9471, 11919, 9596...
# Drop the X column and convert both Fahrenheit temperatures to Celsius in
# three explicit steps: subtract 32, multiply by 5, then divide by 9.
# Each step is stored in its own column (HighDegree, HighDegree2, HighDegree3
# and the LowDegree equivalents); the *3 columns hold the Celsius values.
# Date is still present in this table.
newdata <- mydata %>%
  select(
    Date, Day, High.Temp..Â.F., Low.Temp..Â.F., Precipitation,
    Brooklyn.Bridge, Manhattan.Bridge, Williamsburg.Bridge,
    Queensboro.Bridge, Total
  ) %>%
  mutate(
    HighDegree = High.Temp..Â.F. - 32,
    HighDegree2 = HighDegree * 5,
    HighDegree3 = HighDegree2 / 9,
    LowDegree = Low.Temp..Â.F. - 32,
    LowDegree2 = LowDegree * 5,
    LowDegree3 = LowDegree2 / 9
  )
## Warning: package 'bindrcpp' was built under R version 3.4.3
# Keep the day, precipitation, bridge counts and the final Celsius columns.
# LowDegree3 / HighDegree3 are renamed to LowDegree / HighDegree while being
# selected; Date, the Fahrenheit columns and the intermediate conversion
# columns are left behind.
nycbic <- select(
  newdata,
  Day, Precipitation, Brooklyn.Bridge, Manhattan.Bridge,
  Williamsburg.Bridge, Queensboro.Bridge, Total,
  LowDegree = LowDegree3,
  HighDegree = HighDegree3
)

dplyr::glimpse(nycbic)
## Observations: 210
## Variables: 9
## $ Day                 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
## $ Precipitation       <fctr> 0.01, 0.15, 0.09, 0.47 (S), 0, 0, 0.09, 0...
## $ Brooklyn.Bridge     <dbl> 1704, 827, 526, 521, 1416, 1885, 1276, 198...
## $ Manhattan.Bridge    <int> 3126, 1646, 1232, 1067, 2617, 3329, 2581, ...
## $ Williamsburg.Bridge <dbl> 4115, 2565, 1695, 1440, 3081, 3856, 3282, ...
## $ Queensboro.Bridge   <dbl> 2552, 1884, 1306, 1307, 2357, 2849, 2457, ...
## $ Total               <int> 11497, 6922, 4759, 4335, 9471, 11919, 9596...
## $ LowDegree           <dbl> 18.8888889, 9.3888889, 1.1111111, 0.611111...
## $ HighDegree          <dbl> 25.611111, 12.777778, 4.388889, 6.722222, ...
# Add the daily average temperature as the midpoint of the low and high
# Celsius values. The values are left unrounded.

nnycbic <- mutate(nycbic, Average.Temp = (LowDegree + HighDegree) / 2)
head(nnycbic)
##   Day Precipitation Brooklyn.Bridge Manhattan.Bridge Williamsburg.Bridge
## 1   1          0.01            1704             3126                4115
## 2   2          0.15             827             1646                2565
## 3   3          0.09             526             1232                1695
## 4   4      0.47 (S)             521             1067                1440
## 5   5             0            1416             2617                3081
## 6   6             0            1885             3329                3856
##   Queensboro.Bridge Total  LowDegree HighDegree Average.Temp
## 1              2552 11497 18.8888889  25.611111    22.250000
## 2              1884  6922  9.3888889  12.777778    11.083333
## 3              1306  4759  1.1111111   4.388889     2.750000
## 4              1307  4335  0.6111111   6.722222     3.666667
## 5              2357  9471 -3.2777778   5.611111     1.166667
## 6              2849 11919 -1.1111111   7.222222     3.055556
# Histogram of the daily average temperature (degrees Celsius) in 1-degree bins.
# Author's observation: while the data was collected, the temperature in NYC
# ranged between about 0 and 25 degrees Celsius.
# NOTE(review): the x scale is limited to 0-25 and ggplot2 removes values
# outside the scale limits, so any colder or hotter days would not appear in
# this plot -- confirm the actual range of Average.Temp.
library(ggplot2)
# Map the column by name; aes() looks it up in the data passed to ggplot().
ggplot(nnycbic, aes(x = Average.Temp)) +
  geom_histogram(fill = "pink", color = "black", binwidth = 1) +
  scale_x_continuous(limits = c(0, 25), breaks = seq(0, 25, by = 5)) +
  labs(x = "Temperature", y = "Frequency") +
  ggtitle("Temperature Frequency of Data")

# Manhattan Bridge bicycle counts plotted against the average temperature.
# Author's observation: few people cycle when the average temperature is
# below 10 degrees, and people also avoid cycling when the temperature is high.
library(ggplot2)
ggplot(nnycbic, aes(x = Average.Temp, y = Manhattan.Bridge)) +
  geom_col()

“Do or do not. There is no try” Master Yoda