From 903a8b167e84395973ad841490a322d28f73adbe Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Thu, 18 Nov 2021 11:31:48 +0100
Subject: [PATCH 1/8] filter stop_times without min/max times

---
 R/joins.R                    |  2 +-
 R/raptor.R                   | 14 +++++++++-----
 man/filter_feed_by_date.Rd   | 10 +++++-----
 man/filter_stop_times.Rd     |  6 +++---
 tests/testthat/test-raptor.R |  6 ++++++
 5 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/R/joins.R b/R/joins.R
index 046f81f7..d56690aa 100644
--- a/R/joins.R
+++ b/R/joins.R
@@ -154,7 +154,7 @@ filter_feed_by_stops = function(gtfs_obj, stop_ids = NULL, stop_names = NULL) {
 #'          \code{\link{filter_feed_by_trips}}, \code{\link{filter_feed_by_date}}
 #' @export
 filter_feed_by_date = function(gtfs_obj, extract_date,
-                               min_departure_time = "00:00:00", max_arrival_time = "48:00:00") {
+                               min_departure_time, max_arrival_time) {
   st = filter_stop_times(gtfs_obj, extract_date, min_departure_time, max_arrival_time)
   st <- dplyr::as_tibble(st)
   attributes(st)$stops <- NULL
diff --git a/R/raptor.R b/R/raptor.R
index 320b0052..7666e5bd 100644
--- a/R/raptor.R
+++ b/R/raptor.R
@@ -440,10 +440,10 @@ travel_times = function(filtered_stop_times,
 #' 
 #' @param gtfs_obj a gtfs feed
 #' @param extract_date date to extract trips from this day (Date or "YYYY-MM-DD" string)
-#' @param min_departure_time The earliest departure time. Can be given as "HH:MM:SS", 
+#' @param min_departure_time (optional) The earliest departure time. Can be given as "HH:MM:SS", 
 #'                           hms object or numeric value in seconds.
-#' @param max_arrival_time The latest arrival time. Can be given as "HH:MM:SS", 
-#'                         hms object or numeric value in seconds
+#' @param max_arrival_time (optional) The latest arrival time. Can be given as "HH:MM:SS", 
+#'                         hms object or numeric value in seconds.
 #' 
 #' @return Filtered `stop_times` data.table for [travel_times()] and [raptor()].
 #' 
@@ -462,10 +462,14 @@ filter_stop_times = function(gtfs_obj,
   if(is.character(extract_date)) {
     extract_date <- as.Date(extract_date)
   }
-  if(is.character(min_departure_time)) {
+  if(missing(min_departure_time)) {
+    min_departure_time <- 0 
+  } else if(is.character(min_departure_time)) {
     min_departure_time <- hhmmss_to_seconds(min_departure_time)
   }
-  if(is.character(max_arrival_time)) {
+  if(missing(max_arrival_time)) {
+    max_arrival_time <- max(gtfs_obj$stop_times$arrival_time)+1
+  } else if(is.character(max_arrival_time)) {
     max_arrival_time <- hhmmss_to_seconds(max_arrival_time)
   }
   min_departure_time <- as.numeric(min_departure_time)
diff --git a/man/filter_feed_by_date.Rd b/man/filter_feed_by_date.Rd
index fea767b6..27afc7fb 100644
--- a/man/filter_feed_by_date.Rd
+++ b/man/filter_feed_by_date.Rd
@@ -7,8 +7,8 @@
 filter_feed_by_date(
   gtfs_obj,
   extract_date,
-  min_departure_time = "00:00:00",
-  max_arrival_time = "48:00:00"
+  min_departure_time,
+  max_arrival_time
 )
 }
 \arguments{
@@ -16,11 +16,11 @@ filter_feed_by_date(
 
 \item{extract_date}{date to extract trips from this day (Date or "YYYY-MM-DD" string)}
 
-\item{min_departure_time}{The earliest departure time. Can be given as "HH:MM:SS",
+\item{min_departure_time}{(optional) The earliest departure time. Can be given as "HH:MM:SS",
 hms object or numeric value in seconds.}
 
-\item{max_arrival_time}{The latest arrival time. Can be given as "HH:MM:SS",
-hms object or numeric value in seconds}
+\item{max_arrival_time}{(optional) The latest arrival time. Can be given as "HH:MM:SS",
+hms object or numeric value in seconds.}
 }
 \value{
 tidygtfs object with filtered tables
diff --git a/man/filter_stop_times.Rd b/man/filter_stop_times.Rd
index ff3ec3e7..c5332b23 100644
--- a/man/filter_stop_times.Rd
+++ b/man/filter_stop_times.Rd
@@ -11,11 +11,11 @@ filter_stop_times(gtfs_obj, extract_date, min_departure_time, max_arrival_time)
 
 \item{extract_date}{date to extract trips from this day (Date or "YYYY-MM-DD" string)}
 
-\item{min_departure_time}{The earliest departure time. Can be given as "HH:MM:SS",
+\item{min_departure_time}{(optional) The earliest departure time. Can be given as "HH:MM:SS",
 hms object or numeric value in seconds.}
 
-\item{max_arrival_time}{The latest arrival time. Can be given as "HH:MM:SS",
-hms object or numeric value in seconds}
+\item{max_arrival_time}{(optional) The latest arrival time. Can be given as "HH:MM:SS",
+hms object or numeric value in seconds.}
 }
 \value{
 Filtered \code{stop_times} data.table for \code{\link[=travel_times]{travel_times()}} and \code{\link[=raptor]{raptor()}}.
diff --git a/tests/testthat/test-raptor.R b/tests/testthat/test-raptor.R
index 16f0abeb..85038bc5 100644
--- a/tests/testthat/test-raptor.R
+++ b/tests/testthat/test-raptor.R
@@ -345,3 +345,9 @@ test_that("raptor with filtered feed", {
                      stop_name = "One", time_range = 3600)
   expect_equal(tt1, tt2)
 })
+
+test_that("filter feed without min/max time", {
+  st.1 = filter_stop_times(g, "2018-10-01")
+  st.2 = filter_stop_times(g, "2018-10-01", "00:00:00", 999*3600)
+  expect_true(all(st.1 == st.2))
+})

From 344cf533045826c4540ad5bd4cdb0a22355c20f3 Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Thu, 18 Nov 2021 11:32:17 +0100
Subject: [PATCH 2/8] change by_route default to TRUE to match doc

---
 man/get_stop_frequency.Rd      | 5 +++--
 tests/testthat/test-headways.R | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/man/get_stop_frequency.Rd b/man/get_stop_frequency.Rd
index a8919750..6d3651bd 100644
--- a/man/get_stop_frequency.Rd
+++ b/man/get_stop_frequency.Rd
@@ -9,7 +9,7 @@ get_stop_frequency(
   start_hour = 6,
   end_hour = 22,
   service_ids = NULL,
-  by_route = FALSE
+  by_route = TRUE
 )
 }
 \arguments{
@@ -23,7 +23,8 @@ get_stop_frequency(
 identifying a particular service id. If not provided the service_id
 with the most departures is used}
 
-\item{by_route}{default TRUE, if FALSE then calculate headway for any line coming through the stop in the same direction on the same schedule.}
+\item{by_route}{Default TRUE, if FALSE then calculate headway for any line coming
+through the stop in the same direction on the same schedule.}
 }
 \value{
 dataframe of stops with the number of departures and the headway
diff --git a/tests/testthat/test-headways.R b/tests/testthat/test-headways.R
index 71b54dbd..fa77738b 100644
--- a/tests/testthat/test-headways.R
+++ b/tests/testthat/test-headways.R
@@ -2,8 +2,8 @@ context("Frequencies are calculated correctly")
 
 # TODO rewrite with synthesized sample data
 test_that("Stop frequencies (headways) for included data are as expected", {
-  expect_equal(nrow(get_stop_frequency(gtfs_duke)), 47)
-  expect_equal(nrow(get_stop_frequency(gtfs_duke, start_hour = 10, end_hour = 11)), 41)
+  expect_equal(nrow(get_stop_frequency(gtfs_duke, by_route = FALSE)), 47)
+  expect_equal(nrow(get_stop_frequency(gtfs_duke, start_hour = 10, end_hour = 11, by_route = FALSE)), 41)
 
   stops_frequency <- get_stop_frequency(gtfs_duke, service_ids = "c_853_b_19828_d_64")
   ex_address <- stops_frequency$mean_headway[stops_frequency$stop_id==778058]

From 6ee8b24ad4c9da246cab0d53eea3cbe5ea574024 Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Thu, 18 Nov 2021 11:32:29 +0100
Subject: [PATCH 3/8] use start/end_time instead of start/end_hour

---
 R/frequencies.R                | 44 +++++++++++++++++++++-------------
 man/get_stop_frequency.Rd      | 14 ++++++-----
 tests/testthat/test-headways.R |  2 +-
 3 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/R/frequencies.R b/R/frequencies.R
index fffda5cd..a981d73d 100644
--- a/R/frequencies.R
+++ b/R/frequencies.R
@@ -2,15 +2,18 @@
 #' 
 #' Note that some GTFS feeds contain a frequency data frame already. 
 #' Consider using this instead, as it will be more accurate than what 
-#' tidytransit calculates. 
+#' tidytransit calculates.
 #' 
 #' @param gtfs_obj a list of gtfs dataframes as read by [read_gtfs()].
-#' @param start_hour (optional) an integer indicating the start hour (default 6)
-#' @param end_hour (optional) an integer indicating the end hour (default 22)
-#' @param service_ids (optional) a set of service_ids from the calendar dataframe 
+#' @param start_time analysis start time, can be given as "HH:MM:SS", 
+#'                   hms object or numeric value in seconds.
+#' @param end_time analysis perdiod end time, can be given as "HH:MM:SS", 
+#'                 hms object or numeric value in seconds.
+#' @param service_ids A set of service_ids from the calendar dataframe 
 #'                    identifying a particular service id. If not provided the service_id 
-#'                    with the most departures is used
-#' @param by_route default TRUE, if FALSE then calculate headway for any line coming through the stop in the same direction on the same schedule. 
+#'                    with the most departures is used.
+#' @param by_route Default TRUE, if FALSE then calculate headway for any line coming 
+#'                 through the stop in the same direction on the same schedule. 
 #' @return dataframe of stops with the number of departures and the headway 
 #'         (departures divided by timespan) as columns.
 #'
@@ -24,11 +27,16 @@
 #' x <- order(stop_frequency$mean_headway)
 #' head(stop_frequency[x,])
 get_stop_frequency <- function(gtfs_obj,
-                               start_hour = 6,
-                               end_hour = 22,
+                               start_time = "06:00:00",
+                               end_time = "22:00:00",
                                service_ids = NULL,
-                               by_route = FALSE) {
+                               by_route = TRUE) {
   n_deps <- direction_id <- NULL
+  
+  if(is.character(start_time)) start_time <- hhmmss_to_seconds(start_time)
+  if(is.character(end_time)) end_time <- hhmmss_to_seconds(end_time)
+  
+  # get service id with most departures
   if(is.null(service_ids)) {
     dep_per_trip = gtfs_obj$stop_times %>% 
       dplyr::group_by(trip_id) %>% dplyr::count(name = "n_deps") %>% 
@@ -39,27 +47,29 @@ get_stop_frequency <- function(gtfs_obj,
       dplyr::arrange(dplyr::desc(n_deps))
     service_ids = dep_per_service_id$service_id[1]  
   }
+  
+  # filter stop_times to service_ids and start/end_time
   trips = gtfs_obj$trips %>% filter(service_id %in% service_ids)
   
-  # TODO change times to hms or strings
   stop_times = gtfs_obj$stop_times %>%
     filter(trip_id %in% trips$trip_id) %>% 
-    filter(departure_time >= start_hour*3600 & arrival_time <= end_hour*3600) %>% 
+    filter(departure_time >= start_time & arrival_time <= end_time) %>% 
     left_join(trips[c("trip_id", "route_id", "direction_id", "service_id")], "trip_id") 
   
-  # find number of departure per stop_id, route_id, service_id
+  # find number of departure per stop_id (route_id, direction_id, service_id)
   if(by_route) {
-  freq = stop_times %>% 
-    dplyr::group_by(stop_id, route_id, direction_id, service_id) %>% 
-    dplyr::count(name = "n_departures") %>% dplyr::ungroup()
+    freq = stop_times %>% 
+      dplyr::group_by(stop_id, route_id, direction_id, service_id) %>% 
+      dplyr::count(name = "n_departures") %>% dplyr::ungroup()
   } else {
     freq = stop_times %>% 
       dplyr::group_by(stop_id, service_id) %>% 
       dplyr::count(name = "n_departures") %>% dplyr::ungroup()
   }
+  
   # calculate average headway
-  duration = (end_hour-start_hour)*3600
+  duration = as.numeric(end_time-start_time)
   freq$mean_headway <- round(duration / freq$n_departures)
-
+  
   freq
 }
diff --git a/man/get_stop_frequency.Rd b/man/get_stop_frequency.Rd
index 6d3651bd..d6992d51 100644
--- a/man/get_stop_frequency.Rd
+++ b/man/get_stop_frequency.Rd
@@ -6,8 +6,8 @@
 \usage{
 get_stop_frequency(
   gtfs_obj,
-  start_hour = 6,
-  end_hour = 22,
+  start_time = "06:00:00",
+  end_time = "22:00:00",
   service_ids = NULL,
   by_route = TRUE
 )
@@ -15,13 +15,15 @@ get_stop_frequency(
 \arguments{
 \item{gtfs_obj}{a list of gtfs dataframes as read by \code{\link[=read_gtfs]{read_gtfs()}}.}
 
-\item{start_hour}{(optional) an integer indicating the start hour (default 6)}
+\item{start_time}{analysis start time, can be given as "HH:MM:SS",
+hms object or numeric value in seconds. as numeric}
 
-\item{end_hour}{(optional) an integer indicating the end hour (default 22)}
+\item{end_time}{analysis perdiod end time, can be given as "HH:MM:SS",
+hms object or numeric value in seconds.}
 
-\item{service_ids}{(optional) a set of service_ids from the calendar dataframe
+\item{service_ids}{A set of service_ids from the calendar dataframe
 identifying a particular service id. If not provided the service_id
-with the most departures is used}
+with the most departures is used.}
 
 \item{by_route}{Default TRUE, if FALSE then calculate headway for any line coming
 through the stop in the same direction on the same schedule.}
diff --git a/tests/testthat/test-headways.R b/tests/testthat/test-headways.R
index fa77738b..06485461 100644
--- a/tests/testthat/test-headways.R
+++ b/tests/testthat/test-headways.R
@@ -3,7 +3,7 @@ context("Frequencies are calculated correctly")
 # TODO rewrite with synthesized sample data
 test_that("Stop frequencies (headways) for included data are as expected", {
   expect_equal(nrow(get_stop_frequency(gtfs_duke, by_route = FALSE)), 47)
-  expect_equal(nrow(get_stop_frequency(gtfs_duke, start_hour = 10, end_hour = 11, by_route = FALSE)), 41)
+  expect_equal(nrow(get_stop_frequency(gtfs_duke, start_time = 10*3600, end_time = 11*3600, by_route = FALSE)), 41)
 
   stops_frequency <- get_stop_frequency(gtfs_duke, service_ids = "c_853_b_19828_d_64")
   ex_address <- stops_frequency$mean_headway[stops_frequency$stop_id==778058]

From 8ce5344e372e5246b94f69bdbbbc486ed6c2b78a Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Thu, 18 Nov 2021 11:33:01 +0100
Subject: [PATCH 4/8] add get_route_frequency

use start/end_time
remove dow parameter
return headways in seconds
---
 NAMESPACE                      |  1 +
 R/frequencies.R                | 54 ++++++++++++++++++++++++++++++++--
 man/get_route_frequency.Rd     | 41 ++++++++++++++++++++++++++
 man/get_stop_frequency.Rd      |  6 ++--
 tests/testthat/test-headways.R | 12 ++++++++
 5 files changed, 109 insertions(+), 5 deletions(-)
 create mode 100644 man/get_route_frequency.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4175ba9e..2bf2ca6c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,6 +10,7 @@ export(filter_feed_by_trips)
 export(filter_stop_times)
 export(filter_stops)
 export(get_feedlist)
+export(get_route_frequency)
 export(get_route_geometry)
 export(get_stop_frequency)
 export(get_trip_geometry)
diff --git a/R/frequencies.R b/R/frequencies.R
index a981d73d..cc182e84 100644
--- a/R/frequencies.R
+++ b/R/frequencies.R
@@ -10,12 +10,12 @@
 #' @param end_time analysis perdiod end time, can be given as "HH:MM:SS", 
 #'                 hms object or numeric value in seconds.
 #' @param service_ids A set of service_ids from the calendar dataframe 
-#'                    identifying a particular service id. If not provided the service_id 
+#'                    identifying a particular service id. If not provided, the service_id 
 #'                    with the most departures is used.
 #' @param by_route Default TRUE, if FALSE then calculate headway for any line coming 
 #'                 through the stop in the same direction on the same schedule. 
 #' @return dataframe of stops with the number of departures and the headway 
-#'         (departures divided by timespan) as columns.
+#'         (departures divided by timespan) in seconds as columns
 #'
 #' @importFrom dplyr %>%
 #' @importFrom rlang .data !! quo enquo
@@ -73,3 +73,53 @@ get_stop_frequency <- function(gtfs_obj,
   
   freq
 }
+
+#' Get Route Frequency
+#' 
+#' Note that some GTFS feeds contain a frequency data frame already. 
+#' Consider using this instead, as it will be more accurate than what 
+#' tidytransit calculates. 
+#' 
+#' @param gtfs_obj a list of gtfs dataframes as read by the trread package.
+#' @param start_time analysis start time, can be given as "HH:MM:SS", 
+#'                   hms object or numeric value in seconds.
+#' @param end_time analysis perdiod end time, can be given as "HH:MM:SS", 
+#'                 hms object or numeric value in seconds.
+#' @param service_ids A set of service_ids from the calendar dataframe 
+#'                    identifying a particular service id. If not provided, the service_id 
+#'                    with the most departures is used.
+#' @return a dataframe of routes with variables or headway/frequency in seconds for a route 
+#'         within a given time frame
+#' @export
+#' @examples 
+#' data(gtfs_duke)
+#' routes_frequency <- get_route_frequency(gtfs_duke)
+#' x <- order(routes_frequency$median_headways)
+#' head(routes_frequency[x,])
+get_route_frequency <- function(gtfs_obj,
+                                start_time = "06:00:00",
+                                end_time = "22:00:00",
+                                service_ids = NULL) {
+  total_departures <- median_headways <- mean_headways <- NULL
+  n_departures <- mean_headway <- st_dev_headways <- stop_count <- NULL
+  if(feed_contains(gtfs_obj, "frequencies") && nrow(gtfs_obj$frequencies) > 0) {  
+    message("A pre-calculated frequencies dataframe exists for this feed already, 
+            consider using that.") 
+  } 
+  departures_per_stop = get_stop_frequency(gtfs_obj, start_time, end_time, 
+                                           service_ids, by_route = TRUE)
+
+  if(dim(departures_per_stop)[[1]] != 0) {
+    routes_frequency = departures_per_stop %>% 
+      group_by(route_id) %>%
+      summarise(total_departures = sum(n_departures),
+                median_headways = round(median(mean_headway)),
+                mean_headways = round(mean(mean_headway)),
+                st_dev_headways = round(sd(mean_headway), 2),
+                stop_count = dplyr::n())
+  } else {
+    warning("Failed to calculate frequency, try passing a service_id from calendar_df.")
+  }
+  
+  return(routes_frequency)
+}
diff --git a/man/get_route_frequency.Rd b/man/get_route_frequency.Rd
new file mode 100644
index 00000000..3edbbdb9
--- /dev/null
+++ b/man/get_route_frequency.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/frequencies.R
+\name{get_route_frequency}
+\alias{get_route_frequency}
+\title{Get Route Frequency}
+\usage{
+get_route_frequency(
+  gtfs_obj,
+  start_time = "06:00:00",
+  end_time = "22:00:00",
+  service_ids = NULL
+)
+}
+\arguments{
+\item{gtfs_obj}{a list of gtfs dataframes as read by the trread package.}
+
+\item{start_time}{analysis start time, can be given as "HH:MM:SS",
+hms object or numeric value in seconds.}
+
+\item{end_time}{analysis perdiod end time, can be given as "HH:MM:SS",
+hms object or numeric value in seconds.}
+
+\item{service_ids}{A set of service_ids from the calendar dataframe
+identifying a particular service id. If not provided, the service_id
+with the most departures is used.}
+}
+\value{
+a dataframe of routes with variables or headway/frequency in seconds for a route
+within a given time frame
+}
+\description{
+Note that some GTFS feeds contain a frequency data frame already.
+Consider using this instead, as it will be more accurate than what
+tidytransit calculates.
+}
+\examples{
+data(gtfs_duke)
+routes_frequency <- get_route_frequency(gtfs_duke)
+x <- order(routes_frequency$median_headways)
+head(routes_frequency[x,])
+}
diff --git a/man/get_stop_frequency.Rd b/man/get_stop_frequency.Rd
index d6992d51..9408ad3f 100644
--- a/man/get_stop_frequency.Rd
+++ b/man/get_stop_frequency.Rd
@@ -16,13 +16,13 @@ get_stop_frequency(
 \item{gtfs_obj}{a list of gtfs dataframes as read by \code{\link[=read_gtfs]{read_gtfs()}}.}
 
 \item{start_time}{analysis start time, can be given as "HH:MM:SS",
-hms object or numeric value in seconds. as numeric}
+hms object or numeric value in seconds.}
 
 \item{end_time}{analysis perdiod end time, can be given as "HH:MM:SS",
 hms object or numeric value in seconds.}
 
 \item{service_ids}{A set of service_ids from the calendar dataframe
-identifying a particular service id. If not provided the service_id
+identifying a particular service id. If not provided, the service_id
 with the most departures is used.}
 
 \item{by_route}{Default TRUE, if FALSE then calculate headway for any line coming
@@ -30,7 +30,7 @@ through the stop in the same direction on the same schedule.}
 }
 \value{
 dataframe of stops with the number of departures and the headway
-(departures divided by timespan) as columns.
+(departures divided by timespan) in seconds as columns
 }
 \description{
 Note that some GTFS feeds contain a frequency data frame already.
diff --git a/tests/testthat/test-headways.R b/tests/testthat/test-headways.R
index 06485461..e845a079 100644
--- a/tests/testthat/test-headways.R
+++ b/tests/testthat/test-headways.R
@@ -16,3 +16,15 @@ test_that("Stop frequencies (headways) for included data are as expected", {
     colnames(stops_frequency_by_route), 
     c("stop_id", "route_id", "direction_id", "service_id", "n_departures", "mean_headway"))
 })
+
+test_that("Route frequencies (headways)", {
+  # TODO rewrite with synthesized sample data
+  routes_frequency <- get_route_frequency(gtfs_duke)
+  expect_equal(routes_frequency[routes_frequency$route_id == 1679, ]$median_headways, 24*60)
+})
+
+test_that("Route frequencies (headways) w/ service id", {
+  # TODO rewrite with synthesized sample data
+  routes_frequency <- get_route_frequency(gtfs_duke, service_id = "c_883_b_21967_d_31")
+  expect_equal(routes_frequency[routes_frequency$route_id == 1680, ]$median_headways, (53+1/3)*60)
+})

From c421eed2db25933b1355a160c4b19573a840afee Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Thu, 18 Nov 2021 11:33:13 +0100
Subject: [PATCH 5/8] restore frequency vignette

---
 vignettes/frequency.Rmd | 390 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100644 vignettes/frequency.Rmd

diff --git a/vignettes/frequency.Rmd b/vignettes/frequency.Rmd
new file mode 100644
index 00000000..00ddd588
--- /dev/null
+++ b/vignettes/frequency.Rmd
@@ -0,0 +1,390 @@
+---
+title: "Transit (GTFS) Service & Headway Mapping with R"
+author: "Tom Buckley"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{tidytransit-headways}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+library(dplyr)
+library(tidytransit)
+library(ggplot2)
+library(sf)
+summarise <- function(...) dplyr::summarise(..., .groups = "drop")
+```
+
+# Introduction
+
+The focus of this vignette is on how to use R to make graphics about where and how often 
+transit service operates based on schedule data published in the [General Transit Feed Specification](http://gtfs.org/). 
+We'll focus on the New York City Metropolitan Transit Agency's Subway schedule for this 
+vignette, but you can easily apply it to thousands of other GTFS data sources. See the 
+[tidytransit introductory vignette](http://tidytransit.r-transit.org/articles/introduction.html#finding-more-gtfs-feeds) 
+for instructions on finding data for other cities and operators. 
+
+# Setup
+
+You'll need to have tidytransit installed. Please see the [install instructions](http://tidytransit.r-transit.org/articles/introduction.html#installation-dependencies) for more details. 
+
+# Outline
+
+So, to review, we're going to:
+
+1) Import Transit Data (GTFS)
+2) Identify Weekday Schedules of Service
+3) Calculate Headways
+4) Map Headways By Route
+5) Map Departures by Stop and Route
+
+# 1) Import Transit Data (GTFS)
+
+We'll start by importing a snapshot of the NYC MTA's subway schedule, which is included 
+with the package when [installed](http://tidytransit.r-transit.org/index.html#installation).  
+
+```{r}
+local_gtfs_path <- system.file("extdata", "google_transit_nyc_subway.zip", package = "tidytransit")
+gtfs <- read_gtfs(local_gtfs_path)
+```
+
+But note that you can also just uncomment the line below and import the data from the NYC MTA's URL directly. 
+
+```{r}
+# gtfs <- read_gtfs("http://web.mta.info/developers/data/nyct/subway/google_transit.zip")
+```
+
+# 2) Identify Weekday Schedules of Service
+
+GTFS feeds typically contain a schedule of all the schedules of service for a given system. 
+Selecting a schedule of service in NYC allows us to focus on, for example, non-holiday 
+weekday service, in the Fall of 2019. In some feeds, service selection can be more or less 
+complicated than NYC. In any case, you'll want to read the [service patterns](http://tidytransit.r-transit.org/articles/servicepatterns.html) 
+vignette included in this package in order to see how you can select the right service for 
+your needs. 
+
+We use one of the functions described in that vignette to create a table on the gtfs feed 
+that lets us filter by weekday/weekend service. 
+
+```{r}
+gtfs <- set_servicepattern(gtfs)
+```
+
+After setting the service patterns, we can summarise each service by the number of trips and 
+stops. We'll also summarise the total distance covered by all trips in the service, and then 
+check that against the total distance covered by the average route. First, we need to 
+calculate the distance of each part of the route shapes.  
+
+```{r}
+shp1 <- shapes_as_sf(gtfs$shapes)
+shp1 <- st_transform(shp1, crs=2263)
+shp1$length <- st_length(shp1)
+shp2 <- shp1 %>% 
+  as.data.frame() %>% 
+  select(shape_id,length,-geometry) 
+```
+
+Now we're ready to roll the statistics up to services. 
+
+```{r}
+service_pattern_summary <- gtfs$trips %>%
+  left_join(gtfs$.$servicepatterns, by="service_id") %>% 
+  left_join(shp2, by="shape_id") %>%
+  left_join(gtfs$stop_times, by="trip_id") %>% 
+  group_by(servicepattern_id) %>% 
+  summarise(trips = n(), 
+            routes = n_distinct(route_id),
+            total_distance_per_day_km = sum(as.numeric(length), 
+                                    na.rm=TRUE)/1e3,
+            route_avg_distance_km = (sum(as.numeric(length),
+                                    na.rm=TRUE)/1e3)/(trips*routes),
+            stops=(n_distinct(stop_id)/2))
+```
+
+We can also add the number of days that each service is in operation. 
+
+```{r}
+service_pattern_summary <- gtfs$.$dates_servicepatterns %>% 
+  group_by(servicepattern_id) %>% 
+  summarise(days_in_service = n()) %>% 
+  left_join(service_pattern_summary, by="servicepattern_id")
+```
+
+And then we'll print the summary. 
+
+```{r}
+knitr::kable(service_pattern_summary)
+```
+
+It seems that if we want to summarise the most common patterns of service in the NYC Metro 
+system, we should use the `s_e25d6ca` service pattern, as it has the most days in service, 
+the most trips, stops, and routes. 
+
+We'll use that pattern below to pull out the Service ID's that we need to use to identify 
+trips in the GTFS feed for which we want to summarise service. 
+
+```{r}
+service_ids <- gtfs$.$servicepattern %>% 
+  filter(servicepattern_id == 's_e25d6ca') %>% 
+  pull(service_id)
+
+head(service_ids) %>% 
+  knitr::kable()
+```
+
+So, what are these service_id codes? How they are put together varies from operator to 
+operator. The important thing is that the service_id's are also a field on the `trips` 
+table, which describes all the trips taken in the system. 
+
+Lets see how many trips fall under each of these service_id's on the trips table, and how 
+they relate to routes. 
+
+```{r}
+gtfs$trips %>%
+  filter(service_id %in% service_ids) %>%
+  group_by(service_id, route_id) %>%
+  summarise(count = n()) %>% 
+  head() %>%
+  knitr::kable()
+```
+
+Given the one-to-one relationship between service-id's and routes, we might conclude that 
+the NYC Subway GTFS creates service_id's for each route that a trip runs on. Some GTFS feeds
+are simpler: a single service_id might relate to 'all vehicle trips running every weekdays'. 
+Service patterns get us around complications like this by describing service in terms of 
+exhaustive calendar dates, regardless of whether an operator may break out each route as 
+a different service. 
+
+# 3) Calculate Headways
+
+So, now that we've used service patterns to identify the set of service_ids that refer to 
+all weekday trips, we can summarize service between 6 am and 10 am for the NYC Subway 
+system on weekdays.  
+
+```{r}
+am_freq <- get_stop_frequency(gtfs, start_time = 6*3600, end_time = 10*3600, service_ids = service_ids, by_route = T)
+```
+
+```{r}
+knitr::kable(head(am_freq))
+```
+
+This table includes columns for the id for a given stop, the route_id, our selected 
+service_id's, and the number of departures and the average headway for a given direction 
+from 6 am to 10 am on weekdays. 
+
+The `get_stop_frequency` function simply counts the number of departures within the time 
+frame to get departures per stop. Then, to get headways, it divides the number of minutes 
+by the number of departures, and rounds to the nearest integer. 
+
+Lets have a look at the headways for the 1 train, which runs from the Bronx down to the 
+Bottom of Manhattan. 
+
+First, we filter the `am_freq` data frame to just stops going in 1 direction on the 1 train, 
+and then we join to the original `stops` table, which includes a more descriptive stop_name. 
+
+```{r}
+one_line_stops <- am_freq %>% 
+    filter(route_id==1 & direction_id==0) %>%
+    left_join(gtfs$stops, by ="stop_id")
+```
+
+As we can see, some stops seem to have higher headways than others, even when the train is 
+running in the same direction. This may be counterintuitive, because we might expect the 
+train to run through every stop the same amount of times for a given direction. 
+
+Lets inspect the stops at which headways are higher. 
+
+```{r}
+one_line_stops %>% 
+  arrange(desc(mean_headway)) %>% 
+  select(stop_name, n_departures, mean_headway) %>% 
+  head() %>%
+  knitr::kable()
+```
+
+And those at which headways are lower:
+
+```{r}
+one_line_stops %>% 
+  arrange(desc(mean_headway)) %>% 
+  select(stop_name, n_departures, mean_headway) %>% 
+  tail() %>%
+  knitr::kable()
+```
+
+Here we can see that the 242-Van Cortland Stop, the last stop up North, in the Bronx, has 
+noticeably higher headways (8 mins) at this time of day than the South Ferry Stop, 
+which is at the south end of Manhattan.
+
+Lets also plot the headways at these stops on a map to see how they are distributed across 
+the city. 
+First, we'll use the `stops_as_sf` function, which converts the latitudes and longitudes on 
+the stops table in the GTFS feed into [simple features](https://r-spatial.github.io/sf/articles/sf1.html).  
+
+```{r}
+nyc_stops_sf <- stops_as_sf(gtfs$stops)
+```
+
+Now we can join those stop coordinates to the 1 line's calculated stop headways. 
+
+```{r}
+one_line_stops_sf <- nyc_stops_sf %>%
+  right_join(one_line_stops, by="stop_id") 
+```
+
+And then use ggplot's `geom_sf` to plot the headways. 
+
+```{r}
+one_line_stops_sf %>% 
+  ggplot() + 
+  geom_sf(aes(color = mean_headway)) +
+  theme_bw()
+```
+
+On the map too, we can see that there is some variation in stop headways. During certain 
+times of the day, the 1 train skips stops north of a certain stop in manhattan, presumably 
+in order to turn around and provide shorter headways to stops south of that stop.
+
+Finally, we can easily summarise what the headways are like along the entire route now, by 
+using R's default summary function for the vector of headways. 
+
+```{r}
+summary(one_line_stops$headway)
+```
+
+This is the same method that tidytransit uses to summarise headways along all routes in the 
+system when we use the `get_route_frequency` function, which we'll try next.  
+
+# 4) Map Headways By Route
+
+Now we'll use the `get_route_frequency` function to summarise transit service by route, for 
+the same time period. 
+
+```{r}
+am_route_freq <- get_route_frequency(gtfs, service_ids = service_ids, start_time = 6*3600, end_time = 10*3600) 
+head(am_route_freq) %>%
+  knitr::kable()
+```
+
+Since, under the hood, this table is a summary of stop frequencies along each route, it 
+includes the same variables as a summary of the headways at each stop along the route, as 
+well as a sum of all departures. Again, its important to note that this summary is based on 
+the trips that happened within the time frame we specify. 
+
+As with the stops, we can easily join this table to simple features and then plot it on a 
+map. Note that here too we pass in the select service_id's from above, as the route run 
+by a vehicle also depends on the selected service. 
+
+```{r}
+# get_route_geometry needs a gtfs object that includes shapes as simple feature data frames
+gtfs_sf <- gtfs_as_sf(gtfs)
+routes_sf <- get_route_geometry(gtfs_sf, service_ids = service_ids)
+```
+
+Then we join the geometries to the calculated frequencies:
+
+```{r}
+routes_sf <- routes_sf %>% 
+  inner_join(am_route_freq, by = 'route_id')
+```
+
+And finally, lets plot the routes with median headways of less than 10 minutes in the morning.
+
+```{r, fig.width=6, fig.height=10, warn=FALSE}
+# convert to an appropriate coordinate reference system
+routes_sf_crs <- sf::st_transform(routes_sf, 26919) 
+routes_sf_crs %>% 
+  filter(median_headways < 10*60) %>%
+  ggplot() + 
+  geom_sf(aes(colour=as.factor(median_headways))) + 
+  labs(color = "Headways") +
+  geom_sf_text(aes(label=route_id)) +
+  theme_bw() 
+```
+
+Its clear that a number of the route lines overlap.
+
+# 5) Map Departures by Stop and Route
+
+Still, we'd like to represent where and how frequently the subway runs in NYC in the 
+morning. How can we do so given that, graphically, the route lines overlap?
+
+One method might be change the units we are representing graphically. Thus far, we have 
+used stops and routes as units. But GTFS data also come with a `shapes` table, which, in 
+theory, should allow us to say what the frequency of vehicles passing through any given 
+shape is, using similar methods. This kind of method is beyond the scope of this vignette. 
+
+Alternatively, regular ggplot users might expect the ggplot `dodge` function to allow us to 
+move around these lines but, by design, thats not possible with `geom_sf`. One can see why: 
+unlike a bar chart, the representations of route lines in geographic space on a map have a 
+specific meaning. 
+
+So we'll use a cartographic trick, scaling each line according to total departures and 
+close to a number around .001 [decimal degrees](https://en.wikipedia.org/wiki/Decimal_degrees) 
+which is a about the length of a street, which will fit on the map well. One might call 
+this a cartogram. 
+
+```{r}
+routes_sf_buffer <- st_buffer(routes_sf,
+                              dist=routes_sf$total_departures/1e6)
+```
+
+Next, when we render the map, we'll make sure to make the borders around each route 
+transparent, and set the opacity for the fill of all the polygons high again. 
+
+```{r, fig.width=6, fig.height=10}
+routes_sf_buffer %>% 
+  ggplot() + 
+  geom_sf(colour=alpha("white",0),fill=alpha("red",0.2)) +
+  theme_bw() 
+```
+
+Now we have a rough representation of the question we set out to answer: where and how 
+frequently does transit service run in the AM in the NYC Subway. Note that in this graphic, 
+the intensity of the red tells you how many overlapping trains run through the line and the 
+thickness of the lines represents how many run along each line. 
+
+We can combine this with stops to get a sense of how central stops relate to routes. 
+
+```{r}
+nyc_stop_am_departures_main <- nyc_stops_sf %>% 
+  left_join(am_freq, by = "stop_id") %>% 
+  filter(n_departures>50)
+```
+
+First, we'll leverage the common `stop_name` variable to group and count departures, in 
+both directions, for all stops, filtering to out a number of smaller stops for more 
+graphical clarity. 
+
+```{r}
+nyc_stops <- left_join(gtfs$stops, am_freq, by="stop_id")
+
+stop_departures <- nyc_stops %>%  
+  group_by(stop_name) %>%
+  transmute(total_departures = sum(n_departures, na.rm=TRUE))
+
+nyc_stops1 <- right_join(nyc_stops_sf,
+                         stop_departures, by="stop_name")
+
+stop_departures <- nyc_stops1 %>%
+  filter(total_departures>100)
+```
+
+Finally, we can plot both the route line counts and the stop departure counts on one map:
+
+```{r, fig.width=6, fig.height=10}
+ggplot() + 
+  geom_sf(data=routes_sf_buffer,colour=alpha("white",0),fill=alpha("red",0.3)) +
+  geom_sf(data=stop_departures, aes(size=total_departures), shape=1) + 
+  labs(size = "Departures (Hundreds)") +
+  theme_bw() +
+  theme(legend.position="none") +
+  ggtitle("NYC MTA - Relative Departures by Route and Stop (AM)")
+```

From e06197c89cbef3217e9f8ece25988f98070a6364 Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Fri, 19 Nov 2021 14:40:21 +0100
Subject: [PATCH 6/8] update vignette

---
 vignettes/frequency.Rmd | 161 ++++++++++++++++++++--------------------
 1 file changed, 80 insertions(+), 81 deletions(-)

diff --git a/vignettes/frequency.Rmd b/vignettes/frequency.Rmd
index 00ddd588..1bf412e8 100644
--- a/vignettes/frequency.Rmd
+++ b/vignettes/frequency.Rmd
@@ -18,10 +18,10 @@ library(dplyr)
 library(tidytransit)
 library(ggplot2)
 library(sf)
-summarise <- function(...) dplyr::summarise(..., .groups = "drop")
+options(dplyr.summarise.inform=F)
 ```
 
-# Introduction
+## Introduction
 
 The focus of this vignette is on how to use R to make graphics about where and how often 
 transit service operates based on schedule data published in the [General Transit Feed Specification](http://gtfs.org/). 
@@ -30,11 +30,11 @@ vignette, but you can easily apply it to thousands of other GTFS data sources. S
 [tidytransit introductory vignette](http://tidytransit.r-transit.org/articles/introduction.html#finding-more-gtfs-feeds) 
 for instructions on finding data for other cities and operators. 
 
-# Setup
+## Setup
 
 You'll need to have tidytransit installed. Please see the [install instructions](http://tidytransit.r-transit.org/articles/introduction.html#installation-dependencies) for more details. 
 
-# Outline
+## Outline
 
 So, to review, we're going to:
 
@@ -44,7 +44,7 @@ So, to review, we're going to:
 4) Map Headways By Route
 5) Map Departures by Stop and Route
 
-# 1) Import Transit Data (GTFS)
+### 1) Import Transit Data (GTFS)
 
 We'll start by importing a snapshot of the NYC MTA's subway schedule, which is included 
 with the package when [installed](http://tidytransit.r-transit.org/index.html#installation).  
@@ -54,13 +54,14 @@ local_gtfs_path <- system.file("extdata", "google_transit_nyc_subway.zip", packa
 gtfs <- read_gtfs(local_gtfs_path)
 ```
 
-But note that you can also just uncomment the line below and import the data from the NYC MTA's URL directly. 
+But note that you can also just uncomment the line below and import the data from the NYC 
+MTA's URL directly. 
 
 ```{r}
 # gtfs <- read_gtfs("http://web.mta.info/developers/data/nyct/subway/google_transit.zip")
 ```
 
-# 2) Identify Weekday Schedules of Service
+### 2) Identify Weekday Schedules of Service
 
 GTFS feeds typically contain a schedule of all the schedules of service for a given system. 
 Selecting a schedule of service in NYC allows us to focus on, for example, non-holiday 
@@ -79,15 +80,17 @@ gtfs <- set_servicepattern(gtfs)
 After setting the service patterns, we can summarise each service by the number of trips and 
 stops. We'll also summarise the total distance covered by all trips in the service, and then 
 check that against the total distance covered by the average route. First, we need to 
-calculate the distance of each part of the route shapes.  
+calculate the distance of each part of the route shapes. To do this (and for creating maps
+later on) we convert stops and shapes to [simple features](https://r-spatial.github.io/sf/articles/sf1.html) 
+with `gtfs_as_sf`.
 
 ```{r}
-shp1 <- shapes_as_sf(gtfs$shapes)
-shp1 <- st_transform(shp1, crs=2263)
-shp1$length <- st_length(shp1)
-shp2 <- shp1 %>% 
+gtfs <- gtfs_as_sf(gtfs)
+gtfs$shapes$length <- st_length(gtfs$shapes)
+
+shape_lengths <- gtfs$shapes %>% 
   as.data.frame() %>% 
-  select(shape_id,length,-geometry) 
+  select(shape_id, length, -geometry)
 ```
 
 Now we're ready to roll the statistics up to services. 
@@ -95,21 +98,20 @@ Now we're ready to roll the statistics up to services.
 ```{r}
 service_pattern_summary <- gtfs$trips %>%
   left_join(gtfs$.$servicepatterns, by="service_id") %>% 
-  left_join(shp2, by="shape_id") %>%
+  left_join(shape_lengths, by="shape_id") %>%
   left_join(gtfs$stop_times, by="trip_id") %>% 
   group_by(servicepattern_id) %>% 
-  summarise(trips = n(), 
-            routes = n_distinct(route_id),
-            total_distance_per_day_km = sum(as.numeric(length), 
-                                    na.rm=TRUE)/1e3,
-            route_avg_distance_km = (sum(as.numeric(length),
-                                    na.rm=TRUE)/1e3)/(trips*routes),
-            stops=(n_distinct(stop_id)/2))
+  summarise(
+    trips = n(), 
+    routes = n_distinct(route_id),
+    total_distance_per_day_km = sum(as.numeric(length), na.rm=TRUE)/1e3,
+    route_avg_distance_km = (sum(as.numeric(length), na.rm=TRUE)/1e3)/(trips*routes),
+    stops=(n_distinct(stop_id)/2))
 ```
 
 We can also add the number of days that each service is in operation. 
 
-```{r}
+```{r} 
 service_pattern_summary <- gtfs$.$dates_servicepatterns %>% 
   group_by(servicepattern_id) %>% 
   summarise(days_in_service = n()) %>% 
@@ -126,7 +128,7 @@ It seems that if we want to summarise the most common patterns of service in the
 system, we should use the `s_e25d6ca` service pattern, as it has the most days in service, 
 the most trips, stops, and routes. 
 
-We'll use that pattern below to pull out the Service ID's that we need to use to identify 
+We'll use that pattern below to pull out the service_ids that we need to use to identify 
 trips in the GTFS feed for which we want to summarise service. 
 
 ```{r}
@@ -139,11 +141,11 @@ head(service_ids) %>%
 ```
 
 So, what are these service_id codes? How they are put together varies from operator to 
-operator. The important thing is that the service_id's are also a field on the `trips` 
-table, which describes all the trips taken in the system. 
+operator. The important thing is that the service_ids are also a field on the `trips` 
+table, which describes all the trips taken in the system.
 
-Lets see how many trips fall under each of these service_id's on the trips table, and how 
-they relate to routes. 
+Lets see how many trips fall under each of these service_ids on the trips table, and how 
+they relate to routes.
 
 ```{r}
 gtfs$trips %>%
@@ -154,50 +156,54 @@ gtfs$trips %>%
   knitr::kable()
 ```
 
-Given the one-to-one relationship between service-id's and routes, we might conclude that 
-the NYC Subway GTFS creates service_id's for each route that a trip runs on. Some GTFS feeds
+Given the one-to-one relationship between service_ids and routes, we might conclude that 
+the NYC Subway GTFS creates service_ids for each route that a trip runs on. Some GTFS feeds
 are simpler: a single service_id might relate to 'all vehicle trips running every weekdays'. 
 Service patterns get us around complications like this by describing service in terms of 
 exhaustive calendar dates, regardless of whether an operator may break out each route as 
-a different service. 
+a different service.
 
-# 3) Calculate Headways
+### 3) Calculate Headways
 
 So, now that we've used service patterns to identify the set of service_ids that refer to 
 all weekday trips, we can summarize service between 6 am and 10 am for the NYC Subway 
-system on weekdays.  
+system on weekdays.
 
 ```{r}
-am_freq <- get_stop_frequency(gtfs, start_time = 6*3600, end_time = 10*3600, service_ids = service_ids, by_route = T)
+am_stop_freq <- get_stop_frequency(gtfs, start_time = 6*3600, end_time = 10*3600, 
+                              service_ids = service_ids, by_route = TRUE)
 ```
 
 ```{r}
-knitr::kable(head(am_freq))
+knitr::kable(head(am_stop_freq))
 ```
 
 This table includes columns for the id for a given stop, the route_id, our selected 
-service_id's, and the number of departures and the average headway for a given direction 
-from 6 am to 10 am on weekdays. 
+service_ids, and the number of departures and the average headway for a given direction 
+from 6 am to 10 am on weekdays.
 
 The `get_stop_frequency` function simply counts the number of departures within the time 
-frame to get departures per stop. Then, to get headways, it divides the number of minutes 
-by the number of departures, and rounds to the nearest integer. 
+frame to get departures per stop. Then, to get headways, it divides the number of seconds 
+(between start_time and end_time) by the number of departures, and rounds to the nearest
+integer.
 
 Lets have a look at the headways for the 1 train, which runs from the Bronx down to the 
-Bottom of Manhattan. 
+Bottom of Manhattan.
 
-First, we filter the `am_freq` data frame to just stops going in 1 direction on the 1 train, 
-and then we join to the original `stops` table, which includes a more descriptive stop_name. 
+First, we filter the `am_stop_freq` data frame to just stops going in one direction on the 1 
+train, and then we join to the original `stops` table, which includes a more descriptive 
+stop_name. 
 
 ```{r}
-one_line_stops <- am_freq %>% 
-    filter(route_id==1 & direction_id==0) %>%
-    left_join(gtfs$stops, by ="stop_id")
+one_line_stops <- am_stop_freq %>% 
+    filter(route_id == 1 & direction_id == 0) %>%
+    left_join(gtfs$stops, by ="stop_id") %>% 
+    mutate(mean_headway_minutes = mean_headway/60)
 ```
 
 As we can see, some stops seem to have higher headways than others, even when the train is 
 running in the same direction. This may be counterintuitive, because we might expect the 
-train to run through every stop the same amount of times for a given direction. 
+train to run through every stop the same amount of times for a given direction.
 
 Lets inspect the stops at which headways are higher. 
 
@@ -224,27 +230,19 @@ noticeably higher headways (8 mins) at this time of day than the South Ferry Sto
 which is at the south end of Manhattan.
 
 Lets also plot the headways at these stops on a map to see how they are distributed across 
-the city. 
-First, we'll use the `stops_as_sf` function, which converts the latitudes and longitudes on 
-the stops table in the GTFS feed into [simple features](https://r-spatial.github.io/sf/articles/sf1.html).  
-
-```{r}
-nyc_stops_sf <- stops_as_sf(gtfs$stops)
-```
-
-Now we can join those stop coordinates to the 1 line's calculated stop headways. 
+the city. First, we join the stops sf object to the 1 line's calculated stop headways. 
 
 ```{r}
-one_line_stops_sf <- nyc_stops_sf %>%
+one_line_stops_sf <- gtfs$stops %>%
   right_join(one_line_stops, by="stop_id") 
 ```
 
 And then use ggplot's `geom_sf` to plot the headways. 
 
-```{r}
+```{r, fig.width=9}
 one_line_stops_sf %>% 
   ggplot() + 
-  geom_sf(aes(color = mean_headway)) +
+  geom_sf(aes(color = mean_headway_minutes)) +
   theme_bw()
 ```
 
@@ -256,19 +254,20 @@ Finally, we can easily summarise what the headways are like along the entire rou
 using R's default summary function for the vector of headways. 
 
 ```{r}
-summary(one_line_stops$headway)
+summary(one_line_stops$mean_headway)
 ```
 
 This is the same method that tidytransit uses to summarise headways along all routes in the 
 system when we use the `get_route_frequency` function, which we'll try next.  
 
-# 4) Map Headways By Route
+### 4) Map Headways By Route
 
 Now we'll use the `get_route_frequency` function to summarise transit service by route, for 
 the same time period. 
 
 ```{r}
-am_route_freq <- get_route_frequency(gtfs, service_ids = service_ids, start_time = 6*3600, end_time = 10*3600) 
+am_route_freq <- get_route_frequency(gtfs, service_ids = service_ids, 
+                                     start_time = 6*3600, end_time = 10*3600) 
 head(am_route_freq) %>%
   knitr::kable()
 ```
@@ -276,16 +275,15 @@ head(am_route_freq) %>%
 Since, under the hood, this table is a summary of stop frequencies along each route, it 
 includes the same variables as a summary of the headways at each stop along the route, as 
 well as a sum of all departures. Again, its important to note that this summary is based on 
-the trips that happened within the time frame we specify. 
+the trips that happened within the time frame we specify.
 
 As with the stops, we can easily join this table to simple features and then plot it on a 
-map. Note that here too we pass in the select service_id's from above, as the route run 
+map. Note that here too we pass in the select service_ids from above, as the route run 
 by a vehicle also depends on the selected service. 
 
 ```{r}
 # get_route_geometry needs a gtfs object that includes shapes as simple feature data frames
-gtfs_sf <- gtfs_as_sf(gtfs)
-routes_sf <- get_route_geometry(gtfs_sf, service_ids = service_ids)
+routes_sf <- get_route_geometry(gtfs, service_ids = service_ids)
 ```
 
 Then we join the geometries to the calculated frequencies:
@@ -306,12 +304,12 @@ routes_sf_crs %>%
   geom_sf(aes(colour=as.factor(median_headways))) + 
   labs(color = "Headways") +
   geom_sf_text(aes(label=route_id)) +
-  theme_bw() 
+  theme_bw()
 ```
 
 Its clear that a number of the route lines overlap.
 
-# 5) Map Departures by Stop and Route
+### 5) Map Departures by Stop and Route
 
 Still, we'd like to represent where and how frequently the subway runs in NYC in the 
 morning. How can we do so given that, graphically, the route lines overlap?
@@ -333,7 +331,7 @@ this a cartogram.
 
 ```{r}
 routes_sf_buffer <- st_buffer(routes_sf,
-                              dist=routes_sf$total_departures/1e6)
+                              dist = routes_sf$total_departures/1e6)
 ```
 
 Next, when we render the map, we'll make sure to make the borders around each route 
@@ -342,8 +340,8 @@ transparent, and set the opacity for the fill of all the polygons high again.
 ```{r, fig.width=6, fig.height=10}
 routes_sf_buffer %>% 
   ggplot() + 
-  geom_sf(colour=alpha("white",0),fill=alpha("red",0.2)) +
-  theme_bw() 
+  geom_sf(colour = alpha("white", 0), fill = alpha("red",0.2)) +
+  theme_bw()
 ```
 
 Now we have a rough representation of the question we set out to answer: where and how 
@@ -354,9 +352,11 @@ thickness of the lines represents how many run along each line.
 We can combine this with stops to get a sense of how central stops relate to routes. 
 
 ```{r}
-nyc_stop_am_departures_main <- nyc_stops_sf %>% 
-  left_join(am_freq, by = "stop_id") %>% 
-  filter(n_departures>50)
+gtfs$stops %>% 
+  inner_join(am_stop_freq, by = "stop_id") %>% 
+  filter(n_departures > 50) %>% 
+  select(stop_id, stop_name, n_departures, mean_headway) %>% 
+  knitr::kable()
 ```
 
 First, we'll leverage the common `stop_name` variable to group and count departures, in 
@@ -364,25 +364,24 @@ both directions, for all stops, filtering to out a number of smaller stops for m
 graphical clarity. 
 
 ```{r}
-nyc_stops <- left_join(gtfs$stops, am_freq, by="stop_id")
+am_stop_name_departures <- left_join(gtfs$stops, am_stop_freq, by="stop_id")
 
-stop_departures <- nyc_stops %>%  
+am_stop_name_departures <- am_stop_name_departures %>%  
   group_by(stop_name) %>%
   transmute(total_departures = sum(n_departures, na.rm=TRUE))
 
-nyc_stops1 <- right_join(nyc_stops_sf,
-                         stop_departures, by="stop_name")
-
-stop_departures <- nyc_stops1 %>%
-  filter(total_departures>100)
+am_stop_name_departures <- am_stop_name_departures %>%
+  filter(total_departures > 100)
 ```
 
 Finally, we can plot both the route line counts and the stop departure counts on one map:
 
 ```{r, fig.width=6, fig.height=10}
 ggplot() + 
-  geom_sf(data=routes_sf_buffer,colour=alpha("white",0),fill=alpha("red",0.3)) +
-  geom_sf(data=stop_departures, aes(size=total_departures), shape=1) + 
+  geom_sf(data = routes_sf_buffer, 
+          colour = alpha("white",0), fill = alpha("red",0.3)) +
+  geom_sf(data = am_stop_name_departures, 
+          aes(size = total_departures), shape=1) + 
   labs(size = "Departures (Hundreds)") +
   theme_bw() +
   theme(legend.position="none") +

From c6dbb4e6e85f041936935be2d73b63f12889862c Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Fri, 19 Nov 2021 15:02:27 +0100
Subject: [PATCH 7/8] update vignette

---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 1a1628f1..d5dd2f90 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ status](https://github.com/r-transit/tidytransit/workflows/R-CMD-check/badge.svg
 # tidytransit
 
 Use tidytransit to map transit stops and routes, calculate travel times and transit
-frequencies, and validate transit feeds. tidytransit reads the 
+frequencies, and validate transit feeds. Tidytransit reads the 
 [General Transit Feed Specification](http://gtfs.org/) into 
 [tidyverse](https://tibble.tidyverse.org/) and 
 [simple features](https://en.wikipedia.org/wiki/Simple_Features) data frames. 
@@ -23,8 +23,9 @@ Tidytransit can be used to:
 Have a look at the following vignettes to see how tidytransit can be used to analyse a feed:
 
 - [the tutorial](http://tidytransit.r-transit.org/articles/introduction.html) 
-- [introduction to service patterns](http://tidytransit.r-transit.org/articles/servicepatterns.html)
-- [introduction to time tables](http://tidytransit.r-transit.org/articles/timetable.html)
+- [calendar and service patterns](http://tidytransit.r-transit.org/articles/servicepatterns.html)
+- [create time tables for stops](http://tidytransit.r-transit.org/articles/timetable.html)
+- [frequency and headway calculations](http://tidytransit.r-transit.org/articles/frequency.html)  
 
 ## Installation
 

From a997c6ac088c470fa640e6800a242a39127a49ce Mon Sep 17 00:00:00 2001
From: Flavio Poletti <flavio.poletti@hotmail.ch>
Date: Mon, 22 Nov 2021 09:16:28 +0100
Subject: [PATCH 8/8] update docs

---
 R/frequencies.R            | 10 ++++++++--
 man/get_route_frequency.Rd |  6 +++++-
 man/get_stop_frequency.Rd  |  6 +++++-
 vignettes/frequency.Rmd    |  2 ++
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/R/frequencies.R b/R/frequencies.R
index cc182e84..7c598395 100644
--- a/R/frequencies.R
+++ b/R/frequencies.R
@@ -1,6 +1,9 @@
 #' Get Stop Frequency
 #' 
-#' Note that some GTFS feeds contain a frequency data frame already. 
+#' Calculate the number of departures and mean headways for all stops within a
+#' given timespan and for given service_ids. 
+#' 
+#' @note Some GTFS feeds contain a frequency data frame already. 
 #' Consider using this instead, as it will be more accurate than what 
 #' tidytransit calculates.
 #' 
@@ -76,7 +79,10 @@ get_stop_frequency <- function(gtfs_obj,
 
 #' Get Route Frequency
 #' 
-#' Note that some GTFS feeds contain a frequency data frame already. 
+#' Calculate the number of departures and mean headways for routes within a given timespan
+#' and for given service_ids.
+#' 
+#' @note Some GTFS feeds contain a frequency data frame already. 
 #' Consider using this instead, as it will be more accurate than what 
 #' tidytransit calculates. 
 #' 
diff --git a/man/get_route_frequency.Rd b/man/get_route_frequency.Rd
index 3edbbdb9..676282f5 100644
--- a/man/get_route_frequency.Rd
+++ b/man/get_route_frequency.Rd
@@ -29,7 +29,11 @@ a dataframe of routes with variables or headway/frequency in seconds for a route
 within a given time frame
 }
 \description{
-Note that some GTFS feeds contain a frequency data frame already.
+Calculate the number of departures and mean headways for routes within a given timespan
+and for given service_ids.
+}
+\note{
+Some GTFS feeds contain a frequency data frame already.
 Consider using this instead, as it will be more accurate than what
 tidytransit calculates.
 }
diff --git a/man/get_stop_frequency.Rd b/man/get_stop_frequency.Rd
index 9408ad3f..830e0bbb 100644
--- a/man/get_stop_frequency.Rd
+++ b/man/get_stop_frequency.Rd
@@ -33,7 +33,11 @@ dataframe of stops with the number of departures and the headway
 (departures divided by timespan) in seconds as columns
 }
 \description{
-Note that some GTFS feeds contain a frequency data frame already.
+Calculate the number of departures and mean headways for all stops within a
+given timespan and for given service_ids.
+}
+\note{
+Some GTFS feeds contain a frequency data frame already.
 Consider using this instead, as it will be more accurate than what
 tidytransit calculates.
 }
diff --git a/vignettes/frequency.Rmd b/vignettes/frequency.Rmd
index 1bf412e8..ad703c99 100644
--- a/vignettes/frequency.Rmd
+++ b/vignettes/frequency.Rmd
@@ -356,6 +356,8 @@ gtfs$stops %>%
   inner_join(am_stop_freq, by = "stop_id") %>% 
   filter(n_departures > 50) %>% 
   select(stop_id, stop_name, n_departures, mean_headway) %>% 
+  arrange(n_departures) %>% 
+  head() %>% 
   knitr::kable()
 ```