Skip to content

Commit

Permalink
Port documentation from zoneinfo-parse
Browse files Browse the repository at this point in the history
  • Loading branch information
pitdicker committed Apr 14, 2024
1 parent 7c84cfd commit e1094aa
Show file tree
Hide file tree
Showing 3 changed files with 235 additions and 0 deletions.
29 changes: 29 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
//! Rust library for reading the text files comprising the [zoneinfo
//! database][w], which records time zone changes and offsets across the world
//! from multiple sources.
//!
//! The zoneinfo database is distributed in one of two formats: a raw text
//! format with one file per continent, and a compiled binary format with one
//! file per time zone. This crate deals with the former; for the latter, see
//! the [`zoneinfo_compiled` crate][zc] instead.
//!
//! The database itself is maintained by IANA. For more information, see
//! [IANA’s page on the time zone database][iana]. You can also find the text
//! files themselves in [the tz repository][tz].
//!
//! [iana]: https://www.iana.org/time-zones
//! [tz]: https://github.com/eggert/tz
//! [w]: https://en.wikipedia.org/wiki/Tz_database
//! [zc]: https://github.com/rust-datetime/zoneinfo-compiled
//!
//! ## Outline
//!
//! Reading a zoneinfo text file is split into three stages:
//!
//! - **Parsing** individual lines of text into `Line`s is done by the `line`
//! module;
//! - **Interpreting** these lines into a complete `Table` is done by the
//! `table` module;
//! - **Calculating transitions** from this table is done by the `transitions`
//! module.

extern crate regex;

pub mod line;
Expand Down
160 changes: 160 additions & 0 deletions src/line.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,73 @@
//! Parsing zoneinfo data files, line-by-line.
//!
//! This module provides functions that take a line of input from a zoneinfo
//! data file and attempt to parse it, returning the details of the line if
//! it gets parsed successfully. Each line is classified as a `Rule`, `Link`,
//! `Zone`, or `Continuation` line.
//!
//! `Line` is the type that holds zoneinfo line data, and `LineParser` is the
//! type that parses it. To try to parse a string, use the
//! `LineParser::parse_str` method. (This isn’t the `FromStr` trait, so you
//! can’t use `parse` on a string. Sorry!)
//!
//! ## Examples
//!
//! Parsing a `Rule` line:
//!
//! ```
//! use parse_zoneinfo::line::*;
//!
//! let parser = LineParser::new();
//! let line = parser.parse_str("Rule EU 1977 1980 - Apr Sun>=1 1:00u 1:00 S");
//!
//! assert_eq!(line, Ok(Line::Rule(Rule {
//! name: "EU",
//! from_year: Year::Number(1977),
//! to_year: Some(Year::Number(1980)),
//! month: Month::April,
//! day: DaySpec::FirstOnOrAfter(Weekday::Sunday, 1),
//! time: TimeSpec::HoursMinutes(1, 0).with_type(TimeType::UTC),
//! time_to_add: TimeSpec::HoursMinutes(1, 0),
//! letters: Some("S"),
//! })));
//! ```
//!
//! Parsing a `Zone` line:
//!
//! ```
//! use parse_zoneinfo::line::*;
//!
//! let parser = LineParser::new();
//! let line = parser.parse_str("Zone Australia/Adelaide 9:30 Aus AC%sT 1971 Oct 31 2:00:00");
//!
//! assert_eq!(line, Ok(Line::Zone(Zone {
//! name: "Australia/Adelaide",
//! info: ZoneInfo {
//! utc_offset: TimeSpec::HoursMinutes(9, 30),
//! saving: Saving::Multiple("Aus"),
//! format: "AC%sT",
//! time: Some(ChangeTime::UntilTime(
//! Year::Number(1971),
//! Month::October,
//! DaySpec::Ordinal(31),
//! TimeSpec::HoursMinutesSeconds(2, 0, 0).with_type(TimeType::Wall))
//! ),
//! },
//! })));
//! ```
//!
//! Parsing a `Link` line:
//!
//! ```
//! use parse_zoneinfo::line::*;
//!
//! let parser = LineParser::new();
//! let line = parser.parse_str("Link Europe/Istanbul Asia/Istanbul");
//! assert_eq!(line, Ok(Line::Link(Link {
//! existing: "Europe/Istanbul",
//! new: "Asia/Istanbul",
//! })));
//! ```

use std::fmt;
use std::str::FromStr;
// we still support rust that doesn't have the inherent methods
Expand Down Expand Up @@ -62,9 +132,14 @@ impl fmt::Display for Error {

// The body is intentionally empty: `Error` accepts every default method of
// the `std::error::Error` trait.
impl std::error::Error for Error {}

// A set of regexes to test against.
//
// All of these regexes use the `(?x)` flag, which means they support
// comments and whitespace directly in the regex string!
impl LineParser {
pub fn new() -> Self {
LineParser {
// Format of a Rule line: one capturing group per field.
rule_line: Regex::new(
r##"(?x) ^
Rule \s+
Expand All @@ -82,6 +157,7 @@ impl LineParser {
)
.unwrap(),

// Format of a day specification.
day_field: Regex::new(
r##"(?x) ^
( ?P<weekday> \w+ )
Expand All @@ -91,6 +167,7 @@ impl LineParser {
)
.unwrap(),

// Format of an hour and a minute specification.
hm_field: Regex::new(
r##"(?x) ^
( ?P<sign> -? )
Expand All @@ -100,6 +177,7 @@ impl LineParser {
)
.unwrap(),

// Format of an hour, a minute, and a second specification.
hms_field: Regex::new(
r##"(?x) ^
( ?P<sign> -? )
Expand All @@ -109,6 +187,9 @@ impl LineParser {
)
.unwrap(),

// ^ those two could be done with the same regex, but... they aren’t.

// Format of a Zone line, with one capturing group per field.
zone_line: Regex::new(
r##"(?x) ^
Zone \s+
Expand All @@ -125,6 +206,8 @@ impl LineParser {
)
.unwrap(),

// Format of a Continuation Zone line, which is the same as the opening
// Zone line except the first two fields are replaced by whitespace.
continuation_line: Regex::new(
r##"(?x) ^
\s+
Expand All @@ -140,6 +223,7 @@ impl LineParser {
)
.unwrap(),

// Format of a Link line, with one capturing group per field.
link_line: Regex::new(
r##"(?x) ^
Link \s+
Expand All @@ -150,6 +234,7 @@ impl LineParser {
)
.unwrap(),

// Format of an empty line, which contains only comments.
empty_line: Regex::new(
r##"(?x) ^
\s*
Expand All @@ -161,10 +246,22 @@ impl LineParser {
}
}

/// A **year** field, as it is written in a zoneinfo data file.
///
/// The text of the field takes one of three forms:
///
/// - `min` or `minimum`: the earliest possible year, used when a rule has to
///   apply up until the first rule that names a specific year;
/// - `max` or `maximum`: the latest possible year, used when a rule has to
///   apply after the last rule that names a specific year;
/// - a number, which refers to that specific year.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Year {
    /// The earliest possible year, written as `min` or `minimum`.
    Minimum,
    /// The latest possible year, written as `max` or `maximum`.
    Maximum,
    /// One specific year, identified by its number.
    Number(i64),
}

Expand Down Expand Up @@ -610,15 +707,27 @@ impl TimeSpec {
}
}

/// The moment at which the rules for a location change.
///
/// Only as many units as are needed get recorded: a change at the very start
/// of a year carries just the year, a change on a particular day carries the
/// year, month, and day, and a change at a particular second carries
/// everything.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ChangeTime {
    /// The earliest point in the given **year**.
    UntilYear(Year),
    /// The earliest point in the given **month** of a year.
    UntilMonth(Year, Month),
    /// The earliest point in the given **day** of a month and year.
    UntilDay(Year, Month, DaySpec),
    /// The earliest point in the given **hour, minute, or second** of a day.
    UntilTime(Year, Month, DaySpec, TimeSpecAndType),
}

impl ChangeTime {
/// Convert this change time to an absolute timestamp, as the number of
/// seconds since the Unix epoch that the change occurs at.
pub fn to_timestamp(&self) -> i64 {
fn seconds_in_year(year: i64) -> i64 {
if is_leap(year) {
Expand Down Expand Up @@ -744,21 +853,50 @@ fn to_timestamp() {
assert_eq!(time.to_timestamp(), 951642000);
}

/// The fields that zone lines *and* zone continuation lines have in common.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ZoneInfo<'a> {
    /// How much time has to be added to UTC to obtain the standard time in
    /// this zone.
    pub utc_offset: TimeSpec,
    /// Either the name of the rules that apply in the time zone, or a fixed
    /// amount of time to add.
    pub saving: Saving<'a>,
    /// The template for time zone abbreviations, in which `%s` is the string
    /// marker.
    pub format: &'a str,
    /// When the rules change for this location, or `None` if these rules
    /// stay in effect until the end of time (!).
    pub time: Option<ChangeTime>,
}

/// How much daylight saving time (DST) applies during a timespan.
///
/// This is a dedicated type for one particular field of a zone line, because
/// that field can hold several different kinds of value.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Saving<'a> {
    /// No saving at all: just stick to the base offset.
    NoSaving,
    /// A fixed amount of time is saved for as long as this timespan is in
    /// effect. (This is equivalent to a single one-off rule that saves the
    /// given amount of time.)
    OneOff(TimeSpec),
    /// Every rule with the given name applies for as long as this timespan
    /// is in effect.
    Multiple(&'a str),
}

/// A **rule** definition line.
///
/// According to the `zic(8)` man page, a rule line has this form, along with
/// an example:
///
/// ```text
/// Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
/// Rule US 1967 1973 - Apr lastSun 2:00 1:00 D
/// ```
///
/// Apart from the opening `Rule` to specify which kind of line this is, and
/// the `type` column, every column in the line has a field in this struct.
#[derive(PartialEq, Debug, Copy, Clone)]
pub struct Rule<'a> {
pub name: &'a str,
Expand All @@ -771,6 +909,25 @@ pub struct Rule<'a> {
pub letters: Option<&'a str>,
}

/// A **zone** definition line.
///
/// According to the `zic(8)` man page, a zone line has this form, along with
/// an example:
///
/// ```text
/// Zone NAME GMTOFF RULES/SAVE FORMAT [UNTILYEAR [MONTH [DAY [TIME]]]]
/// Zone Australia/Adelaide 9:30 Aus AC%sT 1971 Oct 31 2:00
/// ```
///
/// The opening `Zone` identifier is ignored, and the last four columns are
/// all optional, with their variants consolidated into a `ChangeTime`.
///
/// The `Rules/Save` column, if it contains a value, *either* contains the
/// name of the rules to use for this zone, *or* contains a one-off period of
/// time to save.
///
/// A zone continuation line contains all the same fields apart from the
/// `Name` column and the opening `Zone` identifier.
#[derive(PartialEq, Debug, Copy, Clone)]
pub struct Zone<'a> {
pub name: &'a str,
Expand Down Expand Up @@ -944,6 +1101,9 @@ impl LineParser {
let saving = self.saving_from_str(caps.name("rulessave").unwrap().as_str())?;
let format = caps.name("format").unwrap().as_str();

// The year, month, day, and time fields are all optional, but each one may
// only be present when the fields before it are, so it should be
// impossible to, say, have a defined month but not a defined year.
let time = match (
caps.name("year"),
caps.name("month"),
Expand Down
46 changes: 46 additions & 0 deletions src/table.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,49 @@
//! Collecting parsed zoneinfo data lines into a set of time zone data.
//!
//! This module provides the `Table` struct, which is able to take parsed
//! lines of input from the `line` module and coalesce them into a single
//! set of data.
//!
//! It’s not as simple as it seems, because the zoneinfo data lines refer to
//! each other through strings: lines of the form “link zone A to B” could be
//! *parsed* successfully but still fail to be *interpreted* successfully if
//! “B” doesn’t exist. So it has to check every step of the way—nothing wrong
//! with this, it’s just a consequence of reading data from a text file.
//!
//! This module only deals with constructing a table from data: any analysis
//! of the data is done elsewhere.
//!
//!
//! ## Example
//!
//! ```
//! use parse_zoneinfo::line::{Zone, Line, LineParser, Link};
//! use parse_zoneinfo::table::{TableBuilder};
//!
//! let parser = LineParser::new();
//! let mut builder = TableBuilder::new();
//!
//! let zone = "Zone Pacific/Auckland 11:39:04 - LMT 1868 Nov 2";
//! let link = "Link Pacific/Auckland Antarctica/McMurdo";
//!
//! for line in [zone, link] {
//! match parser.parse_str(&line)? {
//! Line::Zone(zone) => builder.add_zone_line(zone).unwrap(),
//! Line::Continuation(cont) => builder.add_continuation_line(cont).unwrap(),
//! Line::Rule(rule) => builder.add_rule_line(rule).unwrap(),
//! Line::Link(link) => builder.add_link_line(link).unwrap(),
//! Line::Space => {}
//! }
//! }
//!
//! let table = builder.build();
//!
//! assert!(table.get_zoneset("Pacific/Auckland").is_some());
//! assert!(table.get_zoneset("Antarctica/McMurdo").is_some());
//! assert!(table.get_zoneset("UTC").is_none());
//! # Ok::<(), parse_zoneinfo::line::Error>(())
//! ```

use std::collections::hash_map::{Entry, HashMap};
use std::fmt;

Expand Down

0 comments on commit e1094aa

Please sign in to comment.