Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds a Date Parser #14

Merged
merged 16 commits into from
Nov 7, 2024
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ Here are the documents/patterns which have been contemplated so far:
- **PIS**
- **SUS**
- **Mobile numbers**
- **Dates**

Some usage examples can be found at example/example.go.
173 changes: 173 additions & 0 deletions date.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Package brazil provides utilities for parsing and formatting dates specific to Brazilian formats.
package brazil

import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
)

const (
// dateDelimiter is the canonical separator: getTime rewrites every separator
// run of a matched date to this string before splitting it into parts.
dateDelimiter = "/"
// dateDelimiterPattern is a fmt template for a single delimiter inside the
// date regex. The %s placeholder receives a delimiter expression, which may
// be preceded and/or followed by at most one space.
dateDelimiterPattern = `[ ]{0,1}%s[ ]{0,1}`
// digitsPattern matches a run of one or more digits (used for the day, a
// numeric month, and the year).
digitsPattern = `\d+`
// DateFormatShort is the time layout for dates with a two-digit year (DD/MM/YY).
DateFormatShort = "02/01/06"
// DateFormatLong is the time layout for dates with a four-digit year (DD/MM/YYYY).
DateFormatLong = "02/01/2006"
)

var (
// delimiters lists the separators accepted between day, month, and year.
// Every entry is a regular-expression fragment, not a literal: it is
// substituted into datePattern by ParseDate and joined into an alternation
// by getTime.
delimiters = []string{`\/`, `\.`, `\-`, `[ ]`, `,`, `de`}
// months maps the upper-case three-letter Portuguese month abbreviation to
// its month number (1-12).
months = monthsOfYear{
"JAN": 1, "FEV": 2, "MAR": 3, "ABR": 4, "MAI": 5, "JUN": 6, "JUL": 7, "AGO": 8, "SET": 9, "OUT": 10, "NOV": 11, "DEZ": 12,
}
// monthPattern matches either a numeric month or a month abbreviation
// followed by up to seven further characters (the rest of a full month name).
// The alternation order follows map iteration and therefore varies between
// runs; the abbreviations do not overlap, so the order does not affect matching.
// NOTE(review): `!` and `ç` look redundant inside the class, since \D already
// matches every non-digit character - confirm before simplifying.
monthPattern = `(?:` + digitsPattern + `|(` + strings.Join(months.getMonths(), "|") + `)[\D!ç]{0,7})`
// datePattern is the case-insensitive regex template for a whole date:
// digits, delimiter, month, delimiter, digits. It still contains the two %s
// placeholders inherited from dateDelimiterPattern, so it must be expanded
// with fmt.Sprintf (passing the delimiter twice) before being compiled.
datePattern = `(?i)` + digitsPattern + dateDelimiterPattern + monthPattern + dateDelimiterPattern + digitsPattern
)

// monthsOfYear associates a month abbreviation with its month number.
type monthsOfYear map[string]int8

// getMonths collects every abbreviation stored in the map.
// The result always has exactly len(m) entries; their order follows map
// iteration and is therefore unspecified.
func (m monthsOfYear) getMonths() []string {
	names := make([]string, len(m))
	next := 0
	for abbreviation := range m {
		names[next] = abbreviation
		next++
	}
	return names
}

// date wraps the time.Time produced by a successful ParseDate call.
type date struct {
	value time.Time
}

// Time exposes the wrapped value as a time.Time.
func (d date) Time() time.Time {
	return d.value
}

// String renders the date using the DateFormatLong layout (DD/MM/YYYY).
func (d date) String() string {
	return d.Time().Format(DateFormatLong)
}

// dateRegexps holds the compiled form of datePattern for every entry of
// delimiters, in the same order. The patterns are compiled once at package
// initialization so that ParseDate does not recompile them on every call.
var dateRegexps = compileDateRegexps()

// compileDateRegexps expands datePattern with each delimiter and compiles the result.
func compileDateRegexps() []*regexp.Regexp {
	compiled := make([]*regexp.Regexp, 0, len(delimiters))
	for _, delimiter := range delimiters {
		// datePattern carries two %s placeholders, one per delimiter position.
		pattern := fmt.Sprintf(datePattern, delimiter, delimiter)
		compiled = append(compiled, regexp.MustCompile(pattern))
	}
	return compiled
}

// ParseDate parses a date from a string and returns a date struct.
// Here are some possibilities for usage:
//   - "5 de Abril de 1999"
//   - "05 Abril 1999"
//   - "5-4-1999"
//   - "05.ABR.1999"
//   - "5,4,1999"
//
// The date can be separated by a slash, a dot, a hyphen, a space, a comma, or
// the word "de".
//
// The input is searched with one pattern per delimiter. Every candidate found
// is converted with getTime: the first conversion failure is returned as the
// error, and when several candidates convert successfully the last one (in
// delimiter order) is the result. ErrInvalidDate is returned when no candidate
// is found at all.
func ParseDate(value string) (date, error) {
	// Collect the candidates of every delimiter-specific pattern.
	var matches []string
	for _, re := range dateRegexps {
		matches = append(matches, re.FindAllString(value, -1)...)
	}

	// Convert each candidate; any failure aborts the whole parse.
	var parsed *time.Time
	for _, match := range matches {
		t, err := getTime(match)
		if err != nil {
			return date{}, err
		}
		parsed = t
	}

	// No candidate means the input does not contain a recognizable date.
	if parsed == nil {
		return date{}, ErrInvalidDate
	}

	return date{value: *parsed}, nil
}

// dateSeparatorsRe matches a run of separators inside a matched date: any
// non-word character, or one of the delimiters followed by a non-word
// character. It is case-insensitive so that the word delimiter "de" is
// recognized in any casing ("de", "De", "DE"), in line with datePattern.
var dateSeparatorsRe = regexp.MustCompile(`(?i)(?:\W|(` + strings.Join(delimiters, "|") + `)\W)+`)

// getTime converts a date string already matched by ParseDate into a time.Time pointer.
//
// Every separator run is replaced by dateDelimiter and the string is split on
// it. The first token is the day, the second the month (number or name) and
// the last the year; tokens in between (for example the second half of
// "JUL/JUL", or the remainder of a name split at a non-ASCII letter, which \W
// also matches) are ignored. A two-digit year is parsed with DateFormatShort,
// any other with DateFormatLong.
//
// It returns an error wrapping ErrInvalidDate when fewer than three tokens are
// found or when the resulting day/month/year is not a valid date.
func getTime(value string) (*time.Time, error) {
	// Normalize all separators to the canonical delimiter, then split.
	normalized := dateSeparatorsRe.ReplaceAllString(value, dateDelimiter)
	parts := strings.Split(normalized, dateDelimiter)

	// A date needs at least day, month, and year.
	if len(parts) < 3 {
		return nil, ErrInvalidDate
	}

	day := parts[0]
	month := normalizeMonth(parts[1])
	year := parts[len(parts)-1]

	// Pick the layout and the year padding from the number of year digits.
	dateFormat := DateFormatLong
	yearDigits := "%04d"
	if len(year) == 2 {
		dateFormat = DateFormatShort
		yearDigits = "%02d"
	}

	// Rebuild a zero-padded DD/MM/YYYY (or DD/MM/YY) string. Tokens that are
	// not numbers become 0 and are rejected by time.Parse below.
	layout := fmt.Sprintf("%02d/%02d/"+yearDigits, parseToint(day), parseToint(month), parseToint(year))

	parsed, err := time.Parse(dateFormat, layout)
	if err != nil {
		return nil, fmt.Errorf("parsing time with layout %q: %w", layout, ErrInvalidDate)
	}

	return &parsed, nil
}

// normalizeMonth converts a month name to its corresponding two-digit month number.
//
// Text shorter than three bytes is returned unchanged (it is taken to be a
// numeric month already). Otherwise the first three bytes of the upper-cased
// text are looked up in months: a known abbreviation yields its number padded
// to two digits ("ABRIL" -> "04"), anything else yields an empty string.
func normalizeMonth(text string) string {
	if len(text) < 3 {
		return text
	}

	// Upper-casing can shorten the byte length (e.g. the two-byte "ı" becomes
	// the one-byte "I"), so the length has to be checked again before slicing.
	upper := strings.ToUpper(text)
	if len(upper) < 3 {
		return ""
	}

	value, ok := months[upper[:3]]
	if !ok {
		return ""
	}

	return fmt.Sprintf("%02d", value)
}

// parseToint reads value as a base-10 integer.
// Anything strconv.Atoi rejects (empty text, non-numeric text, out-of-range
// numbers) yields 0.
func parseToint(value string) int {
	if number, err := strconv.Atoi(value); err == nil {
		return number
	}
	return 0
}
94 changes: 94 additions & 0 deletions date_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package brazil

import (
"errors"
"fmt"
"testing"
)

// Test_normalizeMonth checks that normalizeMonth maps Portuguese month names,
// abbreviated or full, to their zero-padded month number and that it returns
// an empty string for an unknown name.
func Test_normalizeMonth(t *testing.T) {
	t.Run("must normalize the month name to the number", func(t *testing.T) {
		// Each list is ordered January..December, so the expected number is
		// the position in the list plus one.
		tableTests := []struct {
			name   string
			values []string
		}{
			{
				name:   "short",
				values: []string{"JAN", "FEV", "MAR", "ABR", "MAI", "JUN", "JUL", "AGO", "SET", "OUT", "NOV", "DEZ"},
			},
			{
				name:   "full",
				values: []string{"JANEIRO", "FEVEREIRO", "MARÇO", "ABRIL", "MAIO", "JUNHO", "JULHO", "AGOSTO", "SETEMBRO", "OUTUBRO", "NOVEMBRO", "DEZEMBRO"},
			},
		}
		for _, tt := range tableTests {
			values := tt.values
			t.Run(tt.name, func(t *testing.T) {
				for idx, name := range values {
					want := fmt.Sprintf("%02d", idx+1)
					if got := normalizeMonth(name); got != want {
						t.Errorf("normalizeMonth(%q) = %q, want %q", name, got, want)
					}
				}
			})
		}
	})
	t.Run("should return an empty string when the month name is not valid", func(t *testing.T) {
		const input = "INVALID"
		if got := normalizeMonth(input); got != "" {
			t.Errorf("normalizeMonth(%q) = %q, want %q", input, got, "")
		}
	})
}

// TestParseDate checks that ParseDate rejects inputs that do not contain a
// valid date with ErrInvalidDate, and that it parses the supported delimiter
// and month-name variations into the expected DD/MM/YYYY value.
func TestParseDate(t *testing.T) {
	t.Run("should return an error when the date is not valid", func(t *testing.T) {
		tableTests := []struct {
			value string
			want  error
		}{
			{
				// Month 13 does not exist.
				value: "01 13 2020",
				want:  ErrInvalidDate,
			},
			{
				// The year is missing.
				value: "01 AGO",
				want:  ErrInvalidDate,
			},
		}
		for _, tt := range tableTests {
			_, err := ParseDate(tt.value)
			if err == nil {
				t.Errorf("ParseDate(%q): expected an error, got nil", tt.value)
				continue
			}
			if !errors.Is(err, tt.want) {
				t.Errorf("ParseDate(%q): expected error %q, got %#v", tt.value, tt.want, err)
			}
		}
	})
	t.Run("should return a valid date", func(t *testing.T) {
		for value, want := range map[string]string{
			"27 de AGOSTO de 1994": "27/08/1994",
			"9 JUL/JUL 1932":       "09/07/1932",
			"19/ABRIL/1943":        "19/04/1943",
			"15.NOVEMBRO.1889":     "15/11/1889",
			"11-SET-01":            "11/09/2001",
			"1 JULHO 2024":         "01/07/2024",
			"01-JUNHO-1920":        "01/06/1920",
			"1/1/1992":             "01/01/1992",
			"1-06-1920":            "01/06/1920",
			"21.09.2012":           "21/09/2012",
			"1,5,1889":             "01/05/1889",
			"20 Nov 1695":          "20/11/1695",
			"9 MAI/MAY 1988":       "09/05/1988",
		} {
			got, err := ParseDate(value)
			if err != nil {
				// The zero date below would only repeat the same failure.
				t.Errorf("ParseDate(%q): expected no error, got %s", value, err)
				continue
			}
			if got.String() != want || got.Time().Format(DateFormatLong) != want {
				t.Errorf("ParseDate(%q): expected %s, got %#v", value, want, got)
			}
		}
	})
}