Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds a Date Parser #14

Merged
merged 16 commits into from
Nov 7, 2024
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ Here are the documents/patterns which have been contemplated so far:
- **PIS**
- **SUS**
- **Mobile numbers**
- **Dates**

Some usage examples can be found at example/example.go.
160 changes: 160 additions & 0 deletions date.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package brazil

import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
)

// Building blocks used to assemble the date regular expressions.
const (
	// digitsPattern matches one or more decimal digits (day, numeric month, year).
	digitsPattern = `\d+`
	// dateDelimiterPattern is a fmt template: %s receives a delimiter
	// expression, which may be surrounded by at most one space on each side.
	dateDelimiterPattern = `[ ]{0,1}%s[ ]{0,1}`
	// dateDelimiter is the canonical separator used once a candidate has
	// been normalised.
	dateDelimiter = "/"
)

// Layouts, in time.Parse reference-time notation, for the supported dates.
const (
	// DateFormatLong is day/month/four-digit year.
	DateFormatLong = "02/01/2006"
	// DateFormatShort is day/month/two-digit year.
	DateFormatShort = "02/01/06"
)

// delimiters lists the regular-expression fragments accepted between the
// day, month and year of a date: slash, dot, hyphen, space, comma and the
// word "de".
var delimiters = []string{`\/`, `\.`, `\-`, `[ ]`, `,`, `de`}

// months maps a three-character month prefix to its month number.
// NOTE(review): the keys containing digits (M4R, 4BR, JU1, AG0, N0V, DE2)
// look like tolerated misreadings of letters as digits - confirm intent.
var months = monthsOfYear{
	// January
	"JAN": 1,
	"ENE": 1,
	// February
	"FEV": 2,
	"FEB": 2,
	// March
	"MAR": 3,
	"M4R": 3,
	// April
	"ABR": 4,
	"APR": 4,
	"4BR": 4,
	// May
	"MAI": 5,
	"MAY": 5,
	// June
	"JUN": 6,
	// July
	"JUL": 7,
	"JU1": 7,
	// August
	"AGO": 8,
	"AUG": 8,
	"AG0": 8,
	// September
	"SET": 9,
	"SEP": 9,
	// October
	"OUT": 10,
	"OCT": 10,
	// November
	"NOV": 11,
	"N0V": 11,
	// December
	"DEZ": 12,
	"DEC": 12,
	"DIC": 12,
	"DE2": 12,
}

// monthPattern matches either a numeric month or one of the known
// three-character prefixes followed by up to seven further non-digit
// characters (the remainder of the month name).
var monthPattern = `(?:` + digitsPattern +
	`|(` + strings.Join(months.getMonths(), "|") + `)[\D!ç]{0,7})`

// datePattern is a case-insensitive fmt template for a full date: digits,
// delimiter, month, delimiter, digits. It carries two %s verbs (one per
// dateDelimiterPattern) that receive the delimiter expression.
var datePattern = strings.Join([]string{
	`(?i)`,
	digitsPattern,
	dateDelimiterPattern,
	monthPattern,
	dateDelimiterPattern,
	digitsPattern,
}, "")

// monthsOfYear maps a month key to its month number.
type monthsOfYear map[string]int8

// getMonths returns every key of the map. The order of the returned slice
// follows Go's map iteration order and is therefore unspecified.
func (m monthsOfYear) getMonths() []string {
	names := make([]string, len(m))
	next := 0
	for key := range m {
		names[next] = key
		next++
	}
	return names
}

// date wraps the time.Time value produced by ParseDate.
type date struct {
	value time.Time
}

// Time exposes the wrapped value as a time.Time.
func (d date) Time() time.Time { return d.value }

// String formats the date using the DateFormatLong layout.
func (d date) String() string {
	formatted := d.Time().Format(DateFormatLong)
	return formatted
}

// dateRegexps holds one compiled date expression per entry of delimiters,
// in the same order. The expressions are compiled once at package
// initialisation rather than on every ParseDate call.
var dateRegexps = compileDateRegexps()

// compileDateRegexps expands datePattern with each delimiter and compiles
// the resulting expressions.
func compileDateRegexps() []*regexp.Regexp {
	compiled := make([]*regexp.Regexp, 0, len(delimiters))
	for _, delimiter := range delimiters {
		// datePattern carries two %s verbs: the delimiter between day and
		// month, and the one between month and year.
		compiled = append(compiled, regexp.MustCompile(fmt.Sprintf(datePattern, delimiter, delimiter)))
	}
	return compiled
}

// ParseDate searches value for date candidates (day, month, year separated
// by one of the supported delimiters) and converts them with getTime.
//
// Every candidate found must convert successfully: the first one that fails
// aborts the call and its error is returned, wrapped together with the
// offending text. When several candidates convert, the last one is
// returned. If value contains no candidate at all, an "invalid date" error
// is returned.
func ParseDate(value string) (date, error) {
	var parsed *time.Time

	// Candidates are visited delimiter by delimiter, in the order of
	// delimiters, and left to right within each delimiter.
	for _, re := range dateRegexps {
		for _, match := range re.FindAllString(value, -1) {
			t, err := getTime(match)
			if err != nil {
				return date{}, fmt.Errorf("parsing date %q: %w", match, err)
			}
			parsed = t
		}
	}

	// No candidate was found in value.
	if parsed == nil {
		return date{}, fmt.Errorf("invalid date")
	}

	return date{value: *parsed}, nil
}

// separatorRegexp matches a run of non-word characters, optionally mixed
// with delimiter expressions that are themselves followed by a non-word
// character (for example "de "). It is case-insensitive so that an
// upper-case "DE", which the case-insensitive datePattern accepts, is
// collapsed as well instead of being left behind as a bogus component.
var separatorRegexp = regexp.MustCompile(`(?i)(?:\W|(` + strings.Join(delimiters, "|") + `)\W)+`)

// getTime converts a single date candidate such as "27 de AG0STO de 1994"
// or "11-SET-01" into a time.Time.
//
// The candidate is normalised by replacing every separator run with
// dateDelimiter and splitting on it. The first component is the day, the
// second the month (numeric, or a name resolved by normalizeMonth) and the
// last the year; anything in between is ignored. A two-character year is
// parsed with DateFormatShort, any other with DateFormatLong.
//
// An error is returned when fewer than three components are present, when a
// component is not numeric, or when time.Parse rejects the resulting date
// (for example month 13).
func getTime(value string) (*time.Time, error) {
	normalized := separatorRegexp.ReplaceAllString(value, dateDelimiter)
	parts := strings.Split(normalized, dateDelimiter)

	// Guard the indexing below: day, month and year are all required.
	if len(parts) < 3 {
		return nil, fmt.Errorf("invalid date %q: expected day, month and year", value)
	}

	day, err := strconv.Atoi(parts[0])
	if err != nil {
		return nil, err
	}

	month, err := strconv.Atoi(normalizeMonth(parts[1]))
	if err != nil {
		return nil, err
	}

	// The year is always the last component; inputs such as
	// "09 JUL/JUL 1932" produce more than three components.
	yearText := parts[len(parts)-1]
	year, err := strconv.Atoi(yearText)
	if err != nil {
		return nil, err
	}

	// Choose the layout from the same component the year was read from, so
	// that a two-digit year is expanded by time.Parse even when extra
	// components sit between the month and the year.
	layout, yearVerb := DateFormatLong, "%04d"
	if len(yearText) == 2 {
		layout, yearVerb = DateFormatShort, "%02d"
	}

	// time.Parse validates the combination (day and month ranges) instead of
	// silently normalising out-of-range values.
	parsed, err := time.Parse(layout, fmt.Sprintf("%02d/%02d/"+yearVerb, day, month, year))
	if err != nil {
		return nil, err
	}

	return &parsed, nil
}

// normalizeMonth converts a month name into its two-digit number.
//
// Text shorter than three bytes is returned unchanged so that numeric months
// ("1", "09", "12") pass straight through to the caller. Otherwise the first
// three bytes of the upper-cased text are looked up in months; a known
// prefix yields the zero-padded month number and anything else yields an
// empty string.
func normalizeMonth(text string) string {
	if len(text) < 3 {
		return text
	}

	// Upper-casing can shorten the byte length of non-ASCII text (for
	// example "ı" becomes "I"), so the length is checked again before
	// slicing to avoid an out-of-range panic.
	upper := strings.ToUpper(text)
	if len(upper) < 3 {
		return ""
	}

	number, ok := months[upper[:3]]
	if !ok {
		return ""
	}

	return fmt.Sprintf("%02d", number)
}
79 changes: 79 additions & 0 deletions date_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package brazil

import (
"fmt"
"testing"
)

// Test_normalizeMonth checks that month names in Portuguese, English and
// Spanish resolve to their two-digit number and that unknown names resolve
// to an empty string.
func Test_normalizeMonth(t *testing.T) {
	t.Run("must normalize the month name to the number", func(t *testing.T) {
		// Each list is ordered January..December, so the expected number is
		// the index plus one.
		tableTests := []struct {
			name   string
			values []string
		}{
			{
				name:   "PT",
				values: []string{"JANEIRO", "FEVEREIRO", "MARÇO", "ABRIL", "MAIO", "JUNHO", "JULHO", "AGOSTO", "SETEMBRO", "OUTUBRO", "NOVEMBRO", "DEZEMBRO"},
			},
			{
				name:   "EN",
				values: []string{"JANUARY", "FEBRUARY", "MARCH", "APRIL", "MAY", "JUNE", "JULY", "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER"},
			},
			{
				name:   "ES",
				values: []string{"ENERO", "FEBRERO", "MARZO", "ABRIL", "MAYO", "JUNIO", "JULIO", "AGOSTO", "SEPTIEMBRE", "OCTUBRE", "NOVIEMBRE", "DICIEMBRE"},
			},
		}

		for _, tt := range tableTests {
			// One subtest per language so a failure names the table it came from.
			t.Run(tt.name, func(t *testing.T) {
				for idx, name := range tt.values {
					want := fmt.Sprintf("%02d", idx+1)
					if got := normalizeMonth(name); got != want {
						t.Errorf("normalizeMonth(%q) = %q, want %q", name, got, want)
					}
				}
			})
		}
	})
	t.Run("should return an empty string when the month name is not valid", func(t *testing.T) {
		if got := normalizeMonth("INVALID"); got != "" {
			t.Errorf("normalizeMonth(%q) = %q, want %q", "INVALID", got, "")
		}
	})
}

// TestParseDate checks that ParseDate rejects an impossible date and that
// it accepts the supported delimiters and month spellings.
func TestParseDate(t *testing.T) {
	t.Run("should return an error when the date is not valid", func(t *testing.T) {
		// Month 13 does not exist, so the candidate must be rejected.
		if _, err := ParseDate("01 13 2020"); err == nil {
			t.Errorf("Expected an error, got nil")
		}
	})
	t.Run("should return a valid date", func(t *testing.T) {
		// Input text mapped to the expected DateFormatLong rendering.
		cases := map[string]string{
			"27 de AG0STO de 1994": "27/08/1994",
			"09 JUL/JUL 1932":      "09/07/1932",
			"19/ABRIL/1943":        "19/04/1943",
			"15.NOVEMBRO.1889":     "15/11/1889",
			"11-SET-01":            "11/09/2001",
			"1 JU1HO 2024":         "01/07/2024",
			"01-JUNHO-1920":        "01/06/1920",
			"1/1/1992":             "01/01/1992",
			"1-06-1920":            "01/06/1920",
			"21.09.2012":           "21/09/2012",
			"1,5,1889":             "01/05/1889",
			"20 N0VEMBER 1695":     "20/11/1695",
			"9 MAI/MAY 1988":       "09/05/1988",
		}
		for value, want := range cases {
			got, err := ParseDate(value)
			if err != nil {
				t.Errorf("ParseDate(%q): expected nil error, got %s", value, err)
				// The returned date is meaningless on error; comparing it
				// would only add a second, misleading failure.
				continue
			}
			if got.String() != want || got.Time().Format(DateFormatLong) != want {
				t.Errorf("ParseDate(%q): expected %s, got %s", value, want, got)
			}
		}
	})
}