diff --git a/README.md b/README.md
index 3602d74..af8a114 100644
--- a/README.md
+++ b/README.md
@@ -22,5 +22,6 @@ Here are the documents/patterns which have been contemplated so far:
 - **PIS**
 - **SUS**
 - **Mobile numbers**
+- **Dates**
 
 Some usage examples can be found at example/example.go.
diff --git a/date.go b/date.go
new file mode 100644
--- /dev/null
+++ b/date.go
@@ -0,0 +1,190 @@
+// Package brazil provides utilities for parsing and formatting dates specific to Brazilian formats.
+package brazil
+
+import (
+	"fmt"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+)
+
+const (
+	// dateDelimiter is the delimiter used in date strings.
+	dateDelimiter = "/"
+	// dateDelimiterPattern is the regex template for a delimiter with an optional space on each side.
+	dateDelimiterPattern = `[ ]{0,1}%s[ ]{0,1}`
+	// digitsPattern is the regex pattern for digits.
+	digitsPattern = `\d+`
+	// DateFormatShort is the short date format (DD/MM/YY).
+	DateFormatShort = "02/01/06"
+	// DateFormatLong is the long date format (DD/MM/YYYY).
+	DateFormatLong = "02/01/2006"
+)
+
+var (
+	// delimiters is a list of possible date delimiters.
+	delimiters = []string{`\/`, `\.`, `\-`, `[ ]`, `,`, `de`}
+	// months is a map of month abbreviations to their corresponding month numbers.
+	months = monthsOfYear{
+		"JAN": 1, "FEV": 2, "MAR": 3, "ABR": 4, "MAI": 5, "JUN": 6, "JUL": 7, "AGO": 8, "SET": 9, "OUT": 10, "NOV": 11, "DEZ": 12,
+	}
+	// monthPattern is the regex pattern for matching month names or numbers.
+	monthPattern = `(?:` + digitsPattern + `|(` + strings.Join(months.getMonths(), "|") + `)[\D!ç]{0,7})`
+	// datePattern is the regex template for matching dates; each %s verb receives the delimiter.
+	datePattern = `(?i)` + digitsPattern + dateDelimiterPattern + monthPattern + dateDelimiterPattern + digitsPattern
+	// dateRegexps holds datePattern compiled once for each entry of delimiters.
+	dateRegexps = compileDatePatterns()
+	// separatorRegexp matches the runs of separators found between the fields of a date.
+	separatorRegexp = regexp.MustCompile(`(?:\W|(` + strings.Join(delimiters, "|") + `)\W)+`)
+)
+
+type monthsOfYear map[string]int8
+
+// getMonths returns the month abbreviations in alphabetical order, which keeps
+// the patterns built from them identical across runs.
+func (m monthsOfYear) getMonths() []string {
+	keys := make([]string, 0, len(m))
+	for name := range m {
+		keys = append(keys, name)
+	}
+	sort.Strings(keys)
+	return keys
+}
+
+// compileDatePatterns compiles datePattern once for each entry of delimiters.
+func compileDatePatterns() []*regexp.Regexp {
+	compiled := make([]*regexp.Regexp, 0, len(delimiters))
+	for _, delimiter := range delimiters {
+		pattern := fmt.Sprintf(datePattern, delimiter, delimiter)
+		compiled = append(compiled, regexp.MustCompile(pattern))
+	}
+	return compiled
+}
+
+// date struct represents a date value.
+type date struct {
+	value time.Time
+}
+
+// Time returns the date as a time.Time.
+func (d date) Time() time.Time {
+	return d.value
+}
+
+// String returns the date as a string with a DateFormatLong format.
+func (d date) String() string {
+	return d.value.Format(DateFormatLong)
+}
+
+// ParseDate parses a date from a string and returns a date struct.
+// Here are some possibilities for usage:
+// - "5 de Abril de 1999"
+// - "05 Abril 1999"
+// - "5-4-1999"
+// - "05.ABR.1999"
+// - "5,4,1999"
+//
+// The date can be separated by a slash, a space, a dot, a hyphen, a comma, or the word "de".
+//
+// ErrInvalidDate is returned when the value holds no date or when any date
+// found in it is not valid. When the value holds several dates, the last
+// candidate collected is the one returned.
+func ParseDate(value string) (date, error) {
+	var matches []string
+
+	// Collect the candidates found with each delimiter.
+	for _, re := range dateRegexps {
+		matches = append(matches, re.FindAllString(value, -1)...)
+	}
+
+	// Without candidates there is nothing in the value that looks like a date.
+	if len(matches) == 0 {
+		return date{}, ErrInvalidDate
+	}
+
+	// Every candidate must be valid; the last one is the date returned.
+	var parsed time.Time
+	for _, match := range matches {
+		t, err := getTime(match)
+		if err != nil {
+			return date{}, err
+		}
+		parsed = *t
+	}
+
+	return date{value: parsed}, nil
+}
+
+// getTime parses a date string matched by one of dateRegexps and returns a
+// time.Time pointer, or an error wrapping ErrInvalidDate.
+func getTime(value string) (*time.Time, error) {
+	// Collapse every run of separators into a single date delimiter.
+	normalized := separatorRegexp.ReplaceAllString(value, dateDelimiter)
+	fields := strings.Split(normalized, dateDelimiter)
+
+	// A date needs at least a day, a month, and a year.
+	if len(fields) < 3 {
+		return nil, ErrInvalidDate
+	}
+
+	// The year is read from the last field because the month may span more
+	// than one field (e.g. "9 JUL/JUL 1932").
+	day := fields[0]
+	month := normalizeMonth(fields[1])
+	year := fields[len(fields)-1]
+
+	// A year with 2 digits is parsed with the short format.
+	layout := DateFormatLong
+	yearDigits := "%04d"
+	if len(year) == 2 {
+		layout = DateFormatShort
+		yearDigits = "%02d"
+	}
+
+	// A day or a month that is not a number becomes 0, which time.Parse rejects.
+	formatted := fmt.Sprintf("%02d/%02d/"+yearDigits, parseToint(day), parseToint(month), parseToint(year))
+
+	parsed, err := time.Parse(layout, formatted)
+	if err != nil {
+		return nil, fmt.Errorf("parsing date %q with layout %q: %w", formatted, layout, ErrInvalidDate)
+	}
+
+	return &parsed, nil
+}
+
+// normalizeMonth converts a month name to its corresponding month number with
+// 2 digits. Texts shorter than 3 bytes are returned unchanged, and names that
+// are not known result in an empty string.
+func normalizeMonth(text string) string {
+	// Short texts, such as the numeric months "4" or "04", are kept as they are.
+	if len(text) < 3 {
+		return text
+	}
+
+	// Upper-casing may shrink the text (e.g. U+017F becomes "S"), so the length
+	// is checked again before slicing to avoid an out of range panic.
+	upper := strings.ToUpper(text)
+	if len(upper) < 3 {
+		return ""
+	}
+
+	// Only the first 3 bytes identify the month.
+	value, ok := months[upper[:3]]
+	if !ok {
+		return ""
+	}
+
+	return fmt.Sprintf("%02d", value)
+}
+
+// parseToint converts a string to an integer. Any value that is not a valid
+// integer, including the empty string, results in 0.
+func parseToint(value string) int {
+	i, err := strconv.Atoi(value)
+	if err != nil {
+		return 0
+	}
+	return i
+}
diff --git a/date_test.go b/date_test.go
new file mode 100644
--- /dev/null
+++ b/date_test.go
@@ -0,0 +1,101 @@
+package brazil
+
+import (
+	"errors"
+	"fmt"
+	"testing"
+)
+
+func Test_normalizeMonth(t *testing.T) {
+	t.Run("must normalize the month name to the number", func(t *testing.T) {
+		tableTests := []struct {
+			name   string
+			values []string
+		}{
+			{
+				name:   "short",
+				values: []string{"JAN", "FEV", "MAR", "ABR", "MAI", "JUN", "JUL", "AGO", "SET", "OUT", "NOV", "DEZ"},
+			},
+			{
+				name:   "full",
+				values: []string{"JANEIRO", "FEVEREIRO", "MARÇO", "ABRIL", "MAIO", "JUNHO", "JULHO", "AGOSTO", "SETEMBRO", "OUTUBRO", "NOVEMBRO", "DEZEMBRO"},
+			},
+		}
+		for _, tt := range tableTests {
+			for idx, name := range tt.values {
+				want := fmt.Sprintf("%02d", idx+1)
+				got := normalizeMonth(name)
+				if got != want {
+					t.Errorf("Expected %s, got %#v", want, got)
+				}
+			}
+		}
+	})
+	t.Run("should return an empty string when the month name is not valid", func(t *testing.T) {
+		want := ""
+		got := normalizeMonth("INVALID")
+		if got != want {
+			t.Errorf("Expected %s, got %#v", want, got)
+		}
+	})
+	t.Run("should return an empty string when the upper-cased text is shorter than 3 bytes", func(t *testing.T) {
+		want := ""
+		got := normalizeMonth("\u017f\u017f")
+		if got != want {
+			t.Errorf("Expected %q, got %q", want, got)
+		}
+	})
+}
+
+func TestParseDate(t *testing.T) {
+	t.Run("should return an error when the date is not valid", func(t *testing.T) {
+		tableTests := []struct {
+			name  string
+			value string
+			want  error
+		}{
+			{
+				value: "01 13 2020",
+				want:  ErrInvalidDate,
+			},
+			{
+				value: "01 AGO",
+				want:  ErrInvalidDate,
+			},
+		}
+		for _, tt := range tableTests {
+			_, err := ParseDate(tt.value)
+			if err == nil {
+				t.Errorf("Expected an error, got nil")
+			}
+			if !errors.Is(err, tt.want) {
+				t.Errorf("Expected %s, got %#v", tt.want, err)
+			}
+		}
+	})
+	t.Run("should return a valid date", func(t *testing.T) {
+		for value, want := range map[string]string{
+			"27 de AGOSTO de 1994": "27/08/1994",
+			"9 JUL/JUL 1932":       "09/07/1932",
+			"19/ABRIL/1943":        "19/04/1943",
+			"15.NOVEMBRO.1889":     "15/11/1889",
+			"11-SET-01":            "11/09/2001",
+			"1 JULHO 2024":         "01/07/2024",
+			"01-JUNHO-1920":        "01/06/1920",
+			"1/1/1992":             "01/01/1992",
+			"1-06-1920":            "01/06/1920",
+			"21.09.2012":           "21/09/2012",
+			"1,5,1889":             "01/05/1889",
+			"20 Nov 1695":          "20/11/1695",
+			"9 MAI/MAY 1988":       "09/05/1988",
+		} {
+			got, err := ParseDate(value)
+			if err != nil {
+				t.Errorf("Expected nil, got %s", err)
+			}
+			if got.String() != want || got.Time().Format(DateFormatLong) != want {
+				t.Errorf("Expected %s, got %#v", want, got)
+			}
+		}
+	})
+}