Skip to content

Commit

Permalink
update dependencies + small fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
lu4p committed Mar 21, 2020
1 parent 583a2e1 commit dcbc6f5
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 36 deletions.
4 changes: 2 additions & 2 deletions cat.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ func File(filename string) (string, error) {

// FromBytes converts a []byte representation of a document to text
func FromBytes(data []byte) (string, error) {
mime, _ := mimetype.Detect(data)
switch mime {
mime := mimetype.Detect(data)
switch mime.String() {
case "application/vnd.oasis.opendocument.text":
return odtxt.BytesToStr(data)
case "application/pdf":
Expand Down
7 changes: 3 additions & 4 deletions docxtxt/docxreader.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"regexp"
)

//docx zip struct
// docx zip struct
type docx struct {
zipFileReader *zip.ReadCloser
Files []*zip.File
Expand All @@ -28,7 +28,6 @@ func ToStr(filename string) (string, error) {
return "", err
}
return BytesToStr(content)

}

// BytesToStr converts a []byte representation of .docx document file to string
Expand Down Expand Up @@ -97,7 +96,7 @@ func (d *docx) GenWordsList() {
d.listP(xmlData)
}

//get w:t value
// get w:t value
func (d *docx) getT(item string) {
var subStr string
data := item
Expand Down Expand Up @@ -131,7 +130,7 @@ func hasP(data string) bool {
func (d *docx) listP(data string) {
var result []string
re := regexp.MustCompile(`(?U)<w:p>(.*)</w:p>`)
for _, match := range re.FindAllStringSubmatch(string(data), -1) {
for _, match := range re.FindAllStringSubmatch(data, -1) {
result = append(result, match[1])
}
for _, item := range result {
Expand Down
7 changes: 4 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
module github.com/lu4p/cat

go 1.13
go 1.14

require (
github.com/EndFirstCorp/peekingReader v0.0.0-20171012052444-257fb6f1a1a6
github.com/gabriel-vasile/mimetype v0.3.21
github.com/lu4p/unipdf/v3 v3.3.2
github.com/gabriel-vasile/mimetype v1.0.4
github.com/lu4p/unipdf/v3 v3.5.1-0.20200321134028-22d47cef6c06
golang.org/x/image v0.0.0-20200119044424-58c23975cae1 // indirect
)
27 changes: 16 additions & 11 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
github.com/EndFirstCorp/peekingReader v0.0.0-20171012052444-257fb6f1a1a6 h1:t27CGFMv8DwGwqRPEa2VNof5I/aZwO6q2gfJhN8q0U4=
github.com/EndFirstCorp/peekingReader v0.0.0-20171012052444-257fb6f1a1a6/go.mod h1:zpqkXxDsVfEIUZEWvT9yAo8OmRvSlRrcYQ3Zs8sSubA=
github.com/adrg/strutil v0.1.0/go.mod h1:pXRr2+IyX5AEPAF5icj/EeTaiflPSD2hvGjnguilZgE=
github.com/adrg/sysfont v0.1.0/go.mod h1:DzISco90USPZJ+lmtpuz1SOTn1fih6YyB0KG2TEP/0U=
github.com/adrg/xdg v0.2.1/go.mod h1:ZuOshBmzV4Ta+s23hdfFZnBsdzmoR3US0d7ErpqSbTQ=
github.com/boombuler/barcode v1.0.0 h1:s1TvRnXwL2xJRaccrdcBQMZxq6X7DvsMogtmJeHDdrc=
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gabriel-vasile/mimetype v0.3.21 h1:Mc81ydjjIFN3Ir12WJ4myhnMs6cFAIlthU7MKY6XAIk=
github.com/gabriel-vasile/mimetype v0.3.21/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
github.com/gabriel-vasile/mimetype v1.0.4 h1:uBejfH8l3/2f+5vjl1e4xIaSyNEhRBZ5N/ij7ohpNd8=
github.com/gabriel-vasile/mimetype v1.0.4/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/gunnsth/pkcs7 v0.0.0-20181213175627-3cffc6fbfe83 h1:saj5dTV7eQ1wFg/gVZr1SfbkOmg8CYO9R8frHgQiyR4=
github.com/gunnsth/pkcs7 v0.0.0-20181213175627-3cffc6fbfe83/go.mod h1:xaGEIRenAiJcGgd9p62zbiP4993KaV3PdjczwGnP50I=
github.com/lu4p/unipdf/v3 v3.3.2 h1:MI1CmbDjxA2LhAJwfKMH9USo16B0iLU4XakQ/9ZbdI8=
github.com/lu4p/unipdf/v3 v3.3.2/go.mod h1:BN0DPOet7qvp6n+JbZVUR1MgWNROJMlg8l43dCFRU1A=
github.com/lu4p/unipdf/v3 v3.5.1-0.20200321134028-22d47cef6c06 h1:r49J8T7xpn6yTAncprN10Ye2dWqJ490TkNBGtRtwZKw=
github.com/lu4p/unipdf/v3 v3.5.1-0.20200321134028-22d47cef6c06/go.mod h1:748EuXjaKAupkXEtU/wJQX7efgs3/mPvNtrV1kAE6vY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b h1:VHyIDlv3XkfCa5/a81uzaoDkHH4rr81Z62g+xlnO8uM=
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/image v0.0.0-20200119044424-58c23975cae1 h1:5h3ngYt7+vXCDZCup/HkCQgW5XwmSvR/nA2JmJ0RErg=
golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190606174628-0139d5756a7d/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
7 changes: 3 additions & 4 deletions odtxt/odtreader.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"io/ioutil"
)

//Odt zip struct
// odt zip struct
type odt struct {
zipFileReader *zip.ReadCloser
Files []*zip.File
Expand Down Expand Up @@ -46,7 +46,7 @@ func BytesToStr(data []byte) (string, error) {
}
content, err := d.GetTxt()
if err != nil {
return "", errors.New("Could not Get Content")
return "", errors.New("could not Get Content")
}
return content, nil
}
Expand All @@ -72,7 +72,7 @@ func openReader(bytesReader *bytes.Reader) (*odt, error) {
return &odtDoc, nil
}

//Read all files contents
// retrieveFileContents reads the contents of all files
func (d *odt) retrieveFileContents(filename string) ([]byte, error) {
var file *zip.File
for _, f := range d.Files {
Expand All @@ -96,7 +96,6 @@ func (d *odt) retrieveFileContents(filename string) ([]byte, error) {
func (d *odt) GetTxt() (string, error) {
xmlData := d.FilesContent["content.xml"]
return d.listP(xmlData)

}

// listP for w:p tag value
Expand Down
3 changes: 2 additions & 1 deletion pdftxt/pdftxt.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//Package pdftxt extracts text from .pdf documents
// Package pdftxt extracts text from .pdf documents
package pdftxt

import (
Expand Down Expand Up @@ -57,3 +57,4 @@ func BytesToStr(data []byte) (string, error) {

return out, nil
}

1 change: 0 additions & 1 deletion plaintxt/plaintxt.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ func ToStr(filename string) (string, error) {
return "", errors.New("Error while reading file: " + err.Error())
}
return BytesToStr(content)

}

// BytesToStr converts a []byte representation of a plaintext file to string
Expand Down
31 changes: 25 additions & 6 deletions rtftxt/rtftxt.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ func ToStr(filename string) (string, error) {
content, err := ioutil.ReadFile(filename)
if err != nil {
return "", err

}

return BytesToStr(content)
}

Expand Down Expand Up @@ -228,20 +228,38 @@ func readUntilClosingBrace(r peekingReader.Reader) error {
return err
}

func handleParams(control, param string, text *bytes.Buffer) {
func handleParams(control, param string, text io.StringWriter) {
param = strings.TrimPrefix(param, " ")
if param == "" {
return
}
switch control {

case "fldrslt":
text.WriteString(param)
case "acccircle", "acccomma", "accdot", "accnone", "accunderdot", "animtextN", "b", "caps", "cbN", "cchsN ", "cfN", "charscalexN", "csN", "dnN", "embo", "expndN", "expndtwN ", "fittextN", "fN", "fsN", "i", "kerningN ", "langfeN", "langfenpN", "langN", "langnpN", "ltrch", "noproof", "nosupersub ", "outl", "plain", "rtlch", "scaps", "shad", "strike", "sub ", "super ", "ul", "ulcN", "uld", "uldash", "uldashd", "uldashdd", "uldb", "ulhwave", "ulldash", "ulnone", "ulth", "ulthd", "ulthdash", "ulthdashd", "ulthdashdd", "ulthldash", "ululdbwave", "ulw", "ulwave", "upN", "v", "webhidden":
case "acccircle", "acccomma", "accdot", "accnone", "accunderdot",
"animtextN", "b", "caps", "cbN", "cchsN ", "cfN", "charscalexN",
"csN", "dnN", "embo", "expndN", "expndtwN ", "fittextN", "fN",
"fsN", "i", "kerningN ", "langfeN", "langfenpN", "langN", "langnpN",
"ltrch", "noproof", "nosupersub ", "outl", "plain", "rtlch", "scaps",
"shad", "strike", "sub ", "super ", "ul", "ulcN", "uld", "uldash",
"uldashd", "uldashdd", "uldb", "ulhwave", "ulldash", "ulnone", "ulth",
"ulthd", "ulthdash", "ulthdashd", "ulthdashdd", "ulthldash", "ululdbwave", "ulw", "ulwave", "upN", "v", "webhidden":
text.WriteString(param)

// Paragraph Formatting Properties
case "aspalpha", "aspnum", "collapsed", "contextualspace", "cufiN", "culiN", "curiN", "faauto", "facenter", "fafixed", "fahang", "faroman", "favar", "fiN", "hyphpar ", "indmirror", "intbl", "itapN", "keep", "keepn", "levelN", "liN", "linN", "lisaN", "lisbN", "ltrpar", "nocwrap", "noline", "nooverflow", "nosnaplinegrid", "nowidctlpar ", "nowwrap", "outlinelevelN ", "pagebb", "pard", "prauthN", "prdateN", "qc", "qd", "qj", "qkN", "ql", "qr", "qt", "riN", "rinN", "rtlpar", "saautoN", "saN", "sbautoN", "sbN", "sbys", "slmultN", "slN", "sN", "spv", "subdocumentN ", "tscbandhorzeven", "tscbandhorzodd", "tscbandverteven", "tscbandvertodd", "tscfirstcol", "tscfirstrow", "tsclastcol", "tsclastrow", "tscnecell", "tscnwcell", "tscsecell", "tscswcell", "txbxtwalways", "txbxtwfirst", "txbxtwfirstlast", "txbxtwlast", "txbxtwno", "widctlpar", "ytsN":
case "aspalpha", "aspnum", "collapsed", "contextualspace",
"cufiN", "culiN", "curiN", "faauto", "facenter",
"fafixed", "fahang", "faroman", "favar", "fiN", "hyphpar ",
"indmirror", "intbl", "itapN", "keep", "keepn", "levelN", "liN",
"linN", "lisaN", "lisbN", "ltrpar", "nocwrap", "noline", "nooverflow",
"nosnaplinegrid", "nowidctlpar ", "nowwrap", "outlinelevelN ", "pagebb",
"pard", "prauthN", "prdateN", "qc", "qd", "qj", "qkN", "ql", "qr", "qt",
"riN", "rinN", "rtlpar", "saautoN", "saN", "sbautoN", "sbN", "sbys",
"slmultN", "slN", "sN", "spv", "subdocumentN ", "tscbandhorzeven",
"tscbandhorzodd", "tscbandverteven", "tscbandvertodd", "tscfirstcol",
"tscfirstrow", "tsclastcol", "tsclastrow", "tscnecell", "tscnwcell",
"tscsecell", "tscswcell", "txbxtwalways", "txbxtwfirst", "txbxtwfirstlast",
"txbxtwlast", "txbxtwno", "widctlpar", "ytsN":
text.WriteString(param)

// Section Formatting Properties
Expand All @@ -253,7 +271,8 @@ func handleParams(control, param string, text *bytes.Buffer) {
text.WriteString(param)

// Special Characters
case "-", ":", "_", "{", "|", "}", "~", "bullet", "chatn", "chdate", "chdpa", "chdpl", "chftn", "chftnsep", "chftnsepc", "chpgn", "chtime", "column", "emdash", "emspace ", "endash", "enspace ", "lbrN", "ldblquote", "line", "lquote", "ltrmark", "page", "par", "qmspace", "rdblquote", "row", "rquote", "rtlmark", "sect", "sectnum", "softcol ", "softlheightN ", "softline ", "softpage ", "tab", "zwbo", "zwj", "zwnbo", "zwnj":
case "-", ":", "_", "{", "|", "}", "~", "bullet", "chatn", "chdate", "chdpa", "chdpl", "chftn", "chftnsep", "chftnsepc", "chpgn", "chtime", "column", "emdash", "emspace ",
"endash", "enspace ", "lbrN", "ldblquote", "line", "lquote", "ltrmark", "page", "par", "qmspace", "rdblquote", "row", "rquote", "rtlmark", "sect", "sectnum", "softcol ", "softlheightN ", "softline ", "softpage ", "tab", "zwbo", "zwj", "zwnbo", "zwnj":
text.WriteString(param)

// Table Definitions
Expand Down
9 changes: 5 additions & 4 deletions rtftxt/stack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,21 @@ import (

func TestPushPeekPopLen(t *testing.T) {
var s stack
s.Push("hello")
if s.size != 1 || s.top.value != "hello" || s.top.next != nil {
const h = "hello"
s.Push(h)
if s.size != 1 || s.top.value != h || s.top.next != nil {
t.Error("expected valid value", s.size, s.top)
}

if s.Len() != 1 {
t.Error("expected correct length")
}

if v := s.Peek(); v != "hello" || s.size != 1 || s.top.value != "hello" || s.top.next != nil {
if v := s.Peek(); v != h || s.size != 1 || s.top.value != h || s.top.next != nil {
t.Error("expected same value and no size reduction")
}

if v := s.Pop(); v != "hello" {
if v := s.Pop(); v != h {
t.Error("expected pushed value", v)
}

Expand Down

0 comments on commit dcbc6f5

Please sign in to comment.