Skip to content

Commit

Permalink
Added organize feature (#64)
Browse files Browse the repository at this point in the history
* Added organize feature

* Added changes from suggestion

Suggestion: https://github.com/unidoc/unipdf-cli/pull/64/files#r1770322097

* Apply suggestions from code review

Co-authored-by: Ade Anom A <[email protected]>

* Applied suggestions from code review

- Comment: https://github.com/unidoc/unipdf-cli/pull/64/files/3898c2938786d339d44b7affb4becfddfc704452#r1770271470

---------

Co-authored-by: Ade Anom A <[email protected]>
  • Loading branch information
deon-gracias and 3ace authored Sep 26, 2024
1 parent 9dfe4f1 commit 4f206b6
Show file tree
Hide file tree
Showing 3 changed files with 282 additions and 1 deletion.
75 changes: 75 additions & 0 deletions internal/cli/organize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/

package cli

import (
"errors"
"fmt"

"github.com/spf13/cobra"
"github.com/unidoc/unipdf-cli/pkg/pdf"
)

// organizeCmdDesc is the long help text shown for the organize command.
const organizeCmdDesc = `Organize PDF files.
The command is used to organize one or more page ranges from the input file
and save the result as the output file.
If no page range is specified, all the pages from the input file will be
copied to the output file.
An example of the pages parameter: 1-3,4,6-7
Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file,
while page number 5 is skipped.
`

// organizeCmdExample holds the usage examples shown in the organize command
// help, one invocation per line, each prefixed with the application name.
var organizeCmdExample = fmt.Sprintf("%s\n%s\n",
	fmt.Sprintf("%s organize input_file.pdf output_file.pdf 1-2", appName),
	fmt.Sprintf("%s organize -p pass input_file.pdf output_file.pdf 1-2,4", appName),
)

// organizeCmd represents the organize command.
//
// It expects an input file and an output file as positional arguments,
// optionally followed by a page selection (e.g. "1-3,4,6-7"), and delegates
// the actual work to pdf.Organize.
var organizeCmd = &cobra.Command{
	Use:                   "organize [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES]",
	Short:                 "Organize PDF files",
	Long:                  organizeCmdDesc,
	Example:               organizeCmdExample,
	DisableFlagsInUseLine: true,
	Run: func(cmd *cobra.Command, args []string) {
		// Args below guarantees at least two positional arguments.
		inputPath, outputPath := args[0], args[1]

		// The lookup error is ignored; it would only signal that the
		// "password" string flag is not declared on this command.
		password, _ := cmd.Flags().GetString("password")

		// Parse the optional page selection.
		var pages []int
		if len(args) > 2 {
			var err error
			if pages, err = parsePageRangeUnsorted(args[2]); err != nil {
				printUsageErr(cmd, "Invalid page range specified\n")
				// Stop here even if printUsageErr does not terminate the
				// process, so an invalid selection is never processed.
				return
			}
		}

		if err := pdf.Organize(inputPath, outputPath, password, pages); err != nil {
			printErr("Error: %s\n", err)
			// Never report success after a failure, regardless of whether
			// printErr exits.
			return
		}

		fmt.Printf("Successfully organized file %s\n", inputPath)
		fmt.Printf("Output file saved to %s\n", outputPath)
	},
	Args: func(_ *cobra.Command, args []string) error {
		if len(args) < 2 {
			return errors.New("must provide at least the input and output files")
		}

		return nil
	},
}

func init() {
rootCmd.AddCommand(organizeCmd)

organizeCmd.Flags().StringP("password", "p", "", "input file password")
}
56 changes: 55 additions & 1 deletion internal/cli/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ func parsePageRange(pageRange string) ([]int, error) {
}

indices := strings.Split(rng, "-")

lenIndices := len(indices)
if lenIndices > 2 {
return nil, errors.New("invalid page range")
Expand Down Expand Up @@ -86,6 +85,61 @@ func parsePageRange(pageRange string) ([]int, error) {
return pages, nil
}

// parsePageRangeUnsorted parses a comma-separated page selection such as
// "3,1-2,7" into a list of page numbers. Pages are collected in the order in
// which they appear in the input; no sorting is performed here. Each entry is
// either a single page number or an inclusive "start-end" range. Empty entries
// are skipped, and every page number must be at least 1.
//
// Spaces are stripped with removeSpaces before parsing, and the collected
// list is passed through uniqueIntSlice before being returned (both helpers
// are defined elsewhere; presumably the latter drops duplicate page numbers).
//
// An error is returned for malformed entries, page numbers lower than 1 and
// ranges whose start is greater than their end.
func parsePageRangeUnsorted(pageRange string) ([]int, error) {
	var pages []int

	rngs := strings.Split(removeSpaces(pageRange), ",")
	for _, rng := range rngs {
		if rng == "" {
			continue
		}

		indices := strings.Split(rng, "-")
		switch len(indices) {
		case 1:
			// Single page entry.
			page, err := strconv.Atoi(indices[0])
			if err != nil {
				return nil, errors.New("invalid page number")
			}
			// Apply the same lower bound that is enforced for range entries.
			if page < 1 {
				return nil, errors.New("page number must be greater than 0")
			}

			pages = append(pages, page)
		case 2:
			// Inclusive start-end range.
			start, err := strconv.Atoi(indices[0])
			if err != nil {
				return nil, errors.New("invalid start page number")
			}
			if start < 1 {
				return nil, errors.New("page range start must be greater than 0")
			}

			end, err := strconv.Atoi(indices[1])
			if err != nil {
				return nil, errors.New("invalid end page number")
			}
			if end < 1 {
				return nil, errors.New("page range end must be greater than 0")
			}

			if start > end {
				return nil, errors.New("page range end must be greater than the start")
			}

			for page := start; page <= end; page++ {
				pages = append(pages, page)
			}
		default:
			// More than one dash in a single entry.
			return nil, errors.New("invalid page range")
		}
	}

	pages = uniqueIntSlice(pages)

	return pages, nil
}

func parseInputPaths(inputPaths []string, recursive bool, matcher fileMatcher) ([]string, error) {
var err error
var files []string
Expand Down
152 changes: 152 additions & 0 deletions pkg/pdf/organize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/

package pdf

import (
"github.com/unidoc/unipdf/v3/common"
unipdf "github.com/unidoc/unipdf/v3/model"
)

// Organize builds a new PDF file from pages of the PDF file specified by the
// inputPath parameter and saves it at the location specified by the
// outputPath parameter.
//
// The pages parameter lists the 1-based page numbers to include; pages are
// added to the output in exactly the order in which they are listed. If pages
// is empty, all the pages of the input file are copied in their original
// order. A password can be passed in for encrypted input files.
//
// Document-level data (version, info, catalog metadata, forms, outlines,
// page labels and similar) is copied from the input file. Failures while
// copying some of the optional items are only logged at debug level and do
// not abort the operation.
func Organize(inputPath, outputPath, password string, pages []int) error {
	// Read input file.
	pdfReader, _, _, _, err := readPDF(inputPath, password)
	if err != nil {
		return err
	}

	// Without an explicit selection, fall back to every page of the input
	// file. The parameter is rebound locally; the caller's slice is untouched.
	if len(pages) == 0 {
		numPages, err := pdfReader.GetNumPages()
		if err != nil {
			return err
		}

		pages = make([]int, 0, numPages)
		for num := 1; num <= numPages; num++ {
			pages = append(pages, num)
		}
	}

	// Add selected pages to the writer.
	pdfWriter := unipdf.NewPdfWriter()

	for _, num := range pages {
		page, err := pdfReader.GetPage(num)
		if err != nil {
			return err
		}

		if err := pdfWriter.AddPage(page); err != nil {
			return err
		}
	}

	// Copy PDF version.
	version := pdfReader.PdfVersion()
	pdfWriter.SetVersion(version.Major, version.Minor)

	// Copy PDF info.
	info, err := pdfReader.GetPdfInfo()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		pdfWriter.SetDocInfo(info)
	}

	// Copy Catalog Metadata.
	if meta, ok := pdfReader.GetCatalogMetadata(); ok {
		if err := pdfWriter.SetCatalogMetadata(meta); err != nil {
			return err
		}
	}

	// Copy catalog mark information.
	if markInfo, ok := pdfReader.GetCatalogMarkInfo(); ok {
		if err := pdfWriter.SetCatalogMarkInfo(markInfo); err != nil {
			return err
		}
	}

	// Copy AcroForm.
	err = pdfWriter.SetForms(pdfReader.AcroForm)
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
		return err
	}

	// Copy viewer preferences.
	if pref, ok := pdfReader.GetCatalogViewerPreferences(); ok {
		if err := pdfWriter.SetCatalogViewerPreferences(pref); err != nil {
			return err
		}
	}

	// Copy language preferences.
	if lang, ok := pdfReader.GetCatalogLanguage(); ok {
		if err := pdfWriter.SetCatalogLanguage(lang); err != nil {
			return err
		}
	}

	// Copy document outlines.
	pdfWriter.AddOutlineTree(pdfReader.GetOutlineTree())

	// Copy OC Properties.
	props, err := pdfReader.GetOCProperties()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetOCProperties(props)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy page labels.
	labelObj, err := pdfReader.GetPageLabels()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetPageLabels(labelObj)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy named destinations.
	namedDest, err := pdfReader.GetNamedDestinations()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetNamedDestinations(namedDest)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy name dictionary.
	nameDict, err := pdfReader.GetNameDictionary()
	if err != nil {
		common.Log.Debug("ERROR: %v", err)
	} else {
		err = pdfWriter.SetNameDictionary(nameDict)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy StructTreeRoot dictionary.
	structTreeRoot, found := pdfReader.GetCatalogStructTreeRoot()
	if found {
		err := pdfWriter.SetCatalogStructTreeRoot(structTreeRoot)
		if err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Copy global page rotation.
	if pdfReader.Rotate != nil {
		if err := pdfWriter.SetRotation(*pdfReader.Rotate); err != nil {
			common.Log.Debug("ERROR: %v", err)
		}
	}

	// Write output file. writePDF is told when the output path is the same
	// as the input path (presumably so it can avoid clobbering the file
	// while it is still being read; see writePDF).
	safe := inputPath == outputPath
	return writePDF(outputPath, &pdfWriter, safe)
}

0 comments on commit 4f206b6

Please sign in to comment.