Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mpro coleta #5

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 80 additions & 2 deletions crawler.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,84 @@
package main

func Crawl(month int, year int, outputPath string) []string {
import (
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"strings"

"github.com/dadosjusbr/coletores/status"
)

// urlRequest groups the download URLs built for a single crawl.
type urlRequest struct {
	// remuDownloadURL is the URL the remuneration spreadsheet is fetched from.
	remuDownloadURL string
}

// Retorna as url para download de cada planilha em questão
func initRequests(month, year int) (urlRequest, error) {
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved

thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
idURL := fmt.Sprint("https://servicos-portal.mpro.mp.br/plcVis/frameset?__report=..%2FROOT%2Frel%2Fcontracheque%2Fmembros%2FremuneracaoMembrosAtivos.rptdesign&anomes=", year, fmt.Sprintf("%02d", month), "&nome=&cargo=&lotacao=")
sessionId, err := seasonId(idURL)
if err != nil {
return urlRequest{}, err
}

downloadURL := fmt.Sprint(idURL, fmt.Sprintf("&__sessionId=%s&__format=xls&__asattachment=true&__overwrite=false", sessionId))
return urlRequest{downloadURL}, nil

thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
}

// Inicializa o id de sessão para uma dada url
func seasonId(url string) (string, error) {
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
resp, err := http.Get(url)
if err != nil {
danielfireman marked this conversation as resolved.
Show resolved Hide resolved
return "", status.NewError(status.ConnectionError, fmt.Errorf("Was not possible to get a season id to the url: %s. %q", url, err))
}
defer resp.Body.Close()

page, err := ioutil.ReadAll(resp.Body)
danielfireman marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return "", status.NewError(status.ConnectionError, fmt.Errorf("Was not possible to get a season id to the url: %s. %q", url, err))
}

htmlCode := string(page)
id := strings.Split(htmlCode, "Constants.viewingSessionId = \"")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Poderia, por favor, fazer um teste de unidade para essa função? para tornar o teste mais fácil, pode utilizar a bilioteca httptest

thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
seasonId := id[1][0:19]

return seasonId, err
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
}

func download(url string, filePath string) error {
resp, err := http.Get(url)
if err != nil {
return status.NewError(status.DataUnavailable, fmt.Errorf("Was not possible download the file: %s .The following mistake was taken: %q", filePath, err))
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
}
defer resp.Body.Close()

file, err := os.Create(filePath)
if err != nil {
return status.NewError(status.DataUnavailable, fmt.Errorf("Was not possible download the file: %s .The following mistake was taken: %q", filePath, err))
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
}
defer file.Close()

io.Copy(file, resp.Body)
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
return nil
}

func Crawl(month int, year int, outputPath string) ([]string, error) {
var paths []string
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
return paths

thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
var fileName = fmt.Sprint(year, "_", fmt.Sprintf("%02d", month), "_remu")
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
var filePath = fmt.Sprint(outputPath, "/", fileName, ".xls")

request, err := initRequests(year, month)
if err != nil {
return paths, err
}

download(request.remuDownloadURL, filePath)
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
paths = append(paths, filePath)

return paths, nil
}
6 changes: 5 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ func main() {
}

// Main execution
fileNames := Crawl(month, year, outputPath)
fileNames, err := Crawl(month, year, outputPath)
if err != nil {
status.ExitFromError(err)
os.Exit(1)
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

usar pacote status

}
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
employees := Parse(month, year, fileNames)

cr := coletores.ExecutionResult{
Expand Down