Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mpro coleta #5

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions crawler.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,79 @@
package main

import (
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"strings"

"github.com/dadosjusbr/coletores/status"
)

// urlFormats maps each spreadsheet type to the query-string suffix appended
// to its report URL to request the file. Each value is a fmt format string
// whose single %s verb receives the viewing session id.
var urlFormats = map[string]string{
	"remu": "&__sessionId=%s&__format=xls&__asattachment=true&__overwrite=false",
}

// initComplements builds, for each spreadsheet type, the base report URL for
// the given month and year. The period is encoded in the "anomes" query
// parameter as the year followed by the zero-padded two-digit month
// (for example 202003).
func initComplements(month, year int) map[string]string {
	// The report path contains literal "%2F" escapes, so it is passed as an
	// argument rather than being embedded in the format string.
	const remuReport = "https://servicos-portal.mpro.mp.br/plcVis/frameset?__report=..%2FROOT%2Frel%2Fcontracheque%2Fmembros%2FremuneracaoMembrosAtivos.rptdesign"

	return map[string]string{
		"remu": fmt.Sprintf("%s&anomes=%d%02d&nome=&cargo=&lotacao=", remuReport, year, month),
	}
}

// Inicializa o id de sessão para uma dada url
func seasonId(url string) string {

thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
resp, err := http.Get(url)
if err != nil {
danielfireman marked this conversation as resolved.
Show resolved Hide resolved
status.ExitFromError(status.NewError(status.ConnectionError, fmt.Errorf("Was not possible to get a season id to the url: %s. %q", url, err)))
os.Exit(1)
}
defer resp.Body.Close()

page, err := ioutil.ReadAll(resp.Body)
danielfireman marked this conversation as resolved.
Show resolved Hide resolved
htmlCode := string(page)

id := strings.Split(htmlCode, "Constants.viewingSessionId = \"")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Poderia, por favor, fazer um teste de unidade para essa função? para tornar o teste mais fácil, pode utilizar a bilioteca httptest

seasonId := id[1][0:19]

return seasonId
}

func download(url string, filePath string) {

thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
resp, err := http.Get(url)
if err != nil {
status.ExitFromError(status.NewError(status.DataUnavailable, fmt.Errorf("Não foi possível fazer o download do arquivo: %s .O seguinte erro foi gerado: %q", filePath, err)))
os.Exit(1)
}

file, err := os.Create(filePath)
if err != nil {
status.ExitFromError(status.NewError(status.DataUnavailable, fmt.Errorf("Não foi possível fazer o download do arquivo: %s .O seguinte erro foi gerado: %q", filePath, err)))
os.Exit(1)
}
defer file.Close()

io.Copy(file, resp.Body)
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
defer resp.Body.Close()
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
}

func Crawl(month int, year int, outputPath string) []string {
var paths []string
thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
complements := initComplements(month, year)

thyagopereira marked this conversation as resolved.
Show resolved Hide resolved
for key, _ := range complements {
var fileName = fmt.Sprint(year, "_", fmt.Sprintf("%02d", month), "_", key)
var filePath = fmt.Sprint(outputPath, "/", fileName, ".xls")

seasonId := seasonId(complements[key])
url := fmt.Sprint(complements[key], fmt.Sprintf(urlFormats[key], seasonId))

download(url, filePath)
paths = append(paths, filePath)
}

return paths
}