Skip to content

Commit

Permalink
Merge pull request #372 from uoregon-libraries/fix/batchname
Browse files Browse the repository at this point in the history
Store canonical batch name in the database
  • Loading branch information
jechols authored Dec 26, 2024
2 parents eb4cadc + f057330 commit ee5eaa0
Show file tree
Hide file tree
Showing 16 changed files with 122 additions and 36 deletions.
12 changes: 12 additions & 0 deletions changelogs/2024-12-20-batchname.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
### Fixed

- Batch names are now stored in the database when a batch is built, rather than
computed as needed. This fixes weird datestamp inconsistency when a batch is
generated on a server with one timezone, and links to staging/production are
generated in a different timezone. This also prevents us from having massive
problems if we make a major change to the batch name generation algorithm.

### Migration

- Run database migrations:
- `make && ./bin/migrate-database -c ./settings up`
6 changes: 3 additions & 3 deletions src/cmd/delete-live-done-issues/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ func main() {
}

for _, b := range batches {
logger.Infof("Closing batch %q", b.FullName())
logger.Infof("Closing batch %q", b.FullName)
var err = b.Finalize()
if err != nil {
logger.Errorf("Unable to close %q: %s", b.FullName(), err)
logger.Errorf("Unable to close %q: %s", b.FullName, err)
}
}

Expand Down Expand Up @@ -81,7 +81,7 @@ func warning(issues []*models.Issue) {
fmt.Println()
fmt.Println("The following batches are affected:")
for _, b := range batches {
fmt.Printf(" - %s\n", b.FullName())
fmt.Printf(" - %s\n", b.FullName)
}

fmt.Println()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- +goose Up
ALTER TABLE `batches` ADD `full_name` TINYTEXT COLLATE utf8_bin;

-- +goose Down
ALTER TABLE `batches` DROP COLUMN `full_name`;
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

func init() {
goose.AddMigration(upCountIssuePages, downCountIssuePages)
goose.AddMigration(upCountIssuePages, noop)
}

// upCountIssuePages is an expensive little hack, but it's a one-time cost: we
Expand Down Expand Up @@ -41,8 +41,3 @@ func upCountIssuePages(_ *sql.Tx) error {
}
return nil
}

// downCountIssuePages is a no-op; page counts are deleted from the table anyway
func downCountIssuePages(_ *sql.Tx) error {
return nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package migrations

import (
"database/sql"
"fmt"

"github.com/pressly/goose/v3"
"github.com/uoregon-libraries/newspaper-curation-app/src/models"
)

func init() {
goose.AddMigration(upAddBatchPermaname, noop)
}

func upAddBatchPermaname(tx *sql.Tx) error {
var stmt, err = tx.Prepare("UPDATE batches SET full_name = ? WHERE id = ? AND (full_name IS NULL OR full_name = '')")
if err != nil {
return fmt.Errorf("preparing SQL: %w", err)
}

var batches []*models.Batch
batches, err = models.ActionableBatches()
if err != nil {
return fmt.Errorf("reading batches from db: %w", err)
}

for _, b := range batches {
if b.FullName == "" {
b.GenerateFullName()
var r, err = stmt.Exec(b.FullName, b.ID)
if err != nil {
return fmt.Errorf("saving batch %q: %w", b.FullName, err)
}
var n int64
n, err = r.RowsAffected()
if err != nil {
return fmt.Errorf("counting rows affected by batch naming: %w", err)
}
if n != 1 {
return fmt.Errorf("saving batch %q: %d rows affected instead of 1", b.FullName, n)
}
}
}

return nil
}
9 changes: 9 additions & 0 deletions src/cmd/migrate-database/migrations/noop.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package migrations

import "database/sql"

// noop is for migrations that don't do anything, such as when a column is
// deleted in an SQL migration, so there's no need for a logic reversal.
func noop(_ *sql.Tx) error {
return nil
}
8 changes: 4 additions & 4 deletions src/cmd/server/internal/batchhandler/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ func qcApproveHandler(w http.ResponseWriter, req *http.Request) {
// TODO: send job to ONI to load batch live
var err = r.batch.Save(models.ActionTypeApproveBatch, r.Vars.User.ID, "")
if err != nil {
logger.Criticalf(`Unable to log "approve batch" action for batch %d (%s): %s`, r.batch.ID, r.batch.FullName(), err)
logger.Criticalf(`Unable to log "approve batch" action for batch %d (%s): %s`, r.batch.ID, r.batch.FullName, err)
} else {
err = jobs.QueueBatchGoLive(r.batch.Batch, conf)
if err != nil {
logger.Criticalf(`Unable to queue go-live job for batch %d (%s): %s`, r.batch.ID, r.batch.FullName(), err)
logger.Criticalf(`Unable to queue go-live job for batch %d (%s): %s`, r.batch.ID, r.batch.FullName, err)
}
}

Expand Down Expand Up @@ -142,7 +142,7 @@ func setArchivedHandler(w http.ResponseWriter, req *http.Request) {
r.batch.ArchivedAt = time.Now()
var err = r.batch.SaveWithoutAction()
if err != nil {
logger.Criticalf(`Unable to flag batch %d (%s) as archived: %s`, r.batch.ID, r.batch.FullName(), err)
logger.Criticalf(`Unable to flag batch %d (%s) as archived: %s`, r.batch.ID, r.batch.FullName, err)

// Reload the batch and rerender
r, ok = getBatchResponder(w, req)
Expand Down Expand Up @@ -191,7 +191,7 @@ func qcRejectHandler(w http.ResponseWriter, req *http.Request) {
// Since we're merely re-rending the template, we must put the batch back
// to its original state or the template could be weird/broken
r.batch.Status = oldStatus
logger.Criticalf("Unable to reject batch %d (%s): %s", r.batch.ID, r.batch.FullName(), err)
logger.Criticalf("Unable to reject batch %d (%s): %s", r.batch.ID, r.batch.FullName, err)
r.Vars.Title = "Error saving batch"
r.Vars.Alert = template.HTML("Unable to reject batch. Try again or contact support.")
r.Render(rejectFormTmpl)
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/server/internal/batchhandler/routing.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ var (

func batchNewsURL(root string, b *Batch) string {
var u, _ = url.Parse(root)
u.Path = path.Join("batches", b.FullName())
u.Path = path.Join("batches", b.FullName)
return u.String() + "/"
}

Expand Down
4 changes: 2 additions & 2 deletions src/cmd/server/internal/batchmakerhandler/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ func generateBatches(w http.ResponseWriter, req *http.Request) {
}
err = jobs.QueueMakeBatch(batch, conf)
if err != nil {
logger.Criticalf("Unable to queue batch %d (%q): %s", batch.ID, batch.FullName(), err)
logger.Criticalf("Batch %d (%q) will likely need to be manually fixed in the database!", batch.ID, batch.FullName())
logger.Criticalf("Unable to queue batch %d (%q): %s", batch.ID, batch.FullName, err)
logger.Criticalf("Batch %d (%q) will likely need to be manually fixed in the database!", batch.ID, batch.FullName)
r.Error(http.StatusInternalServerError, "Error processing request - try again or contact support")
return
}
Expand Down
2 changes: 1 addition & 1 deletion src/jobs/api_jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func getONIAgent(j *Job, c *config.Config) (*openoni.RPC, error) {
type batchJobFunc func(batchname string) (jobid int64, err error)

func (j *BatchJob) queueAgentJob(name string, fn batchJobFunc) ProcessResponse {
var jobid, err = fn(j.DBBatch.FullName())
var jobid, err = fn(j.DBBatch.FullName)
if err != nil {
j.Logger.Errorf("Error calling ONI Agent: %s", err)
return PRFailure
Expand Down
4 changes: 2 additions & 2 deletions src/jobs/create_batch_structure.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func (j *CreateBatchStructure) Process(*config.Config) ProcessResponse {
var iList []*models.Issue
iList, err = j.DBBatch.Issues()
if err != nil {
j.Logger.Criticalf("Unable to read issues for %q: %s", j.DBBatch.FullName(), err)
j.Logger.Criticalf("Unable to read issues for %q: %s", j.DBBatch.FullName, err)
return PRFailure
}
for _, issue := range iList {
Expand All @@ -52,7 +52,7 @@ func (j *CreateBatchStructure) Process(*config.Config) ProcessResponse {
err = linkFiles(issue.Location, destPath)
}
if err != nil {
j.Logger.Criticalf("Unable to link issue %q into batch %q: %s", issue.Key(), j.DBBatch.FullName(), err)
j.Logger.Criticalf("Unable to link issue %q into batch %q: %s", issue.Key(), j.DBBatch.FullName, err)
return PRFailure
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/jobs/make_batch_xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type MakeBatchXML struct {

// Process generates the batch XML file
func (j *MakeBatchXML) Process(c *config.Config) ProcessResponse {
var bName = j.DBBatch.FullName()
var bName = j.DBBatch.FullName
j.Logger.Debugf("Generating batch XML for batch %q", bName)

// Set up variables
Expand Down
6 changes: 3 additions & 3 deletions src/jobs/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func QueueMakeBatch(batch *models.Batch, c *config.Config) error {
// essential files to the live location, and load it onto staging.
func getJobsForMakeBatch(batch *models.Batch, c *config.Config) []*models.Job {
// Prepare the various directory vars we'll need
var batchname = batch.FullName()
var batchname = batch.FullName
var wipDir = filepath.Join(c.BatchOutputPath, ".wip-"+batchname)
var outDir = filepath.Join(c.BatchOutputPath, batchname)
var liveDir = filepath.Join(c.BatchProductionPath, batchname)
Expand Down Expand Up @@ -328,7 +328,7 @@ func getJobsForFinalizingFlaggedIssues(batch *models.Batch, flagged []*models.Fl
// First: jobs to destroy the two batch dirs. The full-batch dir contains
// hard links and easily rebuilt metadata (e.g., the bagit info), and the
// live dir is just a copy of those files, so this is not really dangerous
var liveDir = filepath.Join(c.BatchProductionPath, batch.FullName())
var liveDir = filepath.Join(c.BatchProductionPath, batch.FullName)
jobs = append(jobs,
models.NewJob(models.JobTypeKillDir, makeLocArgs(liveDir)),
models.NewJob(models.JobTypeKillDir, makeLocArgs(batch.Location)),
Expand Down Expand Up @@ -389,7 +389,7 @@ func QueueBatchForDeletion(batch *models.Batch, flagged []*models.FlaggedIssue,
// QueueBatchGoLive fires off all jobs needed to ingest a batch live and get it
// ready for archiving.
func QueueBatchGoLive(batch *models.Batch, c *config.Config) error {
var finalPath = filepath.Join(c.BatchArchivePath, batch.FullName())
var finalPath = filepath.Join(c.BatchArchivePath, batch.FullName)
var jobs []*models.Job

// First we need jobs to push the batch to production ONI
Expand Down
31 changes: 21 additions & 10 deletions src/models/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ type Batch struct {
ID int64 `sql:",primary"`
MARCOrgCode string
Name string
FullName string
CreatedAt time.Time
ArchivedAt time.Time
WentLiveAt time.Time
Expand Down Expand Up @@ -152,11 +153,29 @@ func ActionableBatches() ([]*Batch, error) {
return findBatches(qry, statusList...)
}

// AllBatches grabs every batch in the database, including those currently in
// some kind of system process. Deleted batches are still ignored, as those
// should never need any actions, even by admins and devs.
func AllBatches() ([]*Batch, error) {
return findBatches("status <> ?", BatchStatusDeleted)
}

// FindLiveArchivedBatches returns all batches that are live and archived
func FindLiveArchivedBatches() ([]*Batch, error) {
return findBatches("status = ?", BatchStatusLiveArchived)
}

// GenerateFullName sets the batch FullName, used as the directory name for
// Open ONI to ingest. This should only ever be generated *once* per batch to
// ensure that no matter what we change, this value, and thus the computed file
// path, will always be consistent.
//
// We expose this as a public function so that we can migrate old data where
// batches didn't have a "permaname".
func (b *Batch) GenerateFullName() {
b.FullName = fmt.Sprintf("batch_%s_%s%s_ver01", b.MARCOrgCode, b.CreatedAt.Format("20060102"), b.Name)
}

// CreateBatch creates a batch in the database, using its ID combined with the
// hash of the site's web root string to generate a unique batch name, and
// associating the given list of issues. This is inefficient, but it gets the
Expand All @@ -182,6 +201,7 @@ func CreateBatch(webroot, moc string, issues []*Issue) (*Batch, error) {

var chksum = crc32.ChecksumIEEE([]byte(webroot))
b.Name = RandomBatchName(uint32(b.ID) + chksum)
b.GenerateFullName()
for _, i := range issues {
i.BatchID = b.ID
_ = i.SaveOp(op, ActionTypeInternalProcess, SystemUser.ID, fmt.Sprintf("added to batch %q", b.Name))
Expand Down Expand Up @@ -223,15 +243,6 @@ func (b *Batch) FlaggedIssues() ([]*FlaggedIssue, error) {
return findFlaggedIssues("batch_id = ?", b.ID)
}

// FullName returns the name of a batch as it is needed for chronam / ONI.
//
// Note that currently we assume all generated batches will be _ver01, because
// we would usually generate a completely new batch if one were in such a state
// as to need to be pulled from production.
func (b *Batch) FullName() string {
return fmt.Sprintf("batch_%s_%s%s_ver01", b.MARCOrgCode, b.CreatedAt.Format("20060102"), b.Name)
}

// AwardYear uses the batch creation date to produce the "award year" - this is
// the most similar value we can produce
func (b *Batch) AwardYear() int {
Expand Down Expand Up @@ -425,7 +436,7 @@ func (b *Batch) Finalize() error {
func (b *Batch) deserialize() error {
b.StatusMeta = bs(b.Status)
if b.StatusMeta == noStatus {
return fmt.Errorf("invalid status %q on batch %s", b.Status, b.FullName())
return fmt.Errorf("invalid status %q on batch %s", b.Status, b.FullName)
}

return nil
Expand Down
2 changes: 1 addition & 1 deletion src/models/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func QueueBatchJobs(name PipelineName, batch *Batch, jobs ...*Job) error {
return err
}

var p = newPipeline(name, fmt.Sprintf("batch %s", batch.FullName()))
var p = newPipeline(name, fmt.Sprintf("batch %s", batch.FullName))
p.ObjectType = JobObjectTypeBatch
p.ObjectID = batch.ID
return p.queueSerialOp(op, jobs...)
Expand Down
12 changes: 10 additions & 2 deletions test/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,16 @@ func cacheBatchData() {
l.Fatalf("Unable to query database for batch rename map: %s", op.Err())
}
var b = &models.Batch{MARCOrgCode: moc, CreatedAt: created, Name: name}
b.GenerateFullName()
var bnormal = &models.Batch{
MARCOrgCode: moc,
CreatedAt: time.UnixMilli(1136243045000),
Name: "Pages" + pages + "Titles" + titles,
}
bnormal.GenerateFullName()
batchRenames = append(batchRenames, replacer{
search: regexp.MustCompile(regexp.QuoteMeta(b.FullName())),
replace: bnormal.FullName(),
search: regexp.MustCompile(regexp.QuoteMeta(b.FullName)),
replace: bnormal.FullName,
})
batchRenames = append(batchRenames, replacer{
search: regexp.MustCompile(regexp.QuoteMeta(b.Name)),
Expand Down Expand Up @@ -271,6 +273,12 @@ func main() {
filelist = append(filelist, path)
return err
})
if err != nil {
l.Fatalf("Unable to search for files in %q: %s", fakemount, err)
}
if len(filelist) == 0 {
l.Fatalf("Unable to search for files in %q: nothing found", fakemount)
}
sort.Strings(filelist)

// Create a complete list of all [Path]s
Expand Down

0 comments on commit ee5eaa0

Please sign in to comment.