Skip to content

Commit

Permalink
more changes to filter copyright, add openai api to get descriptions …
Browse files Browse the repository at this point in the history
…(you'll need a key)
  • Loading branch information
kentfitch committed Sep 11, 2024
1 parent 3bec713 commit 537c772
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@ copyright-check-Francis-set-suppressed-ignored.csv
web/package-lock.json
web/package.json

web/package-lock.json
web/package.json
161 changes: 153 additions & 8 deletions web/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"log4js": "^6.9.1",
"moment": "^2.29.4",
"morgan": "^1.10.0",
"openai": "^4.58.2",
"pdf-text-reader": "^5.1.0",
"pdf2html": "^3.1.0",
"rotating-file-stream": "^3.1.1",
Expand Down
30 changes: 26 additions & 4 deletions web/routes/admin.js
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ async function addOpenAIDescriptions(req, res) {
let count = 0 ;
let alreadyGotDesc = 0 ;
let descAdded = 0 ;
let errors = 0 ;

let solrRes = await axios.get(appConfig.solr.getSolrBaseUrl() + "pictures/select" +
"?wt=json&rows=9999&fl=id,url,title,bibId,processingStatus,openAIDescription,copyright" +
Expand Down Expand Up @@ -139,6 +140,8 @@ async function addOpenAIDescriptions(req, res) {
let instructions = "Please describe this image." ;
if (doc.title) instructions += " For reference, this is the title of the image: " + doc.title.replace("[picture]", "") ;

let openAIDescription = null ;
try {
const completion = await openai.chat.completions.create({
model:"gpt-4o",
messages:[
Expand All @@ -152,10 +155,28 @@ async function addOpenAIDescriptions(req, res) {

console.log("COMPLETION " + JSON.stringify(completion)) ;

let openAIDescription = completion.choices[0].message.content ;
openAIDescription = completion.choices[0].message.content ;
}
catch (oe) {

res.write("openAIDescription Error " + oe + "\n") ;
console.log("openAIDescription Error " + oe) ;
console.log(oe.stack) ;
}


res.write("doc " + doc.id + " openAIDescription: " + openAIDescription + "\n") ;

if (!openAIDescription) { // error...
let updatedFields = {
processingStatus: "error getting openAIdescr"
} ;
if (!doc.copyright) // fix bug
updatedFields.copyright = "Out of Copyright" ;
await updateDoc(doc.id, updatedFields) ;
errors++ ;
continue ;
}
let openaiDescriptionVector = await util.getEmbedding(openAIDescription) ;
// console.log("got embedding") ;

Expand All @@ -172,8 +193,8 @@ async function addOpenAIDescriptions(req, res) {

}

res.write("\n done count " + count + " alreadyGotDesc " + alreadyGotDesc + " descAdded " + descAdded) ;
console.log("\n done count " + count + " alreadyGotDesc " + alreadyGotDesc + " descAdded " + descAdded) ;
res.write("\n done count " + count + " alreadyGotDesc " + alreadyGotDesc + " descAdded " + descAdded + " errors " + errors) ;
console.log("\n done count " + count + " alreadyGotDesc " + alreadyGotDesc + " descAdded " + descAdded + " errors " + errors) ;

}

Expand Down Expand Up @@ -372,7 +393,8 @@ async function generatePhi35DescriptionForOAimageDescriptions(req, res) {

// REAL WAS : "?wt=json&rows=999999&fl=id,url,title,suppressed,manuallyForcedUnsuppressed&sort=id asc&q=id: {\"" + lastId + "\" TO \"z\"]") ;
// Fix-up for the first run: records without v12 were not given the title to help!
"?wt=json&rows=999999&fl=id,url,title,suppressed,manuallyForcedUnsuppressed&sort=id asc&q=openAIDescription:* AND msVision35Description:*") ;
// Another run (11 Sep 2024): generate for records that have an openAIDescription but no msVision35Description yet
"?wt=json&rows=999999&fl=id,url,title,suppressed,manuallyForcedUnsuppressed&sort=id asc&q=openAIDescription:* AND -msVision35Description:*") ;
if (!((solrRes.status == 200) && solrRes.data && solrRes.data.response)) {
res.write(" Failed to find any records, status: " + solrRes.status + "\n") ;
if (solrRes.data) res.write(" Solr data: " + JSON.stringify(solrRes.data) + "\n") ;
Expand Down

0 comments on commit 537c772

Please sign in to comment.