diff --git a/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!load.ts b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!load.ts new file mode 100644 index 0000000000..374f0045ac --- /dev/null +++ b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!load.ts @@ -0,0 +1,113 @@ +/* eslint-disable node/no-process-env */ +import { JWT } from 'google-auth-library' +import { GoogleSpreadsheet } from 'google-spreadsheet' +import PQueue from 'p-queue' +import PRetry from 'p-retry' +import papa from 'papaparse' + +import fs from 'fs' +import path from 'path' + +const creds = JSON.parse(process.env.GOOGLE_SERVICE_ACCT_CREDS as string) +const scopes = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive.file'] +const jwt = new JWT({ + email: creds.client_email, + key: creds.private_key, + scopes, +}) +const sheetID = '17Egecl5U8_o8Nx8qic5cUE7oD3A8__2KgXilz-7yoMU' + +const queue = new PQueue({ + concurrency: 1, + interval: 2250, + intervalCap: 1, + autoStart: false, + carryoverConcurrencyCount: true, +}) +function sleep(ms: number) { + return new Promise((resolve) => setTimeout(resolve, ms)) +} +const main = async () => { + const wb = new GoogleSpreadsheet(sheetID, jwt) + await wb.loadInfo() + + const sheetsToGet = { + Orgs: 'organization', + Emails: 'orgEmail', + 'Access Instructions': 'svcAccess', + Phones: 'orgPhone', + Locations: 'orgLocation', + OrgSocial: 'orgSocial', + Services: 'orgService', + } + const joinsToGet = { + OrgServicePhone: 'orgServicePhone', + OrgServiceEmail: 'orgServiceEmail', + OrgLocationEmail: 'orgLocationEmail', + OrgLocationService: 'orgLocationService', + OrgLocationPhone: 'orgLocationPhone', + } + const data = {} + const joins = {} + + const getData = async (sheetName: string) => { + const sheet = wb.sheetsByTitle[sheetName] + console.log('Parsing', sheetName) + if (!sheet) throw new Error(`Sheet ${sheetName} not found in spreadsheet ${sheetID}`) + const csv = await sheet.downloadAsCSV() + const parsed = papa.parse(csv.toString(), { header: true, skipEmptyLines: 'greedy' }) + const dataName = sheetsToGet[sheetName] + console.log(sheetName, `returned ${parsed.data.length} rows`) + data[dataName] = parsed.data + } + + const getJoin = async (joinName: string) => { + const sheet = wb.sheetsByTitle[joinName] + console.log('Parsing', joinName) + if (!sheet) throw new Error(`Sheet ${joinName} not found in spreadsheet ${sheetID}`) + const csv = await sheet.downloadAsCSV() + const parsed = papa.parse(csv.toString(), { header: true, skipEmptyLines: true }) + console.log(joinName, `returned ${parsed.data.length} rows`) + const dataName = joinsToGet[joinName] + joins[dataName] = parsed.data + } + + for (const sheetName of Object.keys(sheetsToGet)) { + queue.add(async () => { + await PRetry(() => getData(sheetName), { + onFailedAttempt: async (err) => { + console.error(`[${err.attemptNumber}/${err.retriesLeft}] ${err.message} -- Trying again`) + // await sleep(5000) + }, + // factor: 3, + randomize: true, + }) + }) + } + queue.add(async () => { + console.log("Let google catch it's breath") + await sleep(5000) + }) + + for (const joinName of Object.keys(joinsToGet)) { + queue.add(async () => { + await PRetry(() => getJoin(joinName), { + onFailedAttempt: async (err) => { + console.error( + `[${err.attemptNumber}/${err.retriesLeft}] ${err.message} -- Trying again in 5 seconds` + ) + await sleep(5000) + }, + }) + }) + } + queue.add(() => { + console.log('writing data.json') + fs.writeFileSync(path.resolve(__dirname, 'load.json'), JSON.stringify(data)) + console.log('writing joins.json') + fs.writeFileSync(path.resolve(__dirname, 'joins.json'), JSON.stringify(joins)) + }) + queue.start() +} + +main() diff --git a/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!prep-single.ts b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!prep-single.ts new file mode 100644 index 0000000000..15f201f2b1 --- /dev/null +++ b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!prep-single.ts @@ -0,0 +1,828 @@ +/* eslint-disable node/no-process-env */ +import compact from 'just-compact' +import { isSupportedCountry, parsePhoneNumberWithError } from 'libphonenumber-js' +import superjson from 'superjson' + +import fs from 'fs' +import path from 'path' + +import { type Prisma, prisma } from '~db/client' +import { generateNestedFreeText, generateNestedFreeTextUpsert } from '~db/lib/generateFreeText' +import { generateId, isIdFor } from '~db/lib/idGen' +import { generateUniqueSlug } from '~db/lib/slugGen' +import { JsonInputOrNull, accessInstructions as zAccessInstructions } from '~db/zod_util' + +import { DataFile, JoinFile } from './!schemas' + +const rawData = JSON.parse(fs.readFileSync(path.resolve(__dirname, 'load.json'), 'utf8')) +const rawJoins = JSON.parse(fs.readFileSync(path.resolve(__dirname, 'joins.json'), 'utf8')) + +const parsedData = DataFile.safeParse(rawData) +const parsedJoins = JoinFile.safeParse(rawJoins) + +export interface Output { + records: { + organization: Prisma.OrganizationUpsertArgs + orgLocation: Prisma.OrgLocationUpsertArgs[] + orgEmail: Prisma.OrgEmailUpsertArgs[] + orgPhone: Prisma.OrgPhoneUpsertArgs[] + orgService: Prisma.OrgServiceUpsertArgs[] + }[] + + handledSuggestions: Prisma.SuggestionUpdateManyArgs +} + +const handledSuggestions: string[] = [] + +const output: Output = { + records: [], + handledSuggestions: { + where: { organizationId: { in: handledSuggestions } }, + data: { handled: true }, + }, +} +const orgAttributes = [ + 'asylum-seekers', + 'bipoc-comm', + 'bipoc-led', + 'black-led', + 'gender-nc', + 'hiv-comm', + 'immigrant-comm', + 'immigrant-led', + 'lgbtq-youth-focus', + 'resettled-refugees', + 'spanish-speakers', + 'trans-comm', + 'trans-fem', + 'trans-led', + 'trans-masc', + 'trans-youth-focus', +] +const activeCountries = ['UM', 'US', 'MH', 'PW', 'AS', 'MX', 'CA', 'MP', 'GU', 'PR', 'VI'] + +const attributes = { + alertMessage: 'attr_01GYSVX1NAMR6RDV6M69H4KN3T', + serviceAccess: { + email: 'attr_01GW2HHFVKFM4TDY4QRK4AR2ZW', + phone: 'attr_01GW2HHFVMKTFWCKBVVFJ5GMY0', + file: 'attr_01GW2HHFVKMRHFD8SMDAZM3SSM', + link: 'attr_01GW2HHFVMYXMS8ARA3GE7HZFD', + }, + 'at-capacity': 'attr_01GW2HHFV3YJ2AWADHVKG79BQ0', + 'cost-fees': 'attr_01GW2HHFVGWKWB53HWAAHQ9AAZ', + 'cost-free': 'attr_01GW2HHFVGDTNW9PDQNXK6TF1T', + 'elig-age-min': 'attr_01GW2HHFVGSAZXGR4JAVHEK6ZC', + 'elig-age-max': 'attr_01GW2HHFVGSAZXGR4JAVHEK6ZC', + 'has-confidentiality-policy': 'attr_01GW2HHFV3BADK80TG0DXXFPMM', + 'lang-offered': 'attr_01GW2HHFVJ8K180CNX339BTXM2', + 'offers-remote-services': 'attr_01GW2HHFV5Q7XN2ZNTYFR1AD3M', + 'other-describe': 'attr_01GW2HHFVJDKVF1HV7559CNZCY', + 'req-medical-insurance': 'attr_01GW2HHFVH9DPBZ968VXGE50E7', + 'req-photo-id': 'attr_01GW2HHFVHZ599M48CMSPGDCSC', + 'req-proof-of-age': 'attr_01GW2HHFVH0GQK0GAJR5D952V3', + 'req-proof-of-income': 'attr_01GW2HHFVHEVX4PMNN077ASQMG', + 'req-referral': 'attr_01GW2HHFVJH8MADHYTHBV54CER', +} + +const serviceAttributes = { + boolean: [ + 'at-capacity', + 'cost-free', + 'has-confidentiality-policy', + 'offers-remote-services', + 'req-medical-insurance', + 'req-photo-id', + 'req-proof-of-age', + 'req-proof-of-income', + 'req-referral', + ], + cost: ['cost-fees'], + age: ['elig-age-max', 'elig-age-min'], + languages: ['lang-offered'], + text: ['other-describe'], + all: [ + 'at-capacity', + 'cost-free', + 'has-confidentiality-policy', + 'offers-remote-services', + 'req-medical-insurance', + 'req-photo-id', + 'req-proof-of-age', + 'req-proof-of-income', + 'req-referral', + 'cost-fees', + 'elig-age-max', + 'elig-age-min', + 'lang-offered', + 'other-describe', + ], +} as const +const zServAccess = zAccessInstructions.getAll() +const prep = async () => { + const attributes = await prisma.attribute.findMany({ select: { id: true, tag: true } }) + const attributeMap = new Map(attributes.map(({ id, tag }) => [tag, id])) + const countries = await prisma.country.findMany({ select: { id: true, cca2: true } }) + const countryMap = new Map(countries.map(({ cca2, id }) => [id, cca2])) + const govDist = await prisma.govDist.findMany({ + select: { id: true, abbrev: true }, + where: { isPrimary: true }, + }) + const govDistMap = new Map(govDist.map(({ abbrev, id }) => [id, abbrev])) + const existingOrgs = await prisma.organization.findMany() + const orgMap = new Map(existingOrgs.map(({ id, ...rest }) => [id, rest])) + const existingLocations = await prisma.orgLocation.findMany({ include: { serviceAreas: true } }) + const locationMap = new Map(existingLocations.map(({ id, ...rest }) => [id, rest])) + const geoCache = new Map() + if (fs.existsSync(path.resolve(__dirname, 'geocache.json'))) { + const cacheData = JSON.parse(fs.readFileSync(path.resolve(__dirname, 'geocache.json'), 'utf8')) as [ + string, + { lat: number; lon: number }, + ][] + if (Array.isArray(cacheData)) { + for (const [id, loc] of cacheData) { + geoCache.set(id, loc) + } + console.log(geoCache.size) + } + } + + return { attributeMap, countryMap, govDistMap, orgMap, locationMap, geoCache } +} + +function throttleApiCalls(fn: () => Promise): () => Promise { + let count = 0 + const interval = 1000 // 1 second + + return async function apiCall() { + if (count >= 5) { + await new Promise((resolve) => setTimeout(resolve, interval)) + count = 0 + } + + count++ + return await fn() + } +} + +const run = async () => { + const { attributeMap, countryMap, govDistMap, orgMap, locationMap, geoCache } = await prep() + if (!parsedData.success || !parsedJoins.success) { + if (!parsedData.success) console.error(parsedData.error.format()) + if (!parsedJoins.success) console.error(parsedJoins.error.format()) + return + } + const data = parsedData.data + const joins = parsedJoins.data + + const idMap = new Map() + + for (const org of data.organization) { + if (org['reviewed?'] !== false) { + console.info(`Skipping ${org.Name} (${org.id}) --> Not ready for upload`) + continue + } + const existingOrgRecord = orgMap.get(org.id) + + const isNew = !isIdFor('organization', org.id) || !existingOrgRecord + const orgId = isNew ? generateId('organization') : org.id + console.info(`Processing ${org.id} -- ${org.Name}`) + if (!isNew) { + handledSuggestions.push(orgId) + } + idMap.set(org.id, orgId) + + const orgData: Prisma.OrganizationUpsertArgs['create'] = { + name: org.Name.trim(), + slug: existingOrgRecord?.slug ?? (await generateUniqueSlug({ name: org.Name.trim(), id: orgId })), + source: { connect: { id: 'srce_01GXD88N4X2XNE3DW0G1AZJ403' } }, + lastVerified: new Date(), + published: true, + } as const + + const record: Output['records'][number] = { + organization: { + where: { id: orgId }, + create: { id: orgId, ...orgData }, + update: orgData, + }, + orgLocation: [], + orgEmail: [], + orgPhone: [], + orgService: [], + } + const organizationAttributes: OrganizationAttributes = { + connectOrCreate: [], + upsert: [], + } + + if (org.Description) { + const descriptionId = existingOrgRecord?.descriptionId ?? generateId('freeText') + const generateFreetextArgs = { + orgId, + text: org.Description.trim(), + type: 'orgDesc', + freeTextId: descriptionId, + } as const + record.organization.create.description = generateNestedFreeText(generateFreetextArgs) + record.organization.update.description = generateNestedFreeTextUpsert(generateFreetextArgs) + } + if (org['Alert Message']) { + const existingAlert = await prisma.attributeSupplement.findFirst({ + where: { attributeId: attributes.alertMessage, organizationId: orgId }, + }) + const supplementId = existingAlert?.id ?? generateId('attributeSupplement') + const alertMessageArgs = { + orgId, + text: org['Alert Message'].trim(), + type: 'attSupp', + itemId: supplementId, + } as const + organizationAttributes.connectOrCreate.push({ + where: { id: supplementId }, + create: { + id: supplementId, + attribute: { connect: { id: attributes.alertMessage } }, + text: generateNestedFreeText(alertMessageArgs), + }, + }) + organizationAttributes.upsert.push({ + where: { id: supplementId }, + update: { text: generateNestedFreeTextUpsert(alertMessageArgs) }, + create: { + attribute: { connect: { id: attributes.alertMessage } }, + text: generateNestedFreeText(alertMessageArgs), + }, + }) + } + for (const attrib of orgAttributes) { + if (org[attrib]) { + const attributeId = attributeMap.get(attrib) + if (!attributeId) continue + const existingAttrib = await prisma.attributeSupplement.findFirst({ + where: { attributeId, organizationId: orgId }, + }) + const id = existingAttrib?.id ?? generateId('attributeSupplement') + const connectOrCreateArgs = { where: { id }, create: { id, attributeId } } as const + organizationAttributes.connectOrCreate.push(connectOrCreateArgs) + organizationAttributes.upsert.push({ + ...connectOrCreateArgs, + update: {}, + }) + } + } + + if (org.URL) { + const existingWebsite = await prisma.orgWebsite.findFirst({ + where: { organizationId: orgId }, + }) + const id = existingWebsite?.id ?? generateId('orgWebsite') + record.organization.create.websites = { + create: { url: org.URL, id: generateId('orgWebsite') }, + } + record.organization.update.websites = { + upsert: { where: { id }, create: { id, url: org.URL }, update: { url: org.URL } }, + } + } + + const locations = data.orgLocation.filter(({ organizationId }) => organizationId === org.id) + const locationIds = locations.map(({ id }) => id) + + const services = data.orgService.filter(({ organizationId }) => organizationId === org.id) + const serviceIds = services.map(({ id }) => id) + + const emailIdsToProcess = [ + ...new Set( + joins.orgLocationEmail + .filter(({ locationId }) => locationIds.includes(locationId)) + .map(({ emailId }) => emailId) + .concat( + joins.orgServiceEmail + .filter(({ serviceId }) => serviceIds.includes(serviceId)) + .map(({ emailId }) => emailId) + ) + ), + ] + const phoneIdsToProcess = [ + ...new Set( + joins.orgLocationPhone + .filter(({ locationId }) => locationIds.includes(locationId)) + .map(({ phoneId }) => phoneId) + .concat( + joins.orgServicePhone + .filter(({ serviceId }) => serviceIds.includes(serviceId)) + .map(({ phoneId }) => phoneId) + ) + ), + ] + const serviceIdsToProcess = [ + ...new Set( + joins.orgLocationService + .filter(({ locationId }) => locationIds.includes(locationId)) + .map(({ serviceId }) => serviceId) + ), + ] + for (const legacyId of emailIdsToProcess) { + const email = data.orgEmail.find(({ id }) => id === legacyId) + if (!email) { + console.error(`Cannot locate email record ${legacyId}`) + continue + } + const existingRecord = await prisma.orgEmail.findFirst({ + where: { OR: [{ id: email.id }, { legacyId: email.id }] }, + }) + const emailId = isIdFor('orgEmail', email.id) ? email.id : existingRecord?.id ?? generateId('orgEmail') + idMap.set(legacyId, emailId) + const emailDescArgs = { + orgId, + type: 'emailDesc', + itemId: emailId, + text: email.description!, + freeTextId: existingRecord?.descriptionId, + } as const + record.orgEmail.push({ + where: { id: emailId }, + create: { + id: emailId, + legacyId, + email: email.email, + description: email.description ? generateNestedFreeText(emailDescArgs) : undefined, + }, + update: { + legacyId, + email: email.email, + description: email.description ? generateNestedFreeTextUpsert(emailDescArgs) : undefined, + }, + }) + } + for (const legacyId of phoneIdsToProcess) { + const phone = data.orgPhone.find(({ id }) => id === legacyId) + if (!phone) { + console.error(`Cannot locate phone record ${legacyId}`) + continue + } + const existingRecord = await prisma.orgPhone.findFirst({ + where: { OR: [{ id: phone.id }, { legacyId: phone.id }] }, + }) + const phoneId = isIdFor('orgPhone', phone.id) ? phone.id : existingRecord?.id ?? generateId('orgPhone') + idMap.set(legacyId, phoneId) + const phoneDescArgs = { + orgId, + type: 'phoneDesc', + itemId: phoneId, + text: phone.description, + freeTextId: existingRecord?.descriptionId, + } as const + const locationLinkCandidate = data.orgLocation.find(({ id }) => { + const { locationId } = joins.orgLocationPhone.find(({ phoneId }) => phoneId === phone.id) ?? {} + return locationId === id + }) + + const cca2val = locationLinkCandidate?.Country ?? 'US' + const countrycode = isSupportedCountry(cca2val) ? cca2val : 'US' + const parsedPhone = parsePhoneNumberWithError( + compact([phone.number, phone.ext]).join(' ').trim(), + countrycode + ) + record.orgPhone.push({ + where: { id: phoneId }, + create: { + id: phoneId, + legacyId, + number: parsedPhone.nationalNumber, + ext: parsedPhone.ext, + country: { connect: { cca2: countrycode } }, + description: phone.description ? generateNestedFreeText(phoneDescArgs) : undefined, + }, + update: { + legacyId, + number: parsedPhone.nationalNumber, + ext: parsedPhone.ext, + country: { connect: { cca2: countrycode } }, + description: phone.description ? generateNestedFreeTextUpsert(phoneDescArgs) : undefined, + }, + }) + } + for (const legacyId of serviceIdsToProcess) { + const service = data.orgService.find(({ id }) => id === legacyId) + if (!service) { + console.error(`Cannot locate service record ${legacyId}`) + continue + } + const existingRecord = await prisma.orgService.findFirst({ + where: { OR: [{ id: service.id }, { legacyId: service.id }] }, + }) + const serviceId = isIdFor('orgService', service.id) + ? service.id + : existingRecord?.id ?? generateId('orgService') + idMap.set(legacyId, serviceId) + const serviceNameArgs = { + orgId, + type: 'svcName', + itemId: serviceId, + text: service.Title, + freeTextId: existingRecord?.serviceNameId, + } as const + const serviceDescArgs = { + orgId, + type: 'svcDesc', + itemId: serviceId, + text: service.Description, + freeTextId: existingRecord?.descriptionId, + } as const + + const generateAttribRecords = async (): Promise<{ + create: Prisma.AttributeSupplementCreateNestedManyWithoutServiceInput + update: Prisma.AttributeSupplementUpdateManyWithoutServiceNestedInput + }> => { + const connectOrCreate: Prisma.AttributeSupplementCreateNestedManyWithoutServiceInput['connectOrCreate'] = + [] + const upsert: Prisma.AttributeSupplementUpdateManyWithoutServiceNestedInput['upsert'] = [] + + for (const tag of serviceAttributes.all) { + if (Object.keys(service).includes(tag) && service[tag]) { + const attributeId = attributes[tag] + if (!attributeId) throw new Error(`Unknown attribute -> ${tag}`) + + const existingRecord = await prisma.attributeSupplement.findFirst({ + where: { + attributeId, + serviceId, + }, + }) + const supplementId = existingRecord?.id ?? generateId('attributeSupplement') + const where = { id: supplementId } + + switch (tag) { + case 'other-describe': + case 'cost-fees': { + const content = service[tag] + if (typeof content !== 'string') break + const freeTextArgs = { + orgId, + type: 'attSupp', + itemId: supplementId, + text: content, + freeTextId: existingRecord?.textId, + } as const + + const create = { + id: supplementId, + attribute: { connect: { id: attributeId } }, + text: generateNestedFreeText(freeTextArgs), + } + connectOrCreate.push({ + where, + create, + }) + upsert.push({ + where, + create, + update: { + text: generateNestedFreeTextUpsert(freeTextArgs), + }, + }) + break + } + case 'elig-age-max': + case 'elig-age-min': { + const data = JsonInputOrNull.parse( + superjson.serialize({ + ...(service['elig-age-min'] ? { min: service['elig-age-min'] } : {}), + ...(service['elig-age-max'] ? { max: service['elig-age-max'] } : {}), + }) + ) + const create = { + id: supplementId, + attribute: { connect: { id: attributeId } }, + data, + } + connectOrCreate.push({ + where, + create, + }) + upsert.push({ + where, + create, + update: { + data, + }, + }) + + break + } + case 'lang-offered': { + if (!service['lang-offered']) break + const langs = service['lang-offered'] + for (const langId of langs) { + const create = { + id: supplementId, + attribute: { connect: { id: attributeId } }, + language: { connect: { id: langId } }, + } + connectOrCreate.push({ + where, + create, + }) + upsert.push({ + where, + create, + update: { + language: { connect: { id: langId } }, + }, + }) + } + break + } + default: { + const create = { + id: supplementId, + attribute: { connect: { id: attributeId } }, + } + connectOrCreate.push({ + where, + create, + }) + upsert.push({ + where, + create, + update: { + attribute: { connect: { id: attributeId } }, + }, + }) + } + } + } + } + const serviceAccessToAdd = data.svcAccess.filter(({ serviceId }) => serviceId === service.id) + for (const { type: accessType, value } of serviceAccessToAdd) { + if (accessType === '') continue + const attributeId = attributes.serviceAccess[accessType] + const existingRecord = await prisma.attributeSupplement.findFirst({ + where: { + attributeId, + serviceId, + }, + }) + const supplementId = existingRecord?.id ?? generateId('attributeSupplement') + const where = { id: supplementId } + const data = JsonInputOrNull.parse( + superjson.serialize( + zServAccess.parse({ + access_type: accessType, + access_value: value, + }) + ) + ) + const create = { + id: supplementId, + attribute: { connect: { id: attributeId } }, + data, + } + connectOrCreate.push({ + where, + create, + }) + upsert.push({ + where, + create, + update: { + attribute: { connect: { id: attributeId } }, + data, + }, + }) + } + return { + create: { connectOrCreate }, + update: { upsert }, + } + } + const attributeRecords = await generateAttribRecords() + const servicePhones = compact( + joins.orgServicePhone + .filter(({ serviceId }) => serviceId === service.id) + .map(({ phoneId }) => idMap.get(phoneId)) + ) + const serviceEmails = compact( + joins.orgServiceEmail + .filter(({ serviceId }) => serviceId === service.id) + .map(({ emailId }) => idMap.get(emailId)) + ) + + record.orgService.push({ + where: { id: serviceId }, + create: { + id: serviceId, + legacyId, + serviceName: generateNestedFreeText(serviceNameArgs), + description: generateNestedFreeText(serviceDescArgs), + organization: { connect: { id: orgId } }, + published: true, + services: service['Tag(s)']?.length + ? { createMany: { data: service['Tag(s)']?.map(({ tag: tagId }) => ({ tagId })) } } + : undefined, + attributes: attributeRecords.create, + emails: { + connectOrCreate: serviceEmails.map((orgEmailId) => ({ + where: { orgEmailId_serviceId: { orgEmailId, serviceId } }, + create: { orgEmailId }, + })), + }, + phones: { + connectOrCreate: servicePhones.map((orgPhoneId) => ({ + where: { orgPhoneId_serviceId: { orgPhoneId, serviceId } }, + create: { orgPhoneId }, + })), + }, + }, + update: { + legacyId, + serviceName: generateNestedFreeTextUpsert(serviceNameArgs), + description: generateNestedFreeTextUpsert(serviceDescArgs), + organization: { connect: { id: orgId } }, + published: true, + services: service['Tag(s)']?.length + ? { + upsert: service['Tag(s)'].map(({ tag: tagId }) => ({ + where: { serviceId_tagId: { serviceId, tagId } }, + create: { tagId }, + update: { tagId }, + })), + } + : undefined, + emails: { + connectOrCreate: serviceEmails.map((orgEmailId) => ({ + where: { orgEmailId_serviceId: { orgEmailId, serviceId } }, + create: { orgEmailId }, + })), + }, + phones: { + connectOrCreate: servicePhones.map((orgPhoneId) => ({ + where: { orgPhoneId_serviceId: { orgPhoneId, serviceId } }, + create: { orgPhoneId }, + })), + }, + attributes: attributeRecords.update, + }, + }) + } + + for (const loc of locations) { + const existingLocationRecord = locationMap.get(loc.id) + const orgLocationId = isIdFor('orgLocation', loc.id) ? loc.id : generateId('orgLocation') + + const locationEmails = compact( + joins.orgLocationEmail + .filter(({ locationId }) => locationId === loc.id) + .map(({ emailId }) => idMap.get(emailId)) + ) + const locationPhones = compact( + joins.orgLocationPhone + .filter(({ locationId }) => locationId === loc.id) + .map(({ phoneId }) => idMap.get(phoneId)) + ) + const locationServices = compact( + joins.orgLocationService + .filter(({ locationId }) => locationId === loc.id) + .map(({ serviceId }) => idMap.get(serviceId)) + ) + + const locDataCreate: Prisma.OrgLocationUpsertArgs['create'] = { + id: orgLocationId, + orgId, + name: loc['Location Name'].trim(), + street1: loc.Street, + city: loc.City?.trim() ?? '', + countryId: loc.Country, + govDistId: loc.State, + postCode: loc.PostalCode, + notVisitable: loc['Hide Location?'], + mapCityOnly: loc['Hide Location?'], + phones: { + connectOrCreate: locationPhones.map((phoneId) => ({ + where: { orgLocationId_phoneId: { orgLocationId, phoneId } }, + create: { phoneId }, + })), + }, + emails: { + connectOrCreate: locationEmails.map((orgEmailId) => ({ + where: { orgEmailId_orgLocationId: { orgEmailId, orgLocationId } }, + create: { orgEmailId }, + })), + }, + services: { + connectOrCreate: locationServices.map((serviceId) => ({ + where: { orgLocationId_serviceId: { orgLocationId, serviceId } }, + create: { serviceId }, + })), + }, + } + const locDataUpdate: Prisma.OrgLocationUpsertArgs['update'] = { + ...locDataCreate, + id: undefined, + } + + const cca2 = countryMap.get(loc.Country) + const govDistAbbrev = govDistMap.get(loc.State ?? '') + if (loc.City && cca2) { + const searchString = compact([ + locDataCreate.street1, + locDataCreate.city, + govDistAbbrev, + locDataCreate.postCode, + cca2, + ]).join(', ') + + const searchParams = new URLSearchParams({ + text: searchString, + format: 'json', + apiKey: process.env.GEOAPIFY_API_KEY as string, + filter: `countrycode:${activeCountries.join(',').toLowerCase()}`, + }).toString() + const cachedResult = geoCache.get(searchParams) + if (cachedResult?.lat && cachedResult?.lon) { + locDataCreate.latitude = cachedResult.lat + locDataCreate.longitude = cachedResult.lon + locDataUpdate.latitude = cachedResult.lat + locDataUpdate.longitude = cachedResult.lon + } else { + const geoURL = `https://api.geoapify.com/v1/geocode/search?${searchParams}` + const geoResponse = await throttleApiCalls(async () => await fetch(geoURL))() + const geoData = await geoResponse.json() + const geoResult = geoData.results.length ? geoData.results[0] : null + locDataCreate.latitude = geoResult?.lat + locDataCreate.longitude = geoResult?.lon + locDataUpdate.latitude = geoResult?.lat + locDataUpdate.longitude = geoResult?.lon + if (geoResult?.lat && geoResult?.lon) { + geoCache.set(searchParams, { lat: geoResult.lat, lon: geoResult.lon }) + } + } + + if (loc['Service Area Coverage - State(s)'] || loc['Service Area Coverage - USA National']) { + const serviceAreaId = existingLocationRecord?.serviceAreas?.id ?? generateId('serviceArea') + + const countriesToAttach = (loc['Service Area Coverage - USA National'] ?? []).map((country) => ({ + countryId: country.trim(), + })) + const govDistsToAttach = (loc['Service Area Coverage - State(s)'] ?? []).map((govDist) => ({ + govDistId: govDist.trim(), + })) + const serviceAreaCreate = { + create: { + id: serviceAreaId, + countries: { + createMany: { + data: countriesToAttach, + skipDuplicates: true, + }, + }, + districts: { + createMany: { + data: govDistsToAttach, + skipDuplicates: true, + }, + }, + }, + } as const + locDataCreate.serviceAreas = serviceAreaCreate + locDataUpdate.serviceAreas = { + upsert: { + create: serviceAreaCreate.create, + update: { + countries: { + set: countriesToAttach.map(({ countryId }) => ({ + serviceAreaId_countryId: { countryId, serviceAreaId }, + })), + }, + districts: { + set: govDistsToAttach.map(({ govDistId }) => ({ + serviceAreaId_govDistId: { govDistId, serviceAreaId }, + })), + }, + }, + }, + } + } + } + + const orgLocationRecord: Prisma.OrgLocationUpsertArgs = { + where: { id: orgLocationId }, + create: locDataCreate, + update: locDataUpdate, + } + record.orgLocation.push(orgLocationRecord) + } + output.records.push(record) + } + console.log(geoCache.size) + fs.writeFileSync(path.resolve(__dirname, 'geocache.json'), JSON.stringify([...geoCache.entries()])) + fs.writeFileSync(path.resolve(__dirname, 'data.json'), JSON.stringify(output)) +} + +run() + +type OrganizationAttributes = { + connectOrCreate: Prisma.AttributeSupplementCreateOrConnectWithoutOrganizationInput[] + upsert: Prisma.AttributeSupplementUpsertWithWhereUniqueWithoutOrganizationInput[] +} diff --git a/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!schemas.ts b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!schemas.ts new file mode 100644 index 0000000000..0467be15f5 --- /dev/null +++ b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/!schemas.ts @@ -0,0 +1,169 @@ +import { string, z } from 'zod' + +const stripEmptyString = (val?: string) => (typeof val === 'string' && val === '' ? undefined : val) +const boolOrBlank = z.enum(['FALSE', 'TRUE', '']).transform((val) => (val === 'TRUE' ? true : false)) + +const stringToArray = (val?: string) => + (typeof val === 'string' && val === '') || val === undefined + ? undefined + : val.split(',').map((x) => x.trim()) + +const separateServiceTags = (val?: string) => { + const arr = stringToArray(val) + if (!arr) return undefined + const output: { category: string; tag: string }[] = [] + for (const item of arr) { + const [category, tag] = item.split(':') + if (typeof category === 'string' && typeof tag === 'string') { + output.push({ category, tag }) + } + } + return output +} +const coerceNumber = (val?: string) => { + const stripped = stripEmptyString(val) + if (stripped) { + return parseInt(stripped) + } + return undefined +} +export const DataSchema = { + Organization: z.object({ + id: z.string(), + Name: z.string(), + URL: z.string().optional().transform(stripEmptyString), + Description: z.string(), + 'Alert Message': z.string().optional().transform(stripEmptyString), + 'bipoc-led': boolOrBlank, + 'black-led': boolOrBlank, + 'bipoc-comm': boolOrBlank, + 'immigrant-led': boolOrBlank, + 'immigrant-comm': boolOrBlank, + 'asylum-seekers': boolOrBlank, + 'resettled-refugees': boolOrBlank, + 'trans-led': boolOrBlank, + 'trans-comm': boolOrBlank, + 'trans-youth-focus': boolOrBlank, + 'trans-masc': boolOrBlank, + 'trans-fem': boolOrBlank, + 'gender-nc': boolOrBlank, + 'lgbtq-youth-focus': boolOrBlank, + 'spanish-speakers': boolOrBlank, + 'hiv-comm': boolOrBlank, + 'Additional Notes': z.string().optional().transform(stripEmptyString), + 'reviewed?': boolOrBlank, + }), + + OrgEmail: z.object({ + id: z.string(), + firstName: z.string().optional().transform(stripEmptyString), + lastName: z.string().optional().transform(stripEmptyString), + primary: boolOrBlank, + email: z.string().email(), + description: z.string().optional().transform(stripEmptyString), + organizationId: z.string(), + locationOnly: boolOrBlank, + serviceOnly: boolOrBlank, + }), + + SvcAccess: z.object({ + id: z.string(), + serviceId: z.string(), + type: z.enum(['email', 'phone', 'file', 'link', '']), + value: z.string(), + }), + OrgPhone: z.object({ + id: z.string(), + number: z.string(), + ext: z.string().optional().transform(stripEmptyString), + primary: boolOrBlank, + countryId: z.string().optional().transform(stripEmptyString), + description: z.string(), + organizationId: z.string(), + }), + OrgLocation: z.object({ + id: z.string(), + organizationId: z.string(), + 'Location Name': z.string(), + Country: z.string(), + Street: z.string().optional().transform(stripEmptyString), + City: z.string().optional().transform(stripEmptyString), + State: z.string().optional().transform(stripEmptyString), + PostalCode: z.string().optional().transform(stripEmptyString), + 'Hide Location?': boolOrBlank, + 'Service Area Coverage - USA National': z + .string() + .optional() + .transform((val) => stringToArray(val)), + 'Service Area Coverage - State(s)': z + .string() + .optional() + .transform((val) => stringToArray(val)), + }), + OrgSocial: z.object({ + id: z.string(), + organizationId: z.string(), + service: z.string(), + url: z.string().url(), + }), + OrgService: z.object({ + id: z.string(), + organizationId: z.string(), + Title: z.string(), + Description: z.string(), + 'Tag(s)': z.string().optional().transform(separateServiceTags), + 'other-describe': z.string().optional().transform(stripEmptyString), + 'elig-age-min': z.string().optional().transform(coerceNumber), + 'elig-age-max': z.string().optional().transform(coerceNumber), + 'cost-free': boolOrBlank, + 'cost-fees': z.string().optional().transform(coerceNumber), + 'lang-offered': z.string().optional().transform(stringToArray), + 'has-confidentiality-policy': boolOrBlank, + 'offers-remote-services': boolOrBlank, + 'req-medical-insurance': boolOrBlank, + 'req-photo-id': boolOrBlank, + 'req-proof-of-age': boolOrBlank, + 'req-proof-of-income': boolOrBlank, + 'req-referral': boolOrBlank, + 'at-capacity': boolOrBlank, + }), +} +export const DataFile = z.object({ + organization: DataSchema.Organization.array(), + orgEmail: DataSchema.OrgEmail.array(), + svcAccess: DataSchema.SvcAccess.array(), + orgPhone: DataSchema.OrgPhone.array(), + orgLocation: DataSchema.OrgLocation.array(), + orgSocial: DataSchema.OrgSocial.array(), + orgService: DataSchema.OrgService.array(), +}) + +export const JoinSchema = { + OrgServicePhone: z.object({ + serviceId: z.string(), + phoneId: z.string(), + }), + OrgServiceEmail: z.object({ + serviceId: z.string(), + emailId: z.string(), + }), + OrgLocationEmail: z.object({ + locationId: z.string(), + emailId: z.string(), + }), + OrgLocationService: z.object({ + locationId: z.string(), + serviceId: z.string(), + }), + OrgLocationPhone: z.object({ + locationId: z.string(), + phoneId: z.string(), + }), +} +export const JoinFile = z.object({ + orgServicePhone: JoinSchema.OrgServicePhone.array(), + orgServiceEmail: JoinSchema.OrgServiceEmail.array(), + orgLocationEmail: JoinSchema.OrgLocationEmail.array(), + orgLocationService: JoinSchema.OrgLocationService.array(), + orgLocationPhone: JoinSchema.OrgLocationPhone.array(), +}) diff --git a/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/index.ts b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/index.ts new file mode 100644 index 0000000000..71a37d7415 --- /dev/null +++ b/packages/db/prisma/data-migrations/2024-02-20_appsheet-load/index.ts @@ -0,0 +1,98 @@ +import { prisma } from '~db/client' +import { downloadFromDatastore, formatMessage } from '~db/prisma/common' +import { type MigrationJob } from '~db/prisma/dataMigrationRunner' +import { createLogger, type JobDef, jobPostRunner } from '~db/prisma/jobPreRun' + +import { type Output } from './!prep-single' +/** Define the job metadata here. */ +const jobDef: JobDef = { + jobId: '2024-02-20_appsheet-load', + title: 'appsheet load', + createdBy: 'Joe Karow', + /** Optional: Longer description for the job */ + description: undefined, +} +/** + * Job export - this variable MUST be UNIQUE + */ +export const job20240220_appsheet_load = { + title: `[${jobDef.jobId}] ${jobDef.title}`, + task: async (_ctx, task) => { + /** Create logging instance */ + createLogger(task, jobDef.jobId) + const log = (...args: Parameters) => (task.output = formatMessage(...args)) + /** + * Start defining your data migration from here. + * + * To log output, use `task.output = 'Message to log'` + * + * This will be written to `stdout` and to a log file in `/prisma/migration-logs/` + */ + + // Do stuff + + log(`Downloading data from datastore`) + const data = (await downloadFromDatastore('migrations/2024-02-20_appsheet-load/data.json', log)) as Output + + await prisma.$transaction( + async (tx) => { + let i = 1 + const total = data.records.length + for (const record of data.records) { + log(`[${i}/${total}] Upserting records for ${record.organization.create.name}`, 'info') + const counts = { + emails: 0, + phones: 0, + services: 0, + locations: 0, + organizations: 0, + } + const org = await tx.organization.upsert(record.organization) + if (org) counts.organizations++ + for (const email of record.orgEmail) { + log(`Upserting email ${email.create.email}`, 'update', true) + const result = await tx.orgEmail.upsert(email) + if (result) counts.emails++ + } + for (const phone of record.orgPhone) { + log(`Upserting phone ${phone.create.number}`, 'update', true) + const result = await tx.orgPhone.upsert(phone) + if (result) counts.phones++ + } + for (const service of record.orgService) { + log( + `Upserting service ${service.create.serviceName?.create?.tsKey?.create?.text}`, + 'update', + true + ) + const result = await tx.orgService.upsert(service) + if (result) counts.services++ + } + for (const location of record.orgLocation) { + log(`Upserting location ${location.create.name}`, 'update', true) + const result = await tx.orgLocation.upsert(location) + if (result) counts.locations++ + } + log( + `Processed -> Organizations: ${counts.organizations} Emails: ${counts.emails}/${record.orgEmail.length} Phones: ${counts.phones}/${record.orgPhone.length} Services: ${counts.services}/${record.orgService.length} Locations: ${counts.locations}/${record.orgLocation.length}`, + 'info', + true + ) + i++ + } + }, + { timeout: 180_000 } + ) + + const handledSuggestions = await prisma.suggestion.updateMany(data.handledSuggestions) + log(`Marked ${handledSuggestions.count} suggestions as 'handled'`) + + /** + * DO NOT REMOVE BELOW + * + * This writes a record to the DB to register that this migration has run successfully. + */ + await jobPostRunner(jobDef) + }, + def: jobDef, +} satisfies MigrationJob diff --git a/packages/db/prisma/data-migrations/index.ts b/packages/db/prisma/data-migrations/index.ts index 31638c79d7..c535b13b25 100644 --- a/packages/db/prisma/data-migrations/index.ts +++ b/packages/db/prisma/data-migrations/index.ts @@ -3,4 +3,6 @@ export * from './2024-01-31_fix-attr-supp-json/index' export * from './2024-01-31_target-population-attrib' export * from './2024-02-01_add-missing-attributes/index' export * from './2024-02-02_deactivate-incompatible-attribs' +export * from './2024-02-19_attach-orphan-text' +export * from './2024-02-20_appsheet-load/index' // codegen:end