diff --git a/app/config/TaskConfiguration.scala b/app/config/TaskConfiguration.scala index 2c78b244..cd16d398 100644 --- a/app/config/TaskConfiguration.scala +++ b/app/config/TaskConfiguration.scala @@ -18,6 +18,7 @@ case class TaskConfiguration( probe: ProbeConfiguration, exportReportsToSFTP: ExportReportsToSFTPConfiguration, subcategoryLabels: SubcategoryLabelsTaskConfiguration, + siretExtraction: SiretExtractionConfiguration, sampleData: SampleDataConfiguration ) @@ -64,6 +65,11 @@ case class SubcategoryLabelsTaskConfiguration( interval: FiniteDuration ) +case class SiretExtractionConfiguration( + interval: FiniteDuration, + websiteCount: Int +) + case class ProbeConfiguration(active: Boolean) case class ExportReportsToSFTPConfiguration(filePath: String, startTime: LocalTime) diff --git a/app/controllers/SiretExtractorController.scala b/app/controllers/SiretExtractorController.scala index 83fcc224..5d5fcd24 100644 --- a/app/controllers/SiretExtractorController.scala +++ b/app/controllers/SiretExtractorController.scala @@ -6,25 +6,38 @@ import models.UserRole import play.api.mvc.ControllerComponents import services.SiretExtractorService import authentication.actions.UserAction.WithRole +import play.api.libs.json.Json +import repositories.siretextraction.SiretExtractionRepositoryInterface import scala.concurrent.ExecutionContext +import scala.concurrent.Future class SiretExtractorController( + siretExtractionRepository: SiretExtractionRepositoryInterface, siretExtractorService: SiretExtractorService, authenticator: Authenticator[User], controllerComponents: ControllerComponents )(implicit val ec: ExecutionContext) extends BaseController(authenticator, controllerComponents) { - def extractSiret() = SecuredAction.andThen(WithRole(UserRole.Admins)).async { request => - siretExtractorService - .extractSiret(request.body.asJson) - .map { response => - response.body match { - case Left(body) => Status(response.code.code)(body.getMessage) - case Right(body) => Status(response.code.code)(body) - } - } + def extractSiret() = SecuredAction.andThen(WithRole(UserRole.Admins)).async(parse.json) { request => + val maybeWebsite = (request.body \ "website").asOpt[String] + + maybeWebsite match { + case Some(website) => + siretExtractorService + .extractSiret(website) + .flatMap { response => + response.body match { + case Left(body) => Future.successful(Status(response.code.code)(body.getMessage)) + case Right(body) => + logger.debug(s"Saving siret extraction result (${body.status}) in DB before returning") + siretExtractionRepository.insertOrReplace(body).map(_ => Status(response.code.code)(Json.toJson(body))) + } + } + case None => Future.successful(BadRequest) + } + } } diff --git a/app/controllers/WebsiteController.scala b/app/controllers/WebsiteController.scala index f278efc2..ea19c609 100644 --- a/app/controllers/WebsiteController.scala +++ b/app/controllers/WebsiteController.scala @@ -137,7 +137,7 @@ class WebsiteController( errors => Future.successful(BadRequest(JsError.toJson(errors))), company => websitesOrchestrator - .updateCompany(websiteId, company, request.identity) + .updateCompany(websiteId, company, Some(request.identity)) .map(websiteAndCompany => Ok(Json.toJson(websiteAndCompany))) ) } diff --git a/app/loader/SignalConsoApplicationLoader.scala b/app/loader/SignalConsoApplicationLoader.scala index e51307a9..eb78b418 100644 --- a/app/loader/SignalConsoApplicationLoader.scala +++ b/app/loader/SignalConsoApplicationLoader.scala @@ -93,6 +93,7 @@ import repositories.reportmetadata.ReportMetadataRepository import repositories.reportmetadata.ReportMetadataRepositoryInterface import repositories.signalconsoreview.SignalConsoReviewRepository import repositories.signalconsoreview.SignalConsoReviewRepositoryInterface +import repositories.siretextraction.SiretExtractionRepository import repositories.socialnetwork.SocialNetworkRepository import repositories.socialnetwork.SocialNetworkRepositoryInterface import repositories.subcategorylabel.SubcategoryLabelRepository @@ -128,6 +129,7 @@ import tasks.report.ReportNotificationTask import tasks.report.ReportRemindersTask import tasks.report.SampleDataGenerationTask import tasks.subcategorylabel.SubcategoryLabelTask +import tasks.website.SiretExtractionTask import utils.CustomIpFilter import utils.EmailAddress import utils.FrontRoute @@ -255,6 +257,8 @@ class SignalConsoComponents( val albertClassificationRepository = new AlbertClassificationRepository(dbConfig) + val siretExtractionRepository = new SiretExtractionRepository(dbConfig) + val crypter = new JcaCrypter(applicationConfiguration.crypter) val signer = new JcaSigner(applicationConfiguration.signer) @@ -320,6 +324,8 @@ class SignalConsoComponents( attachmentService ) + val siretExtractorService = new SiretExtractorService(applicationConfiguration.siretExtractor) + // Orchestrator val userOrchestrator = new UserOrchestrator(userRepository, eventRepository) @@ -723,6 +729,16 @@ class SignalConsoComponents( taskRepository ) + val siretExtractionTask = new SiretExtractionTask( + actorSystem, + taskConfiguration, + taskRepository, + siretExtractionRepository, + siretExtractorService, + websitesOrchestrator, + companyRepository + ) + // Controller val blacklistedEmailsController = @@ -894,9 +910,13 @@ class SignalConsoComponents( controllerComponents ) - val siretExtractorService = new SiretExtractorService(applicationConfiguration.siretExtractor) val siretExtractorController = - new SiretExtractorController(siretExtractorService, cookieAuthenticator, controllerComponents) + new SiretExtractorController( + siretExtractionRepository, + siretExtractorService, + cookieAuthenticator, + controllerComponents + ) val importOrchestrator = new ImportOrchestrator( companyRepository, @@ -982,6 +1002,7 @@ class SignalConsoComponents( subcategoryLabelTask.schedule() companyAlbertLabelTask.schedule() companyReportCountViewRefresherTask.schedule() + siretExtractionTask.schedule() } override def config: Config = ConfigFactory.load() diff --git a/app/models/website/WebsiteCompanyReportCount.scala b/app/models/website/WebsiteCompanyReportCount.scala index 60f9f4b0..a375d926 100644 --- a/app/models/website/WebsiteCompanyReportCount.scala +++ b/app/models/website/WebsiteCompanyReportCount.scala @@ -5,6 +5,7 @@ import models.company.Company import models.investigation.InvestigationStatus import play.api.libs.json.Json import play.api.libs.json.Writes +import tasks.website.ExtractionResultApi import utils.Country import java.time.OffsetDateTime @@ -23,15 +24,18 @@ case class WebsiteCompanyReportCount( kind: String, company: Option[Company], investigationStatus: InvestigationStatus, - count: Int + count: Int, + siretExtraction: Option[ExtractionResultApi] ) object WebsiteCompanyReportCount { implicit val WebsiteCompanyCountWrites: Writes[WebsiteCompanyReportCount] = Json.writes[WebsiteCompanyReportCount] - def toApi(countByWebsiteCompany: ((Website, Option[Company]), Int)): WebsiteCompanyReportCount = { - val ((website, maybeCompany), count) = countByWebsiteCompany + def toApi( + countByWebsiteCompany: (Website, Option[Company], Option[ExtractionResultApi], Int) + ): WebsiteCompanyReportCount = { + val (website, maybeCompany, maybeSiretExtraction, count) = countByWebsiteCompany website .into[WebsiteCompanyReportCount] .withFieldComputed(_.id, _.id) @@ -39,6 +43,7 @@ object WebsiteCompanyReportCount { .withFieldConst(_.companyCountry, website.companyCountry.map(Country.fromCode(_))) .withFieldConst(_.count, count) .withFieldConst(_.kind, IdentificationStatus.toKind(website.identificationStatus)) + .withFieldConst(_.siretExtraction, maybeSiretExtraction) .transform } } diff --git a/app/orchestrators/ImportOrchestrator.scala b/app/orchestrators/ImportOrchestrator.scala index f54d6343..b4597e4a 100644 --- a/app/orchestrators/ImportOrchestrator.scala +++ b/app/orchestrators/ImportOrchestrator.scala @@ -131,8 +131,12 @@ class ImportOrchestrator( companyId = Some(company.id), isMarketplace = true ) - updatedWebsite <- websitesOrchestrator.updateIdentification(websiteToUpdate, user) - _ <- websitesOrchestrator.updatePreviousReportsAssociatedToWebsite(website.host, company, user.id) + updatedWebsite <- websitesOrchestrator.updateIdentification(websiteToUpdate, Some(user)) + _ <- websitesOrchestrator.updatePreviousReportsAssociatedToWebsite( + website.host, + company, + Some(user.id) + ) } yield updatedWebsite } case Nil => @@ -145,7 +149,7 @@ class ImportOrchestrator( ) for { createdWebsite <- websiteRepository.create(website) - _ <- websitesOrchestrator.updatePreviousReportsAssociatedToWebsite(website.host, company, user.id) + _ <- websitesOrchestrator.updatePreviousReportsAssociatedToWebsite(website.host, company, Some(user.id)) } yield createdWebsite } diff --git a/app/orchestrators/ReportOrchestrator.scala b/app/orchestrators/ReportOrchestrator.scala index 4b484e7d..cc22875a 100644 --- a/app/orchestrators/ReportOrchestrator.scala +++ b/app/orchestrators/ReportOrchestrator.scala @@ -618,14 +618,14 @@ class ReportOrchestrator( updatedReport <- updateReportCompany( existingReport, reportCompany, - requestingUserId + Some(requestingUserId) ) } yield updatedReport def updateReportCompanyForWebsite( existingReport: Report, reportCompany: ReportCompany, - adminUserId: UUID + adminUserId: Option[UUID] ) = if (isReportTooOld(existingReport)) { logger.debug(s"Report ${existingReport.id} is too old to be updated") @@ -636,7 +636,7 @@ class ReportOrchestrator( private def updateReportCompany( existingReport: Report, reportCompany: ReportCompany, - adminUserId: UUID + adminUserId: Option[UUID] ): Future[Report] = { val updateDateTime = OffsetDateTime.now() @@ -687,9 +687,9 @@ class ReportOrchestrator( UUID.randomUUID(), Some(updatedReport.id), Some(company.id), - Some(adminUserId), + adminUserId, updateDateTime, - Constants.EventType.ADMIN, + if (adminUserId.isDefined) Constants.EventType.ADMIN else Constants.EventType.SYSTEM, Constants.ActionEvent.REPORT_COMPANY_CHANGE, stringToDetailsJsValue( s"Entreprise précédente : Siret ${existingReport.companySiret diff --git a/app/orchestrators/WebsitesOrchestrator.scala b/app/orchestrators/WebsitesOrchestrator.scala index f9d6cb88..f93cecdd 100644 --- a/app/orchestrators/WebsitesOrchestrator.scala +++ b/app/orchestrators/WebsitesOrchestrator.scala @@ -64,7 +64,7 @@ class WebsitesOrchestrator( identificationStatus = IdentificationStatus.Identified ) createdWebsite <- repository.create(website) - _ <- updatePreviousReportsAssociatedToWebsite(website.host, createdCompany, user.id) + _ <- updatePreviousReportsAssociatedToWebsite(website.host, createdCompany, Some(user.id)) } yield createdWebsite def searchByHost(host: String): Future[Seq[Country]] = @@ -135,7 +135,7 @@ class WebsitesOrchestrator( .getOrElse(Future.successful(None)) _ = logger.debug(s"Company Siret is ${maybeCompany.map(_.siret)}") _ <- maybeCompany - .map(company => updatePreviousReportsAssociatedToWebsite(website.host, company, user.id)) + .map(company => updatePreviousReportsAssociatedToWebsite(website.host, company, Some(user.id))) .getOrElse(Future.unit) } yield () } else Future.unit @@ -152,7 +152,11 @@ class WebsitesOrchestrator( } yield website } - def updateCompany(websiteId: WebsiteId, companyToAssign: CompanyCreation, user: User): Future[WebsiteAndCompany] = + def updateCompany( + websiteId: WebsiteId, + companyToAssign: CompanyCreation, + user: Option[User] + ): Future[WebsiteAndCompany] = for { company <- { logger.debug(s"Updating website (id ${websiteId}) with company siret : ${companyToAssign.siret}") @@ -171,7 +175,7 @@ class WebsitesOrchestrator( companyId = Some(company.id) ) updatedWebsite <- updateIdentification(websiteToUpdate, user) - _ <- updatePreviousReportsAssociatedToWebsite(website.host, company, user.id) + _ <- updatePreviousReportsAssociatedToWebsite(website.host, company, user.map(_.id)) } yield WebsiteAndCompany.toApi(updatedWebsite, Some(company)) def updateCompanyCountry(websiteId: WebsiteId, companyCountry: String, user: User): Future[WebsiteAndCompany] = for { @@ -188,17 +192,17 @@ class WebsitesOrchestrator( companyCountry = Some(companyCountry), companyId = None ) - updatedWebsite <- updateIdentification(websiteToUpdate, user) + updatedWebsite <- updateIdentification(websiteToUpdate, Some(user)) } yield WebsiteAndCompany.toApi(updatedWebsite, maybeCompany = None) - def updateIdentification(website: Website, user: User): Future[Website] = { + def updateIdentification(website: Website, user: Option[User]): Future[Website] = { logger.debug(s"Removing other websites with the same host : ${website.host}") for { _ <- repository .removeOtherNonIdentifiedWebsitesWithSameHost(website) _ = logger.debug(s"updating identification status when Admin is updating identification") websiteToUpdate = - if (UserRole.isAdmin(user.userRole)) website.copy(identificationStatus = Identified) else website + if (user.map(_.userRole).forall(UserRole.isAdmin)) website.copy(identificationStatus = Identified) else website _ = logger.debug(s"Website to update : ${websiteToUpdate}") updatedWebsite <- update(websiteToUpdate) _ = logger.debug(s"Website company country successfully updated") @@ -264,7 +268,7 @@ class WebsitesOrchestrator( def updatePreviousReportsAssociatedToWebsite( websiteHost: String, company: Company, - userId: UUID + userId: Option[UUID] ): Future[Unit] = { val reportCompany = ReportCompany( name = company.name, diff --git a/app/repositories/siretextraction/SiretExtractionRepository.scala b/app/repositories/siretextraction/SiretExtractionRepository.scala new file mode 100644 index 00000000..e8f1f8f7 --- /dev/null +++ b/app/repositories/siretextraction/SiretExtractionRepository.scala @@ -0,0 +1,48 @@ +package repositories.siretextraction + +import models.website.IdentificationStatus +import models.website.Website +import slick.basic.DatabaseConfig +import slick.jdbc.JdbcProfile +import repositories.PostgresProfile.api._ +import repositories.website.WebsiteColumnType._ +import repositories.website.WebsiteTable +import tasks.website.ExtractionResultApi + +import scala.concurrent.ExecutionContext +import scala.concurrent.Future + +trait SiretExtractionRepositoryInterface { + def getByHost(host: String): Future[Option[ExtractionResultApi]] + def listUnextractedWebsiteHosts(n: Int): Future[List[Website]] + def insertOrReplace(extraction: ExtractionResultApi): Future[Unit] +} + +class SiretExtractionRepository(dbConfig: DatabaseConfig[JdbcProfile])(implicit ec: ExecutionContext) + extends SiretExtractionRepositoryInterface { + + val table = SiretExtractionTable.table + + import dbConfig._ + + override def getByHost(host: String): Future[Option[ExtractionResultApi]] = db.run( + table.filter(_.host === host).to[List].result.headOption + ) + + def listUnextractedWebsiteHosts(n: Int): Future[List[Website]] = db.run( + WebsiteTable.table + .joinLeft(table) + .on(_.host === _.host) + .filter(_._1.identificationStatus === (IdentificationStatus.NotIdentified: IdentificationStatus)) + .filter(_._2.isEmpty) + .map(_._1) + .distinctOn(_.host) + .take(n) + .to[List] + .result + ) + + override def insertOrReplace(extraction: ExtractionResultApi): Future[Unit] = db.run( + table.insertOrUpdate(extraction).map(_ => ()) + ) +} diff --git a/app/repositories/siretextraction/SiretExtractionTable.scala b/app/repositories/siretextraction/SiretExtractionTable.scala new file mode 100644 index 00000000..3e177338 --- /dev/null +++ b/app/repositories/siretextraction/SiretExtractionTable.scala @@ -0,0 +1,36 @@ +package repositories.siretextraction + +import play.api.libs.json.JsValue +import play.api.libs.json.Json +import repositories.PostgresProfile.api._ +import slick.ast.BaseTypedType +import slick.jdbc.JdbcType +import slick.lifted.ProvenShape +import tasks.website.ExtractionResultApi +import tasks.website.SiretExtractionApi + +class SiretExtractionTable(tag: Tag) extends Table[ExtractionResultApi](tag, "siret_extractions") { + + implicit val ListSiretExtractionApiColumnType + : JdbcType[List[SiretExtractionApi]] with BaseTypedType[List[SiretExtractionApi]] = + MappedColumnType.base[List[SiretExtractionApi], JsValue]( + Json.toJson, + _.as[List[SiretExtractionApi]] + ) + + def host = column[String]("host", O.PrimaryKey) + def status = column[String]("status") + def error = column[Option[String]]("error") + def extractions = column[Option[List[SiretExtractionApi]]]("extractions") + + override def * : ProvenShape[ExtractionResultApi] = ( + host, + status, + error, + extractions + ) <> ((ExtractionResultApi.apply _).tupled, ExtractionResultApi.unapply) +} + +object SiretExtractionTable { + val table = TableQuery[SiretExtractionTable] +} diff --git a/app/repositories/website/WebsiteRepository.scala b/app/repositories/website/WebsiteRepository.scala index f87a3f37..45f3fd04 100644 --- a/app/repositories/website/WebsiteRepository.scala +++ b/app/repositories/website/WebsiteRepository.scala @@ -13,10 +13,12 @@ import repositories.PaginateOps import repositories.TypedCRUDRepository import repositories.company.CompanyTable import repositories.report.ReportTable +import repositories.siretextraction.SiretExtractionTable import repositories.website.WebsiteColumnType._ import slick.basic.DatabaseConfig import slick.jdbc.JdbcProfile import slick.lifted.TableQuery +import tasks.website.ExtractionResultApi import utils.URL import java.time._ @@ -147,7 +149,7 @@ class WebsiteRepository( hasAssociation: Option[Boolean], isOpen: Option[Boolean], isMarketplace: Option[Boolean] - ): Future[PaginatedResult[((Website, Option[Company]), Int)]] = { + ): Future[PaginatedResult[(Website, Option[Company], Option[ExtractionResultApi], Int)]] = { val baseQuery = WebsiteTable.table @@ -169,26 +171,28 @@ class WebsiteRepository( } .joinLeft(CompanyTable.table) .on(_.companyId === _.id) + .joinLeft(SiretExtractionTable.table) + .on(_._1.host === _.host) .joinLeft(ReportTable.table) .on { (tupleTable, reportTable) => - val (websiteTable, _) = tupleTable + val ((websiteTable, _), _) = tupleTable websiteTable.host === reportTable.host && reportTable.host.isDefined } - .filterOpt(isOpen) { case (((_, companyTable), _), isOpenFilter) => + .filterOpt(isOpen) { case ((((_, companyTable), _), _), isOpenFilter) => companyTable.map(_.isOpen === isOpenFilter) } - .filterOpt(start) { case (((websiteTable, _), reportTable), start) => + .filterOpt(start) { case ((((websiteTable, _), _), reportTable), start) => reportTable.map(_.creationDate >= start).getOrElse(websiteTable.creationDate >= start) } - .filterOpt(end) { case (((websiteTable, _), reportTable), end) => + .filterOpt(end) { case ((((websiteTable, _), _), reportTable), end) => reportTable.map(_.creationDate <= end).getOrElse(websiteTable.creationDate <= end) } val query = baseQuery .groupBy(_._1) - .map { case (grouped, all) => (grouped, all.map(_._2).size) } + .map { case (grouped, all) => (grouped._1._1, grouped._1._2, grouped._2, all.map(_._2).size) } .sortBy { tupleTable => - val ((websiteTable, _), reportCount) = tupleTable + val (websiteTable, _, _, reportCount) = tupleTable (reportCount.desc, websiteTable.host.desc, websiteTable.id.desc) } .to[Seq] diff --git a/app/repositories/website/WebsiteRepositoryInterface.scala b/app/repositories/website/WebsiteRepositoryInterface.scala index a8666555..6a1031de 100644 --- a/app/repositories/website/WebsiteRepositoryInterface.scala +++ b/app/repositories/website/WebsiteRepositoryInterface.scala @@ -7,6 +7,7 @@ import models.website.Website import models.website.WebsiteId import models.website.IdentificationStatus import repositories.TypedCRUDRepositoryInterface +import tasks.website.ExtractionResultApi import java.time.LocalDate import java.time.OffsetDateTime @@ -41,7 +42,7 @@ trait WebsiteRepositoryInterface extends TypedCRUDRepositoryInterface[Website, W hasAssociation: Option[Boolean], isOpen: Option[Boolean], isMarketplace: Option[Boolean] - ): Future[PaginatedResult[((Website, Option[Company]), Int)]] + ): Future[PaginatedResult[(Website, Option[Company], Option[ExtractionResultApi], Int)]] def searchValidAssociationByHost(host: String): Future[Seq[Website]] diff --git a/app/services/SiretExtractorService.scala b/app/services/SiretExtractorService.scala index 4942dbba..d1d98514 100644 --- a/app/services/SiretExtractorService.scala +++ b/app/services/SiretExtractorService.scala @@ -2,14 +2,19 @@ package services import config.SiretExtractorConfiguration import play.api.Logger +import play.api.libs.json.JsError import play.api.libs.json.JsValue +import play.api.libs.json.Json import sttp.capabilities import sttp.client3.HttpClientFutureBackend +import sttp.client3.Response +import sttp.client3.ResponseException import sttp.client3.SttpBackend import sttp.client3.UriContext import sttp.client3.basicRequest import sttp.client3.playJson._ import sttp.model.Header +import tasks.website.ExtractionResultApi import scala.concurrent.Future @@ -19,16 +24,18 @@ class SiretExtractorService(siretExtractorConfiguration: SiretExtractorConfigura private val backend: SttpBackend[Future, capabilities.WebSockets] = HttpClientFutureBackend() - def extractSiret(body: Option[JsValue]) = { - logger.debug(s"Calling siret extractor with body ${body}") + def extractSiret( + website: String + ): Future[Response[Either[ResponseException[String, JsError], ExtractionResultApi]]] = { + logger.debug(s"Calling siret extractor with website $website") val url = uri"${siretExtractorConfiguration.url}/extract" val request = basicRequest .headers(Header("X-Api-Key", siretExtractorConfiguration.apiKey)) .post(url) .response(asJson[JsValue]) + .body(Json.obj("website" -> website)) + .response(asJson[ExtractionResultApi]) - val requestWithBody = body.fold(request)(s => request.body(s)) - - requestWithBody.send(backend) + request.send(backend) } } diff --git a/app/tasks/website/ExtractionResultApi.scala b/app/tasks/website/ExtractionResultApi.scala new file mode 100644 index 00000000..9387612f --- /dev/null +++ b/app/tasks/website/ExtractionResultApi.scala @@ -0,0 +1,45 @@ +package tasks.website + +import play.api.libs.json.Json +import play.api.libs.json.OFormat +import tasks.company.CompanySearchResult + +case class SiretApi( + siret: String, + valid: Boolean +) + +object SiretApi { + implicit val format: OFormat[SiretApi] = Json.format[SiretApi] +} + +case class SirenApi( + siren: String, + valid: Boolean +) + +object SirenApi { + implicit val format: OFormat[SirenApi] = Json.format[SirenApi] +} + +case class SiretExtractionApi( + siret: Option[SiretApi], + siren: Option[SirenApi], + links: List[String], + sirene: Option[CompanySearchResult] +) + +object SiretExtractionApi { + implicit val format: OFormat[SiretExtractionApi] = Json.format[SiretExtractionApi] +} + +case class ExtractionResultApi( + website: String, + status: String, + error: Option[String], + extractions: Option[List[SiretExtractionApi]] +) + +object ExtractionResultApi { + implicit val format: OFormat[ExtractionResultApi] = Json.format[ExtractionResultApi] +} diff --git a/app/tasks/website/SiretExtractionTask.scala b/app/tasks/website/SiretExtractionTask.scala new file mode 100644 index 00000000..57f98f6a --- /dev/null +++ b/app/tasks/website/SiretExtractionTask.scala @@ -0,0 +1,118 @@ +package tasks.website + +import cats.implicits.catsSyntaxOption +import cats.implicits.toTraverseOps +import config.TaskConfiguration +import controllers.error.AppError.CompanyNotFound +import models.company.Company +import models.company.CompanyCreation +import models.website.Website +import orchestrators.WebsitesOrchestrator +import org.apache.pekko.actor.ActorSystem +import repositories.company.CompanyRepositoryInterface +import repositories.siretextraction.SiretExtractionRepositoryInterface +import repositories.tasklock.TaskRepositoryInterface +import services.SiretExtractorService +import tasks.ScheduledTask +import tasks.company.CompanySearchResult +import tasks.model.TaskSettings +import tasks.model.TaskSettings.FrequentTaskSettings + +import scala.concurrent.ExecutionContext +import scala.concurrent.Future + +class SiretExtractionTask( + actorSystem: ActorSystem, + taskConfiguration: TaskConfiguration, + taskRepository: TaskRepositoryInterface, + siretExtractionRepository: SiretExtractionRepositoryInterface, + siretExtractorService: SiretExtractorService, + websitesOrchestrator: WebsitesOrchestrator, + companyRepository: CompanyRepositoryInterface +)(implicit executionContext: ExecutionContext) + extends ScheduledTask(14, "siret_extraction_task", taskRepository, actorSystem, taskConfiguration) { + + override val taskSettings: TaskSettings = FrequentTaskSettings(interval = taskConfiguration.siretExtraction.interval) + + private def findMatchingAndValidExtraction( + company: Company, + extractions: List[SiretExtractionApi] + ): Option[CompanySearchResult] = { + val a = extractions + .filter(extraction => extraction.siret.forall(_.valid) && extraction.siren.forall(_.valid)) + .find { extraction => + extraction.sirene match { + case Some(sirene) => sirene.siret == company.siret && company.isOpen + case None => false + } + } + a.flatMap(_.sirene) + } + + override def runTask(): Future[Unit] = for { + websites <- siretExtractionRepository.listUnextractedWebsiteHosts(taskConfiguration.siretExtraction.websiteCount) + _ = logger.debug(s"Found ${websites.length} websites to handle (siret extraction)") + results <- websites.traverse(website => + siretExtractorService + .extractSiret(website.host) + .map { response => + response.body match { + case Left(error) => + logger.debug(s"Siret extraction failed for ${website.id} : $error") + website -> ExtractionResultApi( + website.host, + "error while calling Siret extractor service", + Some(error.getMessage), + None + ) + case Right(body) => + logger.debug(s"Siret extraction succeeded for ${website.id}") + website -> body + } + } + ) + // Sauvegarde des résultats en DB + _ = logger.debug(s"Saving ${results.length} extraction results un DB") + _ <- results.traverse { case (_, extraction) => siretExtractionRepository.insertOrReplace(extraction) } + + // Association automatique si : + // - Un siret trouvé est valide et présent dans SIRENE + // - Le conso a fourni la même entreprise + // - L'entreprise est ouverte + companyIdsAndExtractions = results.collect { + case (Website(id, _, _, _, _, Some(companyId), _, _, _), ExtractionResultApi(_, _, _, Some(extractions))) => + (id, companyId, extractions) + } + _ = logger.debug( + s"${companyIdsAndExtractions.length} websites remaining after filtering succeeded extractions and companyId already provided by consumer" + ) + + companyAndExtractions <- companyIdsAndExtractions.traverse { case (websiteId, companyId, extractions) => + companyRepository + .get(companyId) + .flatMap(_.liftTo[Future](CompanyNotFound(companyId))) + .map(company => (websiteId, company, extractions)) + } + _ = logger.debug(s"${companyAndExtractions.length} websites remaining after fetching companies") + websiteIdAndFoundCompany = companyAndExtractions.flatMap { case (websiteId, company, extractions) => + findMatchingAndValidExtraction(company, extractions).map(companySearchResult => websiteId -> companySearchResult) + } + _ = logger.debug(s"${websiteIdAndFoundCompany.length} website(s) will be associated to companies") + _ <- websiteIdAndFoundCompany.traverse { case (websiteId, companySearchResult) => + val companyCreation = CompanyCreation( + siret = companySearchResult.siret, + name = companySearchResult.name.getOrElse(""), + address = companySearchResult.address, + activityCode = companySearchResult.activityCode, + isHeadOffice = Some(companySearchResult.isHeadOffice), + isOpen = Some(companySearchResult.isOpen), + isPublic = Some(companySearchResult.isPublic), + brand = companySearchResult.brand, + commercialName = companySearchResult.commercialName, + establishmentCommercialName = companySearchResult.establishmentCommercialName + ) + logger.debug(s"Website $websiteId will be associated to company ${companyCreation.siret}") + websitesOrchestrator.updateCompany(websiteId, companyCreation, None) + } + } yield () +} diff --git a/conf/common/task.conf b/conf/common/task.conf index fb2daf17..e085a19c 100644 --- a/conf/common/task.conf +++ b/conf/common/task.conf @@ -78,5 +78,12 @@ task { interval = 24 hours interval = ${?SUBCATEGORY_LABELS_TASK_INTERVAL} } + + siret-extraction { + interval = 1 hour + interval = ${?SIRET_EXTRACTION_TASK_INTERVAL} + website-count = 1 + website-count = ${?SIRET_EXTRACTION_WEBSITE_COUNT} + } } \ No newline at end of file diff --git a/conf/db/migration/default/V52__revert_reports_creation_date_index.sql b/conf/db/migration/default/V52__revert_reports_creation_date_index.sql new file mode 100644 index 00000000..e5a3efb1 --- /dev/null +++ b/conf/db/migration/default/V52__revert_reports_creation_date_index.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS reports_creation_date_desc_idx; \ No newline at end of file diff --git a/conf/db/migration/default/V53__siret_extractions.sql b/conf/db/migration/default/V53__siret_extractions.sql new file mode 100644 index 00000000..10c15525 --- /dev/null +++ b/conf/db/migration/default/V53__siret_extractions.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS siret_extractions ( + host TEXT NOT NULL PRIMARY KEY, + status TEXT NOT NULL, + error TEXT, + extractions JSONB + ); \ No newline at end of file diff --git a/conf/test.application.conf b/conf/test.application.conf index bad39e29..0346d8ec 100644 --- a/conf/test.application.conf +++ b/conf/test.application.conf @@ -79,4 +79,7 @@ task { etablissement-api-key = "fake_key" } + siret-extraction { + website-count = 10 + } } diff --git a/test/tasks/account/InactiveDgccrfAccountReminderTaskSpec.scala b/test/tasks/account/InactiveDgccrfAccountReminderTaskSpec.scala index 1c625263..20d3054b 100644 --- a/test/tasks/account/InactiveDgccrfAccountReminderTaskSpec.scala +++ b/test/tasks/account/InactiveDgccrfAccountReminderTaskSpec.scala @@ -45,7 +45,7 @@ class InactiveDgccrfAccountReminderTaskSpec(implicit ee: ExecutionEnv) override def eventRepository: EventRepositoryInterface = mockEventRepository - override val userRepository: UserRepositoryInterface = mockUserRepository + override def userRepository: UserRepositoryInterface = mockUserRepository override def configuration: Configuration = Configuration( "slick.dbs.default.db.connectionPool" -> "disabled", diff --git a/test/tasks/report/ReportRemindersTaskUnitSpec.scala b/test/tasks/report/ReportRemindersTaskUnitSpec.scala index 27d67ea8..73e9782f 100644 --- a/test/tasks/report/ReportRemindersTaskUnitSpec.scala +++ b/test/tasks/report/ReportRemindersTaskUnitSpec.scala @@ -5,6 +5,7 @@ import config.ExportReportsToSFTPConfiguration import config.ProbeConfiguration import config.ReportRemindersTaskConfiguration import config.SampleDataConfiguration +import config.SiretExtractionConfiguration import config.SubcategoryLabelsTaskConfiguration import config.TaskConfiguration import models.company.AccessLevel @@ -61,6 +62,7 @@ class ReportRemindersTaskUnitSpec extends Specification with FutureMatchers { probe = ProbeConfiguration(false), exportReportsToSFTP = ExportReportsToSFTPConfiguration("./reports.csv", LocalTime.of(3, 30)), subcategoryLabels = SubcategoryLabelsTaskConfiguration(startTime = LocalTime.of(2, 0), interval = 1.day), + siretExtraction = SiretExtractionConfiguration(interval = 1.day, websiteCount = 1), sampleData = SampleDataConfiguration(true, LocalTime.of(3, 30)) ) diff --git a/test/tasks/website/SiretExtractionTaskSpec.scala b/test/tasks/website/SiretExtractionTaskSpec.scala new file mode 100644 index 00000000..7a8bea35 --- /dev/null +++ b/test/tasks/website/SiretExtractionTaskSpec.scala @@ -0,0 +1,155 @@ +package tasks.website + +import models.company.CompanyCreation +import models.website.IdentificationStatus.Identified +import models.website.IdentificationStatus.NotIdentified +import models.website.Website +import models.website.WebsiteAndCompany +import orchestrators.WebsitesOrchestrator +import org.apache.pekko.stream.Materializer +import org.specs2.concurrent.ExecutionEnv +import org.specs2.matcher.FutureMatchers +import org.specs2.mock.Mockito +import org.specs2.mutable +import services.SiretExtractorService +import sttp.client3.Response +import sttp.model.StatusCode +import tasks.company.CompanySearchResult +import utils.AppSpec +import utils.Fixtures +import utils.TaskRepositoryMock +import utils.TestApp + +import scala.concurrent.Future + +class SiretExtractionTaskSpec(implicit ee: ExecutionEnv) + extends mutable.Specification + with AppSpec + with Mockito + with FutureMatchers { + + val (app, components) = TestApp.buildApp() + implicit val mat: Materializer = app.materializer + + val taskRepositoryMock = new TaskRepositoryMock() + + "SiretExtractionTask" should { + "correctly save and associate websites" in { + val siretExtractorServiceMock = mock[SiretExtractorService] + val websitesOrchestratorMock = mock[WebsitesOrchestrator] + + val service = new SiretExtractionTask( + app.actorSystem, + components.applicationConfiguration.task, + taskRepositoryMock, + components.siretExtractionRepository, + siretExtractorServiceMock, + websitesOrchestratorMock, + components.companyRepository + ) + + val company = Fixtures.genCompany.sample.get + val unusedCompany = Fixtures.genCompany.sample.get + val companySearchResult = + CompanySearchResult.fromCompany(company, Website(host = "", companyCountry = None, companyId = None)) + val website = Fixtures + .genWebsite() + .sample + .get + .copy( + host = "test2.com", + companyCountry = None, + companyId = Some(company.id), + identificationStatus = NotIdentified + ) + + siretExtractorServiceMock.extractSiret("test.com") returns Future.successful( + Response( + Right( + ExtractionResultApi( + website = "test.com", + status = "success", + error = None, + extractions = Some( + List( + SiretExtractionApi( + Some(SiretApi(company.siret.value, true)), + None, + List.empty, + Some(companySearchResult) + ) + ) + ) + ) + ), + StatusCode.Ok + ) + ) + + siretExtractorServiceMock.extractSiret("test2.com") returns Future.successful( + Response( + Right( + ExtractionResultApi( + website = "test2.com", + status = "success", + error = None, + extractions = Some( + List( + SiretExtractionApi( + Some(SiretApi(company.siret.value, true)), + None, + List.empty, + Some(companySearchResult) + ) + ) + ) + ) + ), + StatusCode.Ok + ) + ) + + websitesOrchestratorMock.updateCompany( + website.id, + CompanyCreation( + siret = companySearchResult.siret, + name = companySearchResult.name.getOrElse(""), + address = companySearchResult.address, + activityCode = companySearchResult.activityCode, + isHeadOffice = Some(companySearchResult.isHeadOffice), + isOpen = Some(companySearchResult.isOpen), + isPublic = Some(companySearchResult.isPublic), + brand = companySearchResult.brand, + commercialName = companySearchResult.commercialName, + establishmentCommercialName = companySearchResult.establishmentCommercialName + ), + None + ) returns Future.successful(WebsiteAndCompany.toApi(website, Some(company))) + + for { + _ <- components.companyRepository.create(company) + _ <- components.companyRepository.create(unusedCompany) + _ <- components.websiteRepository.create(Website(host = "test.com", companyCountry = None, companyId = None)) + _ <- components.websiteRepository.create(website) + _ <- components.websiteRepository.create( + Website( + host = "test3.com", + companyCountry = None, + companyId = Some(unusedCompany.id), + identificationStatus = Identified + ) + ) + b1 <- components.siretExtractionRepository.getByHost("test.com") + b2 <- components.siretExtractionRepository.getByHost("test2.com") + b3 <- components.siretExtractionRepository.getByHost("test3.com") + _ <- service.runTask() + a1 <- components.siretExtractionRepository.getByHost("test.com") + a2 <- components.siretExtractionRepository.getByHost("test2.com") + a3 <- components.siretExtractionRepository.getByHost("test3.com") + } yield (b1 should beNone).and(a1.isDefined shouldEqual true) and + (b2 should beNone).and(a2.isDefined shouldEqual true) and + (b3 should beNone).and(a3 should beNone) + } + } + +}