From 499aa0933b54ce5839e460ca7581f23e8454b87e Mon Sep 17 00:00:00 2001 From: Harel Mazor Date: Sun, 22 Mar 2020 20:20:46 +0200 Subject: [PATCH] Resolves #1183 - Wikimedia duplicate images creates bad links --- .../Controllers/PointsOfInterestController.cs | 31 ++-- .../Controllers/UpdateController.cs | 12 +- .../Executors/IImagesUrlsStorageExecutor.cs | 36 +++++ .../IOsmLatestFileFetcherExecutor.cs | 3 +- .../Executors/ImagesUrlsStorageExecutor.cs | 139 ++++++++++++++++++ .../Executors/OsmLatestFileFetcherExecutor.cs | 7 +- .../PointsOfInterestFilesCreatorExecutor.cs | 54 ++----- .../Gpx/SerializarionExtensions.cs | 15 ++ IsraelHiking.API/RegisterApi.cs | 1 + .../Services/Osm/DatabasesUpdaterService.cs | 28 +++- IsraelHiking.Common/ImageItem.cs | 9 ++ IsraelHiking.Common/UpdateRequest.cs | 26 +++- .../ElasticSearchGateway.cs | 71 +++++++++ .../OpenStreetMap/OsmRepository.cs | 18 +++ IsraelHiking.DataAccess/RegisterDataAccess.cs | 1 + .../IElasticSearchGateway.cs | 2 +- .../IImagesRepository.cs | 17 +++ .../IOsmRepository.cs | 1 + .../services/image-resize.service.ts | 2 +- .../PointsOfInterestControllerTests.cs | 39 ++++- .../Osm/DatabasesUpdaterServiceTests.cs | 1 + .../ElasticSearchGatewayTests.cs | 33 +++++ .../RemoteFileFetcherGatewayTests.cs | 13 +- 23 files changed, 490 insertions(+), 69 deletions(-) create mode 100644 IsraelHiking.API/Executors/IImagesUrlsStorageExecutor.cs create mode 100644 IsraelHiking.API/Executors/ImagesUrlsStorageExecutor.cs create mode 100644 IsraelHiking.Common/ImageItem.cs create mode 100644 IsraelHiking.DataAccessInterfaces/IImagesRepository.cs diff --git a/IsraelHiking.API/Controllers/PointsOfInterestController.cs b/IsraelHiking.API/Controllers/PointsOfInterestController.cs index 283050c62..37e562f3d 100644 --- a/IsraelHiking.API/Controllers/PointsOfInterestController.cs +++ b/IsraelHiking.API/Controllers/PointsOfInterestController.cs @@ -1,4 +1,5 @@ using IsraelHiking.API.Converters; +using IsraelHiking.API.Executors; using IsraelHiking.API.Services; using IsraelHiking.API.Services.Poi; using IsraelHiking.Common; @@ -13,6 +14,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Security.Cryptography; using System.Threading.Tasks; namespace IsraelHiking.API.Controllers @@ -28,6 +30,7 @@ public class PointsOfInterestController : ControllerBase private readonly IWikimediaCommonGateway _wikimediaCommonGateway; private readonly IPointsOfInterestProvider _pointsOfInterestProvider; private readonly IBase64ImageStringToFileConverter _base64ImageConverter; + private readonly IImagesUrlsStorageExecutor _imageUrlStoreExecutor; private readonly ConfigurationData _options; private readonly LruCache _cache; @@ -39,6 +42,7 @@ public class PointsOfInterestController : ControllerBase /// /// /// + /// /// /// public PointsOfInterestController(IClientsFactory clientsFactory, @@ -46,6 +50,7 @@ public PointsOfInterestController(IClientsFactory clientsFactory, IWikimediaCommonGateway wikimediaCommonGateway, IPointsOfInterestProvider pointsOfInterestProvider, IBase64ImageStringToFileConverter base64ImageConverter, + IImagesUrlsStorageExecutor imageUrlStoreExecutor, IOptions options, LruCache cache) { @@ -53,6 +58,7 @@ public PointsOfInterestController(IClientsFactory clientsFactory, _tagsHelper = tagsHelper; _cache = cache; _base64ImageConverter = base64ImageConverter; + _imageUrlStoreExecutor = imageUrlStoreExecutor; _pointsOfInterestProvider = pointsOfInterestProvider; _wikimediaCommonGateway = wikimediaCommonGateway; _options = options.Value; @@ -147,24 +153,31 @@ public async Task UploadPointOfInterest([FromBody]PointOfInterest var imageUrls = pointOfInterest.ImagesUrls ?? new string[0]; for (var urlIndex = 0; urlIndex < imageUrls.Length; urlIndex++) { - var url = imageUrls[urlIndex]; var fileName = string.IsNullOrWhiteSpace(pointOfInterest.Title) ? pointOfInterest.Icon.Replace("icon-", "") : pointOfInterest.Title; - var file = _base64ImageConverter.ConvertToFile(url, fileName); + var file = _base64ImageConverter.ConvertToFile(imageUrls[urlIndex], fileName); if (file == null) { continue; } - using (var memoryStream = new MemoryStream(file.Content)) + using (var md5 = MD5.Create()) { - var imageName = await _wikimediaCommonGateway.UploadImage(pointOfInterest.Title, - pointOfInterest.Description, user.DisplayName, file.FileName, memoryStream, - pointOfInterest.Location.ToCoordinate()); - url = await _wikimediaCommonGateway.GetImageUrl(imageName); - imageUrls[urlIndex] = url; + var imageUrl = await _imageUrlStoreExecutor.GetImageUrlIfExists(md5, file.Content); + if (imageUrl != null) + { + imageUrls[urlIndex] = imageUrl; + continue; + } + using (var memoryStream = new MemoryStream(file.Content)) + { + var imageName = await _wikimediaCommonGateway.UploadImage(pointOfInterest.Title, + pointOfInterest.Description, user.DisplayName, file.FileName, memoryStream, + pointOfInterest.Location.ToCoordinate()); + imageUrls[urlIndex] = await _wikimediaCommonGateway.GetImageUrl(imageName); + await _imageUrlStoreExecutor.StoreImage(md5, file.Content, imageUrls[urlIndex]); + } } - } if (string.IsNullOrWhiteSpace(pointOfInterest.Id)) diff --git a/IsraelHiking.API/Controllers/UpdateController.cs b/IsraelHiking.API/Controllers/UpdateController.cs index 30538518a..18531cbbf 100644 --- a/IsraelHiking.API/Controllers/UpdateController.cs +++ b/IsraelHiking.API/Controllers/UpdateController.cs @@ -67,7 +67,9 @@ public async Task PostUpdateData(UpdateRequest request) request.Routing == false && request.Highways == false && request.PointsOfInterest == false && - request.OsmFile == false && + request.UpdateOsmFile == false && + request.DownloadOsmFile == false && + request.Images == false && request.SiteMap == false) { request = new UpdateRequest @@ -75,13 +77,15 @@ public async Task PostUpdateData(UpdateRequest request) Routing = true, Highways = true, PointsOfInterest = true, - OsmFile = true, - SiteMap = true + UpdateOsmFile = true, + DownloadOsmFile = true, + SiteMap = true, + Images = true }; _logger.LogInformation("No specific filters were applied, updating all databases."); } _logger.LogInformation("Starting updating site's databases according to request: " + JsonConvert.SerializeObject(request)); - await _osmLatestFileFetcherExecutor.Update(request.OsmFile); + await _osmLatestFileFetcherExecutor.Update(request.DownloadOsmFile, request.UpdateOsmFile); _logger.LogInformation("Update OSM file completed."); await _databasesUpdaterService.Rebuild(request); diff --git a/IsraelHiking.API/Executors/IImagesUrlsStorageExecutor.cs b/IsraelHiking.API/Executors/IImagesUrlsStorageExecutor.cs new file mode 100644 index 000000000..2f9dbd1a4 --- /dev/null +++ b/IsraelHiking.API/Executors/IImagesUrlsStorageExecutor.cs @@ -0,0 +1,36 @@ +using System.Collections.Generic; +using System.Security.Cryptography; +using System.Threading.Tasks; + +namespace IsraelHiking.API.Executors +{ + /// + /// Stores images in order to avoid uploading the same image to wikimedia twice + /// + public interface IImagesUrlsStorageExecutor + { + /// + /// Dowonloads the content from the urls, calculates hash and stores to database + /// + /// + /// + Task DownloadAndStoreUrls(List imagesUrls); + + /// + /// Get an image url if it exsits in the repository + /// + /// + /// + /// The image url or null + Task GetImageUrlIfExists(MD5 md5, byte[] content); + + /// + /// This method stores images in the repostory after computing hash and resizing them + /// + /// + /// + /// + /// + Task StoreImage(MD5 md5, byte[] content, string imageUrl); + } +} \ No newline at end of file diff --git a/IsraelHiking.API/Executors/IOsmLatestFileFetcherExecutor.cs b/IsraelHiking.API/Executors/IOsmLatestFileFetcherExecutor.cs index 9f2cdb8b5..ae63adb54 100644 --- a/IsraelHiking.API/Executors/IOsmLatestFileFetcherExecutor.cs +++ b/IsraelHiking.API/Executors/IOsmLatestFileFetcherExecutor.cs @@ -11,9 +11,10 @@ public interface IOsmLatestFileFetcherExecutor /// /// Updates the osm file to latest version /// + /// Should the operation download the daily OSM file /// Should the operation download updates for daily OSM file /// - Task Update(bool updateFile = true); + Task Update(bool downloadFile = true, bool updateFile = true); /// /// Gets a stream to the OSM file diff --git a/IsraelHiking.API/Executors/ImagesUrlsStorageExecutor.cs b/IsraelHiking.API/Executors/ImagesUrlsStorageExecutor.cs new file mode 100644 index 000000000..3d96f93f6 --- /dev/null +++ b/IsraelHiking.API/Executors/ImagesUrlsStorageExecutor.cs @@ -0,0 +1,139 @@ +using IsraelHiking.API.Gpx; +using IsraelHiking.Common; +using IsraelHiking.DataAccessInterfaces; +using Microsoft.Extensions.Logging; +using SixLabors.ImageSharp; +using SixLabors.ImageSharp.Processing; +using SixLabors.Primitives; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Security.Cryptography; +using System.Threading; +using System.Threading.Tasks; + +namespace IsraelHiking.API.Executors +{ + /// + public class ImagesUrlsStorageExecutor : IImagesUrlsStorageExecutor + { + private readonly IImagesRepository _imagesRepository; + private readonly IRemoteFileSizeFetcherGateway _remoteFileFetcherGateway; + private readonly ILogger _logger; + + /// + /// Constrcutor + /// + /// + /// + /// + public ImagesUrlsStorageExecutor(IImagesRepository imagesRepository, + IRemoteFileSizeFetcherGateway remoteFileFetcherGateway, + ILogger logger) + { + _imagesRepository = imagesRepository; + _remoteFileFetcherGateway = remoteFileFetcherGateway; + _logger = logger; + } + + /// + public async Task DownloadAndStoreUrls(List imagesUrls) + { + var exitingUrls = await _imagesRepository.GetAllUrls(); + var needToRemove = exitingUrls.Except(imagesUrls).ToList(); + _logger.LogInformation($"Need to remove {needToRemove.Count} images that are no longer relevant"); + foreach(var imageUrlToRemove in needToRemove) + { + await _imagesRepository.DeleteImageByUrl(imageUrlToRemove); + } + _logger.LogInformation($"Finished removing images"); + using (var md5 = MD5.Create()) + { + var counter = 0; + Parallel.ForEach(imagesUrls, new ParallelOptions { MaxDegreeOfParallelism = 20 }, (imageUrl) => + { + try + { + Interlocked.Increment(ref counter); + if (counter % 100 == 0) + { + _logger.LogInformation($"Indexed {counter} images of {imagesUrls.Count}"); + } + if (exitingUrls.Contains(imageUrl)) + { + var size = _remoteFileFetcherGateway.GetFileSize(imageUrl).Result; + if (size > 0) + { + return; + } + } + var content = new byte[0]; + for (int retryIndex = 0; retryIndex < 3; retryIndex++) + { + try + { + content = _remoteFileFetcherGateway.GetFileContent(imageUrl).Result.Content; + break; + } + catch + { + Task.Delay(200).Wait(); + } + } + if (content.Length == 0) + { + _imagesRepository.DeleteImageByUrl(imageUrl).Wait(); + return; + } + StoreImage(md5, content, imageUrl).Wait(); + } + catch (Exception ex) + { + _logger.LogWarning("There was a problem with the following image url: " + imageUrl + " " + ex.ToString()); + } + }); + } + } + + private byte[] ResizeImage(Image originalImage, int newSizeInPixels) + { + var ratio = originalImage.Width > originalImage.Height + ? newSizeInPixels * 1.0 / originalImage.Width + : newSizeInPixels * 1.0 / originalImage.Height; + var newSize = new Size((int)(originalImage.Width * ratio), (int)(originalImage.Height * ratio)); + originalImage.Mutate(x => x.Resize(newSize)); + + var memoryStream = new MemoryStream(); + originalImage.SaveAsJpeg(memoryStream); + return memoryStream.ToArray(); + } + + /// + public Task StoreImage(MD5 md5, byte[] content, string imageUrl) + { + var hash = md5.ComputeHash(content).ToHashString(); + var image = Image.Load(content, out var _); + content = ResizeImage(image, 200); + return _imagesRepository.StoreImage(new ImageItem + { + ImageUrl = imageUrl, + Data = $"data:image/jpeg;base64," + Convert.ToBase64String(content), + Hash = hash + }); + } + + /// + public async Task GetImageUrlIfExists(MD5 md5, byte[] content) + { + var hash = md5.ComputeHash(content).ToHashString(); + var imageItem = await _imagesRepository.GetImageByHash(hash); + var imageUrl = imageItem?.ImageUrl; + if (imageUrl != null) + { + _logger.LogInformation($"Found exiting image with url: {imageUrl}"); + } + return imageUrl; + } + } +} diff --git a/IsraelHiking.API/Executors/OsmLatestFileFetcherExecutor.cs b/IsraelHiking.API/Executors/OsmLatestFileFetcherExecutor.cs index a49964a72..8a2beb80f 100644 --- a/IsraelHiking.API/Executors/OsmLatestFileFetcherExecutor.cs +++ b/IsraelHiking.API/Executors/OsmLatestFileFetcherExecutor.cs @@ -51,7 +51,7 @@ public OsmLatestFileFetcherExecutor(IFileSystemHelper fileSystemHelper, } /// - public async Task Update(bool updateFile = true) + public async Task Update(bool downloadFile = true, bool updateFile = true) { _logger.LogInformation("Starting updating to latest OSM file."); var workingDirectory = Path.Combine(_options.BinariesFolder, OSM_C_TOOLS_FOLDER); @@ -60,7 +60,10 @@ public async Task Update(bool updateFile = true) { _fileSystemHelper.CreateDirectory(workingDirectory); } - await DownloadDailyOsmFile(workingDirectory); + if (downloadFile || updateFile) + { + await DownloadDailyOsmFile(workingDirectory); + } if (updateFile) { UpdateFileToLatestVersion(workingDirectory); diff --git a/IsraelHiking.API/Executors/PointsOfInterestFilesCreatorExecutor.cs b/IsraelHiking.API/Executors/PointsOfInterestFilesCreatorExecutor.cs index c0486376c..45216175f 100644 --- a/IsraelHiking.API/Executors/PointsOfInterestFilesCreatorExecutor.cs +++ b/IsraelHiking.API/Executors/PointsOfInterestFilesCreatorExecutor.cs @@ -10,8 +10,6 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; using SixLabors.ImageSharp; -using SixLabors.ImageSharp.Processing; -using SixLabors.Primitives; using System; using System.Collections.Concurrent; using System.Collections.Generic; @@ -24,18 +22,13 @@ namespace IsraelHiking.API.Executors { - internal class ImageItem - { - public string ImageUrl { get; set; } - public string Data { get; set; } - } - /// public class PointsOfInterestFilesCreatorExecutor : IPointsOfInterestFilesCreatorExecutor { private readonly IRemoteFileFetcherGateway _remoteFileFetcherGateway; private readonly IFileSystemHelper _fileSystemHelper; private readonly IWebHostEnvironment _environment; + private readonly IImagesRepository _imagesRepository; private readonly ILogger _logger; /// /// Constructor @@ -43,15 +36,18 @@ public class PointsOfInterestFilesCreatorExecutor : IPointsOfInterestFilesCreato /// /// /// + /// /// public PointsOfInterestFilesCreatorExecutor(IFileSystemHelper fileSystemHelper, IRemoteFileFetcherGateway remoteFileFetcherGateway, IWebHostEnvironment environment, + IImagesRepository imagesRepository, ILogger logger) { _fileSystemHelper = fileSystemHelper; _remoteFileFetcherGateway = remoteFileFetcherGateway; _environment = environment; + _imagesRepository = imagesRepository; _logger = logger; } @@ -123,7 +119,7 @@ private void CreateOfflinePoisFile(List features) CreateImagesJsonFiles(features, zipStream); zipStream.Finish(); outputMemStream.Position = 0; - + _fileSystemHelper.WriteAllBytes("pois.ihm", outputMemStream.ToArray()); } } @@ -132,7 +128,6 @@ private void CreateImagesJsonFiles(List features, ZipOutputStream zipSt { _logger.LogInformation("Staring Image file creation: " + features.Count + " features"); var items = new ConcurrentBag(); - var size = 200; Parallel.ForEach(features, new ParallelOptions { MaxDegreeOfParallelism = 10 }, (feature) => { var urls = feature.Attributes.GetNames() @@ -140,33 +135,15 @@ private void CreateImagesJsonFiles(List features, ZipOutputStream zipSt .Where(u => !string.IsNullOrWhiteSpace(u)); foreach (var url in urls) { - var needResize = true; - var updatedUrl = url; - var pattern = @"(http.*\/\/upload\.wikimedia\.org\/wikipedia\/(commons|he|en)\/)(.*\/)(.*)"; - if (Regex.Match(url, pattern).Success) - { - updatedUrl = Regex.Replace(url, pattern, $"$1thumb/$3$4/{size}px-$4"); - updatedUrl = url.EndsWith(".svg") ? updatedUrl + ".png" : updatedUrl; - needResize = false; - } try { - var content = _remoteFileFetcherGateway.GetFileContent(updatedUrl).Result.Content; - if (content.Length == 0) + var imageItem = _imagesRepository.GetImageByUrl(url).Result; + if (imageItem == null) { _logger.LogWarning("The following image does not exist: " + url + " feature: " + feature.GetId()); continue; } - var image = Image.Load(content, out var format); - if (!needResize) - { - items.Add(new ImageItem { ImageUrl = url, Data = $"data:image/{format};base64," + Convert.ToBase64String(content) }); - } - else - { - content = ResizeImage(image, size); - items.Add(new ImageItem { ImageUrl = url, Data = $"data:image/jpeg;base64," + Convert.ToBase64String(content) }); - } + items.Add(imageItem); } catch (Exception) { @@ -183,7 +160,7 @@ private void CreateImagesJsonFiles(List features, ZipOutputStream zipSt { ContractResolver = new CamelCasePropertyNamesContractResolver() }); - var newEntry = new ZipEntry($"images/images{ index.ToString("000") }.json") + var newEntry = new ZipEntry($"images/images{index:000}.json") { DateTime = DateTime.Now }; @@ -195,18 +172,5 @@ private void CreateImagesJsonFiles(List features, ZipOutputStream zipSt } _logger.LogInformation("Finished Image file creation: " + items.Count()); } - - private byte[] ResizeImage(Image originalImage, int newSizeInPixels) - { - var ratio = originalImage.Width > originalImage.Height - ? newSizeInPixels * 1.0 / originalImage.Width - : newSizeInPixels * 1.0 / originalImage.Height; - var newSize = new Size((int)(originalImage.Width * ratio), (int)(originalImage.Height * ratio)); - originalImage.Mutate(x => x.Resize(newSize)); - - var memoryStream = new MemoryStream(); - originalImage.SaveAsJpeg(memoryStream); - return memoryStream.ToArray(); - } } } diff --git a/IsraelHiking.API/Gpx/SerializarionExtensions.cs b/IsraelHiking.API/Gpx/SerializarionExtensions.cs index 5a95a2502..37fdede2d 100644 --- a/IsraelHiking.API/Gpx/SerializarionExtensions.cs +++ b/IsraelHiking.API/Gpx/SerializarionExtensions.cs @@ -186,5 +186,20 @@ public static GpxFile UpdateBounds(this GpxFile gpx) : new GpxMetadata(gpx.Metadata.Creator, gpx.Metadata.Name, gpx.Metadata.Description, gpx.Metadata.Author, gpx.Metadata.Copyright, gpx.Metadata.Links, gpx.Metadata.CreationTimeUtc, gpx.Metadata.Keywords, boundingBox, gpx.Metadata.Extensions); return gpx; } + + /// + /// Get a byte array and converts it to string + /// + /// + /// + public static string ToHashString(this byte[] hash) + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < hash.Length; i++) + { + sb.Append(hash[i].ToString("X2")); + } + return sb.ToString(); + } } } diff --git a/IsraelHiking.API/RegisterApi.cs b/IsraelHiking.API/RegisterApi.cs index 84382fe9e..a2edc3bb5 100644 --- a/IsraelHiking.API/RegisterApi.cs +++ b/IsraelHiking.API/RegisterApi.cs @@ -42,6 +42,7 @@ public static IServiceCollection AddIHMApi(this IServiceCollection services) services.AddTransient(); services.AddTransient(); services.AddTransient(); + services.AddTransient(); // registration here is what determines the order of which to merge points: services.AddTransient(); diff --git a/IsraelHiking.API/Services/Osm/DatabasesUpdaterService.cs b/IsraelHiking.API/Services/Osm/DatabasesUpdaterService.cs index 0dd508696..63c8fedae 100644 --- a/IsraelHiking.API/Services/Osm/DatabasesUpdaterService.cs +++ b/IsraelHiking.API/Services/Osm/DatabasesUpdaterService.cs @@ -29,6 +29,7 @@ public class DatabasesUpdaterService : IDatabasesUpdaterService private readonly IOsmLatestFileFetcherExecutor _latestFileFetcherExecutor; private readonly IGraphHopperGateway _graphHopperGateway; private readonly IPointsOfInterestFilesCreatorExecutor _pointsOfInterestFilesCreatorExecutor; + private readonly IImagesUrlsStorageExecutor _imagesUrlsStorageExecutor; private readonly ILogger _logger; /// /// Service's constructor @@ -43,6 +44,7 @@ public class DatabasesUpdaterService : IDatabasesUpdaterService /// /// /// + /// /// public DatabasesUpdaterService(IClientsFactory clinetsFactory, IElasticSearchGateway elasticSearchGateway, @@ -53,6 +55,7 @@ public DatabasesUpdaterService(IClientsFactory clinetsFactory, IOsmLatestFileFetcherExecutor latestFileFetcherExecutor, IGraphHopperGateway graphHopperGateway, IPointsOfInterestFilesCreatorExecutor pointsOfInterestFilesCreatorExecutor, + IImagesUrlsStorageExecutor imagesUrlsStorageExecutor, ILogger logger) { _elasticSearchGateway = elasticSearchGateway; @@ -65,6 +68,7 @@ public DatabasesUpdaterService(IClientsFactory clinetsFactory, _latestFileFetcherExecutor = latestFileFetcherExecutor; _graphHopperGateway = graphHopperGateway; _osmGateway = clinetsFactory.CreateNonAuthClient(); + _imagesUrlsStorageExecutor = imagesUrlsStorageExecutor; _logger = logger; } @@ -165,6 +169,10 @@ public async Task Rebuild(UpdateRequest request) { await RebuildPointsOfInterest(); } + if (request.Images) + { + await RebuildImages(); + } if (request.SiteMap) { await RebuildSiteMap(); @@ -184,13 +192,12 @@ private async Task RebuildRouting() _logger.LogInformation("Finished rebuilding routing database."); } - private async Task RebuildPointsOfInterest() { _logger.LogInformation("Starting rebuilding POIs database."); var osmSource = _pointsOfInterestAdapterFactory.GetBySource(Sources.OSM); var osmFeaturesTask = osmSource.GetPointsForIndexing(); - var sources = _pointsOfInterestAdapterFactory.GetAll().Where(s=> s.Source != Sources.OSM).Select(s => s.Source); + var sources = _pointsOfInterestAdapterFactory.GetAll().Where(s => s.Source != Sources.OSM).Select(s => s.Source); var otherTasks = sources.Select(s => _elasticSearchGateway.GetExternalPoisBySource(s)).ToArray(); await Task.WhenAll(new Task[] { osmFeaturesTask }.Concat(otherTasks)); var features = _featuresMergeExecutor.Merge(osmFeaturesTask.Result.Concat(otherTasks.SelectMany(t => t.Result)).ToList()); @@ -211,6 +218,23 @@ private async Task RebuildHighways() _logger.LogInformation("Finished rebuilding highways database."); } + private async Task RebuildImages() + { + _logger.LogInformation("Starting rebuilding images database."); + using (var stream = _latestFileFetcherExecutor.Get()) + { + var features = await _elasticSearchGateway.GetAllPointsOfInterest(); + var featuresUrls = features.SelectMany(f => + f.Attributes.GetNames() + .Where(n => n.StartsWith(FeatureAttributes.IMAGE_URL)) + .Select(k => f.Attributes[k].ToString()) + ); + var urls = await _osmRepository.GetImagesUrls(stream); + await _imagesUrlsStorageExecutor.DownloadAndStoreUrls(urls.Union(featuresUrls).ToList()); + } + _logger.LogInformation("Finished rebuilding images database."); + } + private async Task RebuildSiteMap() { _logger.LogInformation("Starting rebuilding sitemap."); diff --git a/IsraelHiking.Common/ImageItem.cs b/IsraelHiking.Common/ImageItem.cs new file mode 100644 index 000000000..82e852dcd --- /dev/null +++ b/IsraelHiking.Common/ImageItem.cs @@ -0,0 +1,9 @@ +namespace IsraelHiking.Common +{ + public class ImageItem + { + public string ImageUrl { get; set; } + public string Data { get; set; } + public string Hash { get; set; } + } +} diff --git a/IsraelHiking.Common/UpdateRequest.cs b/IsraelHiking.Common/UpdateRequest.cs index bda4de324..81eaed6ee 100644 --- a/IsraelHiking.Common/UpdateRequest.cs +++ b/IsraelHiking.Common/UpdateRequest.cs @@ -2,10 +2,34 @@ { public class UpdateRequest { - public bool OsmFile { get; set; } + /// + /// Download daily OSM file, this is done if update OSM is true + /// + public bool DownloadOsmFile { get; set; } + /// + /// Updates OSM file to be the lastet. + /// + public bool UpdateOsmFile { get; set; } + /// + /// Update site's routing database + /// public bool Routing { get; set; } + /// + /// Update points of interest database + /// public bool PointsOfInterest { get; set; } + /// + /// Update highway database + /// public bool Highways { get; set; } + /// + /// Updates images mirror + /// + public bool Images { get; set; } + /// + /// Update site map xml file and offline points of interest file + /// public bool SiteMap { get; set; } + } } diff --git a/IsraelHiking.DataAccess/ElasticSearchGateway.cs b/IsraelHiking.DataAccess/ElasticSearchGateway.cs index 361e0037f..1da5a72f0 100644 --- a/IsraelHiking.DataAccess/ElasticSearchGateway.cs +++ b/IsraelHiking.DataAccess/ElasticSearchGateway.cs @@ -44,6 +44,7 @@ public class ElasticSearchGateway : IElasticSearchGateway private const string SHARES = "shares"; private const string CUSTOM_USER_LAYERS = "custom_user_layers"; private const string EXTERNAL_POIS = "external_pois"; + private const string IMAGES = "images"; private const int NUMBER_OF_RESULTS = 10; private readonly ILogger _logger; @@ -99,6 +100,10 @@ public void Initialize() { _elasticClient.CreateIndex(CUSTOM_USER_LAYERS); } + if (_elasticClient.IndexExists(IMAGES).Exists == false) + { + CreateImagesIndex(); + } _logger.LogInformation("Finished initialing elasticsearch with uri: " + uri); } @@ -459,6 +464,21 @@ private Task CreateExternalPoisIndex() ); } + private Task CreateImagesIndex() + { + return _elasticClient.CreateIndexAsync(IMAGES, c => + c.Mappings(ms => + ms.Map(m => + m.Properties(p => + p.Keyword(k => k.Name(ii => ii.Hash)) + .Keyword(s => s.Name(n => n.ImageUrl)) + .Binary(a => a.Name(i => i.Data)) + ) + ) + ) + ); + } + private async Task UpdateUsingPaging(List features, string alias) { _logger.LogInformation($"Starting indexing {features.Count} records"); @@ -586,5 +606,56 @@ public Task DeleteUserLayer(MapLayerData layerData) { return _elasticClient.DeleteAsync(layerData.Id, d => d.Index(CUSTOM_USER_LAYERS)); } + + public async Task GetImageByUrl(string url) + { + var response = await _elasticClient.SearchAsync(s => + s.Index(IMAGES) + .Query(q => q.Match(m => m.Field(i => i.ImageUrl).Query(url))) + ); + return response.Documents.FirstOrDefault(); + } + + public async Task GetImageByHash(string hash) + { + var response = await _elasticClient.GetAsync(hash, r => r.Index(IMAGES)); + return response.Source; + } + public async Task> GetAllUrls() + { + var list = new List(); + var response = await _elasticClient.SearchAsync( + s => s.Index(IMAGES) + .Size(10000) + .Scroll("10s") + .Source(sf => sf + .Includes(i => i.Fields(f => f.ImageUrl, f => f.Hash)) + ).Query(q => q.MatchAll()) + ); + list.AddRange(response.Documents.Select(i => i.ImageUrl).ToList()); + var results = _elasticClient.Scroll("10s", response.ScrollId); + list.AddRange(results.Documents.Select(i => i.ImageUrl).ToList()); + while (results.Documents.Any()) + { + results = _elasticClient.Scroll("10s", results.ScrollId); + list.AddRange(results.Documents.Select(i => i.ImageUrl).ToList()); + } + return list; + } + + public Task StoreImage(ImageItem imageItem) + { + return _elasticClient.IndexAsync(imageItem, r => r.Index(IMAGES).Id(imageItem.Hash)); + } + + public async Task DeleteImageByUrl(string url) + { + var imageItem = await GetImageByUrl(url); + if (imageItem != null) + { + await _elasticClient.DeleteAsync(imageItem.Hash, d => d.Index(IMAGES)); + } + } + } } diff --git a/IsraelHiking.DataAccess/OpenStreetMap/OsmRepository.cs b/IsraelHiking.DataAccess/OpenStreetMap/OsmRepository.cs index 278df379f..0737bebae 100644 --- a/IsraelHiking.DataAccess/OpenStreetMap/OsmRepository.cs +++ b/IsraelHiking.DataAccess/OpenStreetMap/OsmRepository.cs @@ -9,6 +9,7 @@ using System.IO; using IsraelHiking.Common.Extensions; using OsmSharp; +using IsraelHiking.Common; namespace IsraelHiking.DataAccess.OpenStreetMap { @@ -74,5 +75,22 @@ public Task> GetPointsWithNoNameByTags(Stream osmFileStream, List> GetImagesUrls(Stream osmFileStream) + { + return Task.Run(() => + { + _logger.LogInformation("Starting extracting urls from OSM stream."); + osmFileStream.Seek(0, SeekOrigin.Begin); + var source = new PBFOsmStreamSource(osmFileStream); + var completeSource = new OsmSimpleCompleteStreamSource(source); + var urls = completeSource.Where(element => element.Tags.Any(t => t.Key.StartsWith(FeatureAttributes.IMAGE_URL))) + .SelectMany(element => element.Tags.Where(t => t.Key.StartsWith(FeatureAttributes.IMAGE_URL))) + .Select(tag => tag.Value) + .ToList(); + _logger.LogInformation("Finished extracting urls from OSM stream. " + urls.Count); + return urls; + }); + } } } diff --git a/IsraelHiking.DataAccess/RegisterDataAccess.cs b/IsraelHiking.DataAccess/RegisterDataAccess.cs index 9e140c19e..fcf582718 100644 --- a/IsraelHiking.DataAccess/RegisterDataAccess.cs +++ b/IsraelHiking.DataAccess/RegisterDataAccess.cs @@ -16,6 +16,7 @@ public static IServiceCollection AddIHMDataAccess(this IServiceCollection servic services.AddTransient(); services.AddSingleton(); services.AddSingleton(x => x.GetService()); + services.AddSingleton(x => x.GetService()); services.AddSingleton(); services.AddTransient(); services.AddTransient(); diff --git a/IsraelHiking.DataAccessInterfaces/IElasticSearchGateway.cs b/IsraelHiking.DataAccessInterfaces/IElasticSearchGateway.cs index f67743e90..6937fbcbd 100644 --- a/IsraelHiking.DataAccessInterfaces/IElasticSearchGateway.cs +++ b/IsraelHiking.DataAccessInterfaces/IElasticSearchGateway.cs @@ -5,7 +5,7 @@ namespace IsraelHiking.DataAccessInterfaces { - public interface IElasticSearchGateway : IRepository + public interface IElasticSearchGateway : IRepository, IImagesRepository { void Initialize(); Task> Search(string searchTerm, string language); diff --git a/IsraelHiking.DataAccessInterfaces/IImagesRepository.cs b/IsraelHiking.DataAccessInterfaces/IImagesRepository.cs new file mode 100644 index 000000000..bd4845362 --- /dev/null +++ b/IsraelHiking.DataAccessInterfaces/IImagesRepository.cs @@ -0,0 +1,17 @@ +using IsraelHiking.Common; +using System; +using System.Collections.Generic; +using System.Text; +using System.Threading.Tasks; + +namespace IsraelHiking.DataAccessInterfaces +{ + public interface IImagesRepository + { + Task GetImageByUrl(string url); + Task GetImageByHash(string hash); + Task> GetAllUrls(); + Task StoreImage(ImageItem imageItem); + Task DeleteImageByUrl(string url); + } +} diff --git a/IsraelHiking.DataAccessInterfaces/IOsmRepository.cs b/IsraelHiking.DataAccessInterfaces/IOsmRepository.cs index dc3a719f3..2672f5e72 100644 --- a/IsraelHiking.DataAccessInterfaces/IOsmRepository.cs +++ b/IsraelHiking.DataAccessInterfaces/IOsmRepository.cs @@ -11,5 +11,6 @@ public interface IOsmRepository Task>> GetElementsWithName(Stream osmFileStream); Task> GetAllHighways(Stream osmFileStream); Task> GetPointsWithNoNameByTags(Stream osmFileStream, List> tags); + Task> GetImagesUrls(Stream osmFileStream); } } \ No newline at end of file diff --git a/IsraelHiking.Web/sources/application/services/image-resize.service.ts b/IsraelHiking.Web/sources/application/services/image-resize.service.ts index 9d9ce5892..31023c003 100644 --- a/IsraelHiking.Web/sources/application/services/image-resize.service.ts +++ b/IsraelHiking.Web/sources/application/services/image-resize.service.ts @@ -45,7 +45,7 @@ export class ImageResizeService { private resizeImageAndConvertToAny(file: File, convertMethod: (data: string, name: string, geoLocation: LatLngAlt) => TReturn, - throwIfNoLocation = true) { + throwIfNoLocation = true): Promise { return new Promise((resolve, reject) => { let reader = new FileReader(); reader.onload = (event: any) => { diff --git a/Tests/IsraelHiking.API.Tests/Controllers/PointsOfInterestControllerTests.cs b/Tests/IsraelHiking.API.Tests/Controllers/PointsOfInterestControllerTests.cs index 69abf96a1..0e54b8eb6 100644 --- a/Tests/IsraelHiking.API.Tests/Controllers/PointsOfInterestControllerTests.cs +++ b/Tests/IsraelHiking.API.Tests/Controllers/PointsOfInterestControllerTests.cs @@ -1,5 +1,6 @@ using IsraelHiking.API.Controllers; using IsraelHiking.API.Converters; +using IsraelHiking.API.Executors; using IsraelHiking.API.Services; using IsraelHiking.API.Services.Poi; using IsraelHiking.Common; @@ -15,6 +16,7 @@ using OsmSharp.API; using OsmSharp.IO.API; using System.IO; +using System.Security.Cryptography; namespace IsraelHiking.API.Tests.Controllers { @@ -26,6 +28,7 @@ public class PointsOfInterestControllerTests private IAuthClient _osmGateway; private ITagsHelper _tagHelper; private IPointsOfInterestProvider _pointsOfInterestProvider; + private IImagesUrlsStorageExecutor _imagesUrlsStorageExecutor; private LruCache _cache; [TestInitialize] @@ -35,12 +38,20 @@ public void TestInitialize() _tagHelper = Substitute.For(); _wikimediaCommonGateway = Substitute.For(); _osmGateway = Substitute.For(); + _imagesUrlsStorageExecutor = Substitute.For(); var optionsProvider = Substitute.For>(); optionsProvider.Value.Returns(new ConfigurationData()); _cache = new LruCache(optionsProvider, Substitute.For()); var factory = Substitute.For(); factory.CreateOAuthClient(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()).Returns(_osmGateway); - _controller = new PointsOfInterestController(factory, _tagHelper, _wikimediaCommonGateway, _pointsOfInterestProvider, new Base64ImageStringToFileConverter(), optionsProvider, _cache); + _controller = new PointsOfInterestController(factory, + _tagHelper, + _wikimediaCommonGateway, + _pointsOfInterestProvider, + new Base64ImageStringToFileConverter(), + _imagesUrlsStorageExecutor, + optionsProvider, + _cache); } [TestMethod] @@ -158,10 +169,36 @@ public void UploadPointOfInterest_WithImageIdExists_ShouldUpdate() ImagesUrls = new [] { "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//" + "8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==", "http://link.com"} }; + _imagesUrlsStorageExecutor.GetImageUrlIfExists(Arg.Any(), Arg.Any()).Returns((string)null); + _controller.UploadPointOfInterest(poi, "he").Wait(); _wikimediaCommonGateway.Received(1).UploadImage(poi.Title, poi.Description, user.DisplayName, "title.png", Arg.Any(), Arg.Any()); _wikimediaCommonGateway.Received(1).GetImageUrl(Arg.Any()); + _imagesUrlsStorageExecutor.Received(1).StoreImage(Arg.Any(), Arg.Any(), Arg.Any()); + } + + [TestMethod] + public void UploadPointOfInterest_WithImageInRepository_ShouldNotUploadImage() + { + var user = new User { DisplayName = "DisplayName" }; + _controller.SetupIdentity(_cache); + _osmGateway.GetUserDetails().Returns(user); + var poi = new PointOfInterestExtended + { + Title = "title", + Source = Sources.OSM, + Id = "1", + Location = new LatLng(5, 6), + ImagesUrls = new[] { "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//" + + "8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==", "http://link.com"} + }; + _imagesUrlsStorageExecutor.GetImageUrlIfExists(Arg.Any(), Arg.Any()).Returns("some-url"); + + _controller.UploadPointOfInterest(poi, "he").Wait(); + + _wikimediaCommonGateway.DidNotReceive().UploadImage(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + _wikimediaCommonGateway.DidNotReceive().GetImageUrl(Arg.Any()); } [TestMethod] diff --git a/Tests/IsraelHiking.API.Tests/Services/Osm/DatabasesUpdaterServiceTests.cs b/Tests/IsraelHiking.API.Tests/Services/Osm/DatabasesUpdaterServiceTests.cs index 6ab3ef989..c8489e21e 100644 --- a/Tests/IsraelHiking.API.Tests/Services/Osm/DatabasesUpdaterServiceTests.cs +++ b/Tests/IsraelHiking.API.Tests/Services/Osm/DatabasesUpdaterServiceTests.cs @@ -63,6 +63,7 @@ public void TestInitialize() _osmLatestFileFetcherExecutor, _graphHopperGateway, _pointsOfInterestFilesCreatorExecutor, + null, Substitute.For()); } diff --git a/Tests/IsraelHiking.DataAccess.Tests/ElasticSearch/ElasticSearchGatewayTests.cs b/Tests/IsraelHiking.DataAccess.Tests/ElasticSearch/ElasticSearchGatewayTests.cs index 5d15c0db8..9ac4f75e3 100644 --- a/Tests/IsraelHiking.DataAccess.Tests/ElasticSearch/ElasticSearchGatewayTests.cs +++ b/Tests/IsraelHiking.DataAccess.Tests/ElasticSearch/ElasticSearchGatewayTests.cs @@ -104,6 +104,39 @@ public void GetPoisBySource_ShouldGetThem() Assert.IsTrue(features.Count > 10000); } + + [TestMethod] + [Ignore] + public void GetImageByUrl_ShouldGetIt() + { + _gateway.Initialize(); + + var imageItem = _gateway.GetImageByUrl("https://upload.wikimedia.org/wikipedia/commons/0/05/Israel_Hiking_Map_%D7%97%D7%95%D7%A8%D7%91%D7%AA_%D7%97%D7%A0%D7%95%D7%AA_2.jpeg").Result; + + Assert.IsNotNull(imageItem); + } + + [TestMethod] + [Ignore] + public void GetImageHash_ShouldGetIt() + { + _gateway.Initialize(); + + var imageItem = _gateway.GetImageByHash("7F4E8F16362FD1E527FFBC516E0197C7").Result; + + Assert.IsNotNull(imageItem); + } + + [TestMethod] + [Ignore] + public void GetAllUrls_ShouldGetThem() + { + _gateway.Initialize(); + + var imageItem = _gateway.GetAllUrls().Result; + + Assert.IsNotNull(imageItem); + } } } diff --git a/Tests/IsraelHiking.DataAccess.Tests/RemoteFileFetcherGatewayTests.cs b/Tests/IsraelHiking.DataAccess.Tests/RemoteFileFetcherGatewayTests.cs index 77e9939d0..f9f12e5c6 100644 --- a/Tests/IsraelHiking.DataAccess.Tests/RemoteFileFetcherGatewayTests.cs +++ b/Tests/IsraelHiking.DataAccess.Tests/RemoteFileFetcherGatewayTests.cs @@ -1,7 +1,6 @@ using System.Linq; using Microsoft.VisualStudio.TestTools.UnitTesting; using NSubstitute; -using Microsoft.Extensions.Logging; using IsraelHiking.DataAccessInterfaces; using System.Net.Http; @@ -17,7 +16,7 @@ public void TestInitialize() { var factory = Substitute.For(); factory.CreateClient().Returns(new HttpClient()); - _gateway = new RemoteFileFetcherGateway(factory, Substitute.For()); + _gateway = new RemoteFileFetcherGateway(factory, new TraceLogger()); } @@ -40,11 +39,21 @@ public void TestGateway_JeeptripTwl() } [TestMethod] + [Ignore] public void TestGateway_InvalidFile() { var response = _gateway.GetFileContent("http://israelhiking.osm.org.il/Hebrew/Tiles/11/1228/826.png").Result; Assert.IsFalse(response.Content.Any()); } + + [TestMethod] + [Ignore] + public void TestGateway_ImageFile() + { + var response = _gateway.GetFileContent("https://upload.wikimedia.org/wikipedia/commons/2/2a/Israel_Hiking_Map_%D7%97%D7%95%D7%A8%D7%91%D7%AA_%D7%9C%D7%95%D7%96%D7%94.jpeg").Result; + + Assert.IsFalse(response.Content.Any()); + } } }