-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Resolves #1183 - Wikimedia duplicate images creates bad links
- Loading branch information
Showing
23 changed files
with
490 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
using System.Collections.Generic; | ||
using System.Security.Cryptography; | ||
using System.Threading.Tasks; | ||
|
||
namespace IsraelHiking.API.Executors | ||
{ | ||
/// <summary>
/// Keeps a record of stored images so that the same image is not uploaded to wikimedia twice
/// </summary>
public interface IImagesUrlsStorageExecutor
{
    /// <summary>
    /// Looks up the url of an image that is already in the repository
    /// </summary>
    /// <param name="md5">The hash algorithm instance used for hashing the content</param>
    /// <param name="content">The image content</param>
    /// <returns>The image url, or null when the image is not in the repository</returns>
    Task<string> GetImageUrlIfExists(MD5 md5, byte[] content);

    /// <summary>
    /// Computes the hash of an image, resizes it and stores it in the repository
    /// </summary>
    /// <param name="md5">The hash algorithm instance used for hashing the content</param>
    /// <param name="content">The image content</param>
    /// <param name="imageUrl">The url of the image</param>
    /// <returns></returns>
    Task StoreImage(MD5 md5, byte[] content, string imageUrl);

    /// <summary>
    /// Downloads the content of the given urls, calculates its hash and stores it in the database
    /// </summary>
    /// <param name="imagesUrls">The urls of the images to download and store</param>
    /// <returns></returns>
    Task DownloadAndStoreUrls(List<string> imagesUrls);
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
139 changes: 139 additions & 0 deletions
139
IsraelHiking.API/Executors/ImagesUrlsStorageExecutor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
using IsraelHiking.API.Gpx; | ||
using IsraelHiking.Common; | ||
using IsraelHiking.DataAccessInterfaces; | ||
using Microsoft.Extensions.Logging; | ||
using SixLabors.ImageSharp; | ||
using SixLabors.ImageSharp.Processing; | ||
using SixLabors.Primitives; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Security.Cryptography; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
|
||
namespace IsraelHiking.API.Executors | ||
{ | ||
/// <inheritdoc/>
public class ImagesUrlsStorageExecutor : IImagesUrlsStorageExecutor
{
    /// <summary>Maximal number of image urls that are processed at the same time</summary>
    private const int MaxDegreeOfParallelism = 20;
    /// <summary>Number of times a download is attempted before giving up on an image</summary>
    private const int DownloadAttempts = 3;
    /// <summary>Time to wait after a failed download attempt</summary>
    private const int RetryDelayInMilliseconds = 200;
    /// <summary>A progress line is written to the log every time this number of images was processed</summary>
    private const int ProgressLogInterval = 100;
    /// <summary>Length in pixels of the longer side of the stored image</summary>
    private const int StoredImageSizeInPixels = 200;

    private readonly IImagesRepository _imagesRepository;
    private readonly IRemoteFileSizeFetcherGateway _remoteFileFetcherGateway;
    private readonly ILogger _logger;

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="imagesRepository">The repository the images are stored in and deleted from</param>
    /// <param name="remoteFileFetcherGateway">Used for getting the size and the content of remote images</param>
    /// <param name="logger">Used for progress and problem reporting</param>
    public ImagesUrlsStorageExecutor(IImagesRepository imagesRepository,
        IRemoteFileSizeFetcherGateway remoteFileFetcherGateway,
        ILogger logger)
    {
        _imagesRepository = imagesRepository;
        _remoteFileFetcherGateway = remoteFileFetcherGateway;
        _logger = logger;
    }

    /// <inheritdoc/>
    public async Task DownloadAndStoreUrls(List<string> imagesUrls)
    {
        // Materialized once into a set: it feeds the removal pass and is probed once per url inside the loop.
        var existingUrls = new HashSet<string>(await _imagesRepository.GetAllUrls());
        var needToRemove = existingUrls.Except(imagesUrls).ToList();
        _logger.LogInformation($"Need to remove {needToRemove.Count} images that are no longer relevant");
        foreach (var imageUrlToRemove in needToRemove)
        {
            await _imagesRepository.DeleteImageByUrl(imageUrlToRemove);
        }
        _logger.LogInformation("Finished removing images");

        var counter = 0;
        // Parallel.ForEach only accepts synchronous delegates, hence the blocking calls inside the loop body.
        Parallel.ForEach(imagesUrls, new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism }, imageUrl =>
        {
            try
            {
                // Use the value returned by Increment: reading the shared counter again is racy
                // and may skip or duplicate progress lines.
                var processed = Interlocked.Increment(ref counter);
                if (processed % ProgressLogInterval == 0)
                {
                    _logger.LogInformation($"Indexed {processed} images of {imagesUrls.Count}");
                }
                // An image that is already stored and still has a positive remote size is left untouched.
                if (existingUrls.Contains(imageUrl) && _remoteFileFetcherGateway.GetFileSize(imageUrl).Result > 0)
                {
                    return;
                }
                var content = DownloadContentWithRetries(imageUrl);
                if (content.Length == 0)
                {
                    _imagesRepository.DeleteImageByUrl(imageUrl).Wait();
                    return;
                }
                // Hash algorithm instances are not thread safe, so every image gets its own instance
                // instead of sharing a single one between the parallel workers.
                using (var md5 = MD5.Create())
                {
                    StoreImage(md5, content, imageUrl).Wait();
                }
            }
            catch (Exception ex)
            {
                _logger.LogWarning("There was a problem with the following image url: " + imageUrl + " " + ex.ToString());
            }
        });
    }

    /// <summary>
    /// Tries to download the content of an image, waiting a short while after every failed attempt
    /// </summary>
    /// <param name="imageUrl">The url of the image to download</param>
    /// <returns>The downloaded content, or an empty array when all the attempts failed</returns>
    private byte[] DownloadContentWithRetries(string imageUrl)
    {
        for (var attempt = 0; attempt < DownloadAttempts; attempt++)
        {
            try
            {
                return _remoteFileFetcherGateway.GetFileContent(imageUrl).Result.Content;
            }
            catch
            {
                // Best effort: failures are not reported here, the caller handles the empty result.
                Task.Delay(RetryDelayInMilliseconds).Wait();
            }
        }
        return new byte[0];
    }

    /// <summary>
    /// Scales the image in place so that its longer side is <paramref name="newSizeInPixels"/> long
    /// and encodes the result as jpeg
    /// </summary>
    /// <param name="originalImage">The image to resize, it is mutated by this method</param>
    /// <param name="newSizeInPixels">The requested length of the longer side</param>
    /// <returns>The jpeg encoded bytes of the resized image</returns>
    private byte[] ResizeImage(Image originalImage, int newSizeInPixels)
    {
        var ratio = originalImage.Width > originalImage.Height
            ? newSizeInPixels * 1.0 / originalImage.Width
            : newSizeInPixels * 1.0 / originalImage.Height;
        var newSize = new Size((int)(originalImage.Width * ratio), (int)(originalImage.Height * ratio));
        originalImage.Mutate(x => x.Resize(newSize));

        using (var memoryStream = new MemoryStream())
        {
            originalImage.SaveAsJpeg(memoryStream);
            return memoryStream.ToArray();
        }
    }

    /// <inheritdoc/>
    public Task StoreImage(MD5 md5, byte[] content, string imageUrl)
    {
        // The hash is computed on the original content, before it is replaced by the resized image.
        var hash = md5.ComputeHash(content).ToHashString();
        byte[] resizedContent;
        // The decoded image is only needed for creating the resized copy, release it right after.
        using (var image = Image.Load(content, out var _))
        {
            resizedContent = ResizeImage(image, StoredImageSizeInPixels);
        }
        return _imagesRepository.StoreImage(new ImageItem
        {
            ImageUrl = imageUrl,
            Data = "data:image/jpeg;base64," + Convert.ToBase64String(resizedContent),
            Hash = hash
        });
    }

    /// <inheritdoc/>
    public async Task<string> GetImageUrlIfExists(MD5 md5, byte[] content)
    {
        var hash = md5.ComputeHash(content).ToHashString();
        var imageItem = await _imagesRepository.GetImageByHash(hash);
        var imageUrl = imageItem?.ImageUrl;
        if (imageUrl != null)
        {
            _logger.LogInformation($"Found exiting image with url: {imageUrl}");
        }
        return imageUrl;
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.