Skip to content

Commit

Permalink
Merge pull request #48 from OUCC/feat/#41
Browse files Browse the repository at this point in the history
#41 S3へのアップロードを実装
  • Loading branch information
aiueo-1234 authored May 3, 2024
2 parents 2679c64 + d9130ba commit cb5fd6e
Show file tree
Hide file tree
Showing 31 changed files with 263 additions and 62 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CI

on:
push:
branches: [main]
branches: [main, release/*]
pull_request:
branches: [main]
branches: [main, release/*]

permissions:
contents: read
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ public interface IScraperSelectorService
/// </summary>
public bool IsMatchSites(string url);

public ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct);
public ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct);
}
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ public interface IScrapingService
{
public bool IsMatchSite(Uri url);

public ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct);
public ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct);
}
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Models/EpubDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace KoeBook.Epub.Models;

public class EpubDocument(string title, string author, string coverFilePath, Guid id)
public class EpubDocument(string title, string author, Guid id, string coverFilePath = "")
{
public string Title { get; set; } = title;
public string Author { get; set; } = author;
Expand Down
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Services/AiStoryAnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public partial class AiStoryAnalyzerService(ISplitBraceService splitBraceService

public EpubDocument CreateEpubDocument(AiStory aiStory, Guid id)
{
return new EpubDocument(aiStory.Title, "AI", "", id)
return new EpubDocument(aiStory.Title, "AI", id)
{
Chapters = [new Chapter()
{
Expand Down
3 changes: 2 additions & 1 deletion Epub/KoeBook.Epub/Services/AnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async ValueTask<BookScripts> AnalyzeAsync(BookProperties bookProperties,
switch (bookProperties)
{
case { SourceType: SourceType.Url or SourceType.FilePath, Source: string uri }:
document = await _scrapingService.ScrapingAsync(uri, coverFilePath, tempDirectory, bookProperties.Id, cancellationToken);
document = await _scrapingService.ScrapingAsync(uri, tempDirectory, bookProperties.Id, cancellationToken);
break;
case { SourceType: SourceType.AiStory, Source: AiStory aiStory }:
document = _aiStoryAnalyzerService.CreateEpubDocument(aiStory, bookProperties.Id);
Expand All @@ -45,6 +45,7 @@ public async ValueTask<BookScripts> AnalyzeAsync(BookProperties bookProperties,
}

_createCoverFileService.Create(document.Title, document.Author, coverFilePath);
document.CoverFilePath = coverFilePath;
}
catch (EbookException) { throw; }
catch (Exception ex)
Expand Down
12 changes: 10 additions & 2 deletions Epub/KoeBook.Epub/Services/EpubGenerateService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using KoeBook.Core.Models;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using NAudio.Wave;

namespace KoeBook.Epub.Services;

Expand All @@ -18,9 +19,16 @@ public async ValueTask<string> GenerateEpubAsync(BookScripts bookScripts, string

var document = _documentStoreService.Documents.Single(d => d.Id == bookScripts.BookProperties.Id);

foreach (var scriptLine in bookScripts.ScriptLines)
for (var i = 0; i < bookScripts.ScriptLines.Length; i++)
{
scriptLine.Audio = new Audio(await _soundGenerationService.GenerateLineSoundAsync(scriptLine, bookScripts.Options, cancellationToken).ConfigureAwait(false));
var scriptLine = bookScripts.ScriptLines[i];
var wavData = await _soundGenerationService.GenerateLineSoundAsync(scriptLine, bookScripts.Options, cancellationToken).ConfigureAwait(false);
using var ms = new MemoryStream(wavData);
using var reader = new WaveFileReader(ms);
var tmpMp3Path = Path.Combine(tempDirectory, $"{document.Title}{i}.mp3");
MediaFoundationEncoder.EncodeToMp3(reader, tmpMp3Path);
using var mp3Stream = new Mp3FileReader(tmpMp3Path);
scriptLine.Audio = new Audio(mp3Stream.TotalTime, tmpMp3Path);
}

if (await _createService.TryCreateEpubAsync(document, tempDirectory, cancellationToken).ConfigureAwait(false))
Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ public bool IsMatchSites(string url)
}
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct)
{
var uri = new Uri(url);

foreach (var service in _scrapingServices)
{
if (service.IsMatchSite(uri))
return await service.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct);
return await service.ScrapingAsync(url, tempDirectory, id, ct);
}

throw new ArgumentException("対応するURLではありません");
Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public bool IsMatchSite(Uri uri)
return uri.Host == "www.aozora.gr.jp";
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string imageDirectory, Guid id, CancellationToken ct)
{
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
Expand All @@ -37,7 +37,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"著者の取得に失敗しました。\n以下のリンクから正しい小説のリンクを取得してください。\n{GetCardUrl(url)}");

// EpubDocument の生成
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id);
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), id);

var (contentsIds, hasChapter, hasSection) = LoadToc(doc, document);

Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public bool IsMatchSite(Uri uri)
return uri.Host == "ncode.syosetu.com";
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string imageDirectory, Guid id, CancellationToken ct)
{
var ncode = GetNcode(url);
var novelInfo = await GetNovelInfoAsync(ncode, ct).ConfigureAwait(false);
Expand Down Expand Up @@ -53,7 +53,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
? bookAuthorTag.InnerHtml
: bookAuthorElement.InnerHtml.Replace("作者:", "");

var document = new EpubDocument(bookTitle, bookAuthor, coverFilePath, id);
var document = new EpubDocument(bookTitle, bookAuthor, id);
if (novelInfo.IsSerial) // 連載の時
{
async IAsyncEnumerable<(string? title, Section section)> LoadDetailsAsync(IBrowsingContext context, NovelInfo novelInfo, string imageDirectory, [EnumeratorCancellation] CancellationToken ct)
Expand Down
6 changes: 6 additions & 0 deletions KoeBook.Core/Contracts/Services/IS3UploadService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace KoeBook.Core.Contracts.Services;

/// <summary>
/// Abstraction over uploading a generated file to S3 storage.
/// </summary>
public interface IS3UploadService
{
/// <summary>
/// Uploads the file located at <paramref name="filePath"/> and returns a string that identifies the uploaded file.
/// </summary>
/// <param name="filePath">Path of the local file to upload.</param>
/// <param name="title">Title associated with the file (the S3-backed implementation uses it to build the object name).</param>
/// <param name="cancellationToken">Token used to cancel the upload.</param>
/// <returns>A string for the uploaded file (the S3-backed implementation returns a download URL).</returns>
ValueTask<string> UploadFileAsync(string filePath, string title, CancellationToken cancellationToken);
}
3 changes: 3 additions & 0 deletions KoeBook.Core/EbookException.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,7 @@ public enum ExceptionType

[EnumMember(Value = "表紙の画像の生成に失敗しました")]
CreateCoverFileFailed,

[EnumMember(Value = "ファイルのアップロードに失敗しました")]
S3UploadFailed,
}
2 changes: 2 additions & 0 deletions KoeBook.Core/KoeBook.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="AWSSDK.Extensions.NETCore.Setup" Version="3.7.300" />
<PackageReference Include="AWSSDK.S3" Version="3.7.307.25" />
<PackageReference Include="Betalgo.OpenAI" Version="8.1.1" />
<PackageReference Include="Claudia" Version="1.2.0" />
<PackageReference Include="FastEnum" Version="1.8.0" />
Expand Down
24 changes: 7 additions & 17 deletions KoeBook.Core/Models/Audio.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
using NAudio.Wave;
using System.IO;
using NAudio.Wave;

namespace KoeBook.Epub.Models;

public sealed class Audio
public sealed class Audio(TimeSpan totalTIme, string tempFilePath)
{
public TimeSpan TotalTime { get; }
private readonly byte[] _mp3Data;
public TimeSpan TotalTime { get; } = totalTIme;
public string TempFilePath { get; } = tempFilePath;

public Audio(byte[] mp3Data)
public FileStream GetStream()
{
_mp3Data = mp3Data;
using var ms = new MemoryStream();
ms.Write(_mp3Data.AsSpan());
ms.Flush();
ms.Position = 0;
using var reader = new Mp3FileReader(ms);
TotalTime = reader.TotalTime;
}

public MemoryStream GetStream()
{
return new MemoryStream(_mp3Data);
return new FileStream(TempFilePath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, true);
}
}
7 changes: 6 additions & 1 deletion KoeBook.Core/Services/ClaudeAnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ private Dictionary<string, string> ExtractCharacterVoiceMapping(string response,
.Select(l =>
{
var characterId = l[1..l.IndexOf('.')];
var voiceTypeSpan = l.AsSpan()[(l.IndexOf(':') + 2)..];
var voiceTypeSpan = l.AsSpan()[(l.IndexOf(':') + 2)..].Trim();
// ボイス割り当てが複数あたったときに先頭のものを使う(例:群衆 AdultMan, AdultWoman)
var separatorIndex = voiceTypeSpan.IndexOfAny(_searchValues);
if (separatorIndex > 0)
Expand Down Expand Up @@ -174,10 +174,15 @@ private static (Character[], Dictionary<string, string>) ExtractCharacterList(st
var voiceIdLine = zippedLine.First.AsSpan();
voiceIdLine = voiceIdLine[(voiceIdLine.IndexOf(' ') + 2)..];//cまで無視
voiceIdLine = voiceIdLine[..voiceIdLine.IndexOf(' ')];// 二人以上話す時には先頭のものを使う
if (voiceIdLine[^1] == '.')// idに"."がつくことがあるので削除する
{
voiceIdLine = voiceIdLine[..^1];
}
if (characterId2Name.TryGetValue(voiceIdLine.ToString(), out var characterName))
{
zippedLine.Second.Character = characterName;
}
else { throw new EbookException(ExceptionType.ClaudeTalkerAndStyleSettingFailed); }
return 0;
}).Count();
if (voiceIdLinesCount != scriptLines.Length)
Expand Down
28 changes: 28 additions & 0 deletions KoeBook.Core/Services/S3UploadService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using Amazon.S3;
using Amazon.S3.Transfer;
using KoeBook.Core.Contracts.Services;

namespace KoeBook.Core.Services;

/// <summary>
/// Uploads generated EPUB files to an S3 bucket and returns the public URL of the uploaded object.
/// </summary>
public class S3UploadService(IAmazonS3 s3Client) : IS3UploadService
{
    // The bucket name is hard-coded instead of being read from configuration.
    private const string S3BucketName = "koebook-gakusai-storage";

    private readonly IAmazonS3 _s3Client = s3Client;

    /// <summary>
    /// Uploads the file at <paramref name="filePath"/> to the bucket under the key
    /// <c>{guid}/{title}.epub</c>, where the GUID is freshly generated for every call.
    /// </summary>
    /// <param name="filePath">Path of the local file to upload.</param>
    /// <param name="title">Title used as the object name (without extension).</param>
    /// <param name="cancellationToken">Token used to cancel the upload.</param>
    /// <returns>The URL of the uploaded file; the title part is percent-encoded.</returns>
    /// <exception cref="EbookException">
    /// Thrown with <see cref="ExceptionType.S3UploadFailed"/> when S3 reports an error.
    /// </exception>
    public async ValueTask<string> UploadFileAsync(string filePath, string title, CancellationToken cancellationToken)
    {
        try
        {
            var guid = Guid.NewGuid();

            // TransferUtility is IDisposable. Because the client is supplied by the caller,
            // disposing the utility releases only the utility itself, not the shared client.
            using var fileTransferUtility = new TransferUtility(_s3Client);
            await fileTransferUtility
                .UploadAsync(filePath, S3BucketName, $"{guid}/{title}.epub", cancellationToken)
                .ConfigureAwait(false);

            return $"http://storage.koebook.oucc.org/{guid}/{Uri.EscapeDataString(title)}.epub";
        }
        catch (AmazonS3Exception e)
        {
            throw new EbookException(ExceptionType.S3UploadFailed, innerException: e);
        }
    }
}
2 changes: 1 addition & 1 deletion KoeBook.Core/Services/SoundGenerationSelectorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public async ValueTask InitializeAsync(CancellationToken cancellationToken)
.GetFromJsonAsync<Dictionary<string, ModelInfo>>("/models/info", ExceptionType.InitializeFailed, cancellationToken)
.ConfigureAwait(false);

Models = models.Select(kvp => new SoundModel(kvp.Key, kvp.Value.FirstSpk, kvp.Value.Styles)).ToArray();
Models = models.Select(kvp => new SoundModel(kvp.Key, kvp.Value.FirstSpk.Replace(" ", ""), kvp.Value.Styles)).ToArray();
}
catch (EbookException e) when (e.ExceptionType == ExceptionType.UnknownStyleBertVitsRoot) { }
}
Expand Down
92 changes: 85 additions & 7 deletions KoeBook.Core/Services/SoundGenerationService.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
using System.Web;
using System.Buffers;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Web;
using KoeBook.Core.Contracts.Services;
using KoeBook.Core.Models;
using NAudio.Wave;

namespace KoeBook.Core.Services;

Expand All @@ -17,11 +21,85 @@ public async ValueTask<byte[]> GenerateLineSoundAsync(ScriptLine scriptLine, Boo
var soundModel = _soundGenerationSelectorService.Models.FirstOrDefault(m => m.Name == model)
?? throw new EbookException(ExceptionType.SoundGenerationFailed);
var style = soundModel.Styles.Contains(scriptLine.Style) ? scriptLine.Style : soundModel.Styles[0];
var queryCollection = HttpUtility.ParseQueryString(string.Empty);
queryCollection.Add("text", scriptLine.Text);
queryCollection.Add("model_id", soundModel.Id);
queryCollection.Add("style", scriptLine.Style);
return await _styleBertVitsClientService
.GetAsByteArrayAsync($"/voice/{queryCollection}", ExceptionType.SoundGenerationFailed, cancellationToken).ConfigureAwait(false);
using var msWriter = new MemoryStream();
WaveFileWriter? writer = null;
byte[] dataBuffer = ArrayPool<byte>.Shared.Rent(1024);
try
{
await foreach (var voice in GenerateSoundAsync(scriptLine.Text, style, soundModel.Id, cancellationToken))
{
if (voice.Length > dataBuffer.Length)
{
ArrayPool<byte>.Shared.Return(dataBuffer);
dataBuffer = ArrayPool<byte>.Shared.Rent(voice.Length);
}
using var msReader = new MemoryStream(voice);
using var reader = new WaveFileReader(msReader);
var read = await reader.ReadAsync(dataBuffer, cancellationToken);
if (writer is null)
{
writer = new WaveFileWriter(msWriter, reader.WaveFormat);
}
await writer.WriteAsync(dataBuffer.AsMemory()[..read], cancellationToken);
}
if (writer is null)
{
throw new EbookException(ExceptionType.SoundGenerationFailed);
}
await writer.FlushAsync(cancellationToken);
return msWriter.ToArray();
}
catch { throw; }
finally
{
ArrayPool<byte>.Shared?.Return(dataBuffer);
writer?.Dispose();
}
}

/// <summary>
/// Splits <paramref name="text"/> into segments of at most roughly 300 characters and requests
/// one voice clip per segment from the <c>/voice</c> endpoint, yielding the responses in order.
/// </summary>
/// <param name="text">Text to synthesize.</param>
/// <param name="style">Value sent as the <c>style</c> query parameter.</param>
/// <param name="modelId">Value sent as the <c>model_id</c> query parameter.</param>
/// <param name="cancellationToken">Token forwarded to every request.</param>
/// <returns>The raw response bytes for each segment (presumably WAV data; confirm against the caller).</returns>
private async IAsyncEnumerable<byte[]> GenerateSoundAsync(string text, string style, string modelId, [EnumeratorCancellation] CancellationToken cancellationToken)
{
    foreach (var segment in SplitPeriod(text, 300))
    {
        // The collection returned by ParseQueryString URL-encodes its entries in ToString().
        var query = HttpUtility.ParseQueryString(string.Empty);
        query["text"] = segment;
        query["model_id"] = modelId;
        query["style"] = style;

        var requestPath = $"/voice?{query}";
        var voice = await _styleBertVitsClientService
            .GetAsByteArrayAsync(requestPath, ExceptionType.SoundGenerationFailed, cancellationToken)
            .ConfigureAwait(false);

        yield return voice;
    }
}

/// <summary>
/// Splits <paramref name="text"/> into consecutive segments of at most <paramref name="limit"/>
/// characters, preferring to cut right after the last Japanese full stop ('。') inside each window.
/// </summary>
/// <remarks>
/// Concatenating the returned segments always reproduces <paramref name="text"/>.
/// When a window contains no '。', the segment is cut at exactly <paramref name="limit"/> characters
/// so that the scan always advances (previously this case never terminated).
/// Empty segments are never produced by the splitting path.
/// </remarks>
/// <param name="text">Text to split.</param>
/// <param name="limit">Maximum number of characters per segment; must be positive.</param>
/// <returns>The segments in their original order.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
private IEnumerable<string> SplitPeriod(string text, int limit)
{
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(limit);

    // Text that already fits is passed through untouched (including an empty string).
    if (text.Length <= limit)
    {
        yield return text;
        yield break;
    }

    var start = 0;
    while (text.Length - start > limit)
    {
        // Search backwards inside the window [start, start + limit) for the last full stop.
        var periodIndex = text.LastIndexOf('。', start + limit - 1, limit);

        // Cut just after the full stop; with no full stop, fall back to a hard cut at the limit.
        var end = periodIndex >= 0 ? periodIndex + 1 : start + limit;

        yield return text[start..end];
        start = end;
    }

    // The remainder fits within the limit; skip it when the last cut consumed the whole text.
    if (start < text.Length)
    {
        yield return text[start..];
    }
}
}
6 changes: 3 additions & 3 deletions KoeBook.Test/Epub/EpubDocumentTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class EpubDocumentTest
[Fact]
public void EnsureChapter()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand All @@ -29,7 +29,7 @@ public void EnsureChapter()
[Fact]
public void EnsureSection()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand Down Expand Up @@ -77,7 +77,7 @@ public void EnsureSection()
[Fact]
public void EnsureParagraph()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand Down
Loading

0 comments on commit cb5fd6e

Please sign in to comment.