From 985457a78f2e54e2d274206e85b00708848c063f Mon Sep 17 00:00:00 2001 From: nietras Date: Fri, 31 Jan 2025 14:01:57 +0100 Subject: [PATCH] SepReader.Cols: Add Join/JoinToString(ReadOnlySpan separator) (#252) --- README.md | 2 + src/Sep.Test/SepReaderColsTest.cs | 49 +++++++-- src/Sep/Internals/SepRange.cs | 6 ++ src/Sep/SepReader.Cols.cs | 8 ++ src/Sep/SepReaderState.cs | 171 ++++++++++++++++++++++++++++++ 5 files changed, 230 insertions(+), 6 deletions(-) create mode 100644 src/Sep/Internals/SepRange.cs diff --git a/README.md b/README.md index 3b93c851..46e9e80e 100644 --- a/README.md +++ b/README.md @@ -2068,6 +2068,8 @@ namespace nietras.SeparatedValues { public int Count { get; } public nietras.SeparatedValues.SepReader.Col this[int index] { get; } + public System.ReadOnlySpan Join(System.ReadOnlySpan separator) { } + public string JoinToString(System.ReadOnlySpan separator) { } public System.Span Parse() where T : System.ISpanParsable { } public void Parse(System.Span span) diff --git a/src/Sep.Test/SepReaderColsTest.cs b/src/Sep.Test/SepReaderColsTest.cs index e5808693..48c8370a 100644 --- a/src/Sep.Test/SepReaderColsTest.cs +++ b/src/Sep.Test/SepReaderColsTest.cs @@ -27,7 +27,8 @@ public class SepReaderColsTest [TestMethod] public void SepReaderColsTest_Length() { - Run((cols, range) => Assert.AreEqual(range.GetOffsetAndLength(_colsCount).Length, cols.Count)); + Run((cols, range) => Assert.AreEqual(range.GetOffsetAndLength(_colsCount).Length, cols.Count), + checkIndexOutOfRange: false); } [TestMethod] @@ -57,7 +58,7 @@ public void SepReaderColsTest_Indexer_OutOfRange_Throws() { Assert.IsNotNull(e); } - }); + }, checkIndexOutOfRange: false); } [TestMethod] @@ -120,7 +121,7 @@ public void SepReaderColsTest_Parse_IntoSpan_LengthWrong_Throws() { Assert.AreEqual($"'span':{colValues.Length} must have length/count {cols.Count} matching columns selected", e.Message); } - }); + }, checkIndexOutOfRange: false); } [TestMethod] @@ -156,7 +157,7 @@ public void SepReaderColsTest_TryParse_IntoSpan_LengthWrong_Throws() { Assert.AreEqual($"'span':{colValues.Length} must have length/count {cols.Count} matching columns selected", e.Message); } - }); + }, checkIndexOutOfRange: false); } [TestMethod] @@ -177,9 +178,21 @@ public void SepReaderColsTest_Select_ToStringDirect() Run((cols, range) => CollectionAssert.AreEqual(_colTexts[range], cols.Select(c => c.ToStringDirect()).ToArray())); } + [DataTestMethod] + [DataRow("")] + [DataRow("/")] + [DataRow("")] + public void SepReaderColsTest_Join(string separator) + { + // Join + Run((cols, range) => Assert.AreEqual(string.Join(separator, _colTexts[range]), cols.Join(separator).ToString())); + // JoinToString + Run((cols, range) => Assert.AreEqual(string.Join(separator, _colTexts[range]), cols.JoinToString(separator))); + } + static string ToString(SepReader.Col col) => col.ToString(); - static void Run(ColsTestAction action, string text = Text) + static void Run(ColsTestAction action, string text = Text, bool checkIndexOutOfRange = true) { var ranges = new Range[] { @@ -194,7 +207,26 @@ static void Run(ColsTestAction action, string text = Text) 2..2, 2.._colsCount, }; - using var reader = Sep.Reader().FromText(text); + { + using var reader = Sep.Reader().FromText(text); + Run(reader, ranges, action, checkIndexOutOfRange); + } + { + using var reader = Sep.Reader(o => o with { Unescape = true }).FromText(text); + Run(reader, ranges, action, checkIndexOutOfRange); + } + { + using var reader = Sep.Reader(o => o with { Trim = SepTrim.All }).FromText(text); + Run(reader, ranges, action, checkIndexOutOfRange); + } + { + using var reader = Sep.Reader(o => o with { Unescape = true, Trim = SepTrim.All }).FromText(text); + Run(reader, ranges, action, checkIndexOutOfRange); + } + } + + static void Run(SepReader reader, Range[] ranges, ColsTestAction action, bool checkIndexOutOfRange) + { Assert.IsTrue(reader.MoveNext()); var row = reader.Current; action(row[_colNames], ..); @@ -210,6 +242,11 @@ static void Run(ColsTestAction action, string text = Text) action(row[range], range); } + if (checkIndexOutOfRange) + { + // Ensure index out of range causes exception (note range is not same) + Assert.ThrowsException(() => action(reader.Current[[-1]], 0..1)); + } } delegate void ColsTestAction(SepReader.Cols cols, Range range); diff --git a/src/Sep/Internals/SepRange.cs b/src/Sep/Internals/SepRange.cs new file mode 100644 index 00000000..6d5b0ed1 --- /dev/null +++ b/src/Sep/Internals/SepRange.cs @@ -0,0 +1,6 @@ +using System.Diagnostics.CodeAnalysis; + +namespace nietras.SeparatedValues; + +[ExcludeFromCodeCoverage] +readonly record struct SepRange(int Start, int Length); diff --git a/src/Sep/SepReader.Cols.cs b/src/Sep/SepReader.Cols.cs index fda12fec..9eeed3d8 100644 --- a/src/Sep/SepReader.Cols.cs +++ b/src/Sep/SepReader.Cols.cs @@ -103,6 +103,14 @@ public unsafe Span Select(delegate* selector) => IsIndices() ? _state.ColsSelect(_colIndices, selector) : _state.ColsSelect(_colStartIfRange, _colIndices.Length, selector); + public ReadOnlySpan Join(ReadOnlySpan separator) => IsIndices() + ? _state.Join(_colIndices, separator) + : _state.Join(_colStartIfRange, _colIndices.Length, separator); + + public string JoinToString(ReadOnlySpan separator) => IsIndices() + ? _state.JoinToString(_colIndices, separator) + : _state.JoinToString(_colStartIfRange, _colIndices.Length, separator); + bool IsIndices() => _colStartIfRange < 0; [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/Sep/SepReaderState.cs b/src/Sep/SepReaderState.cs index 14f98126..edb63e86 100644 --- a/src/Sep/SepReaderState.cs +++ b/src/Sep/SepReaderState.cs @@ -274,6 +274,39 @@ internal int GetCachedColIndex(string colName) #endregion #region Col + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal SepRange GetColRange(int index) + { + if ((uint)index >= (uint)_currentRowColCount) { SepThrow.IndexOutOfRangeException(); } + A.Assert(_currentRowColEndsOrInfosOffset >= 0); + index += _currentRowColEndsOrInfosOffset; + if (_colSpanFlags == 0) + { + var colEnds = _colEndsOrColInfos; + var colStart = colEnds[index] + 1; // +1 since previous end + var colEnd = colEnds[index + 1]; + A.Assert(colStart >= 0); + A.Assert(colEnd < _chars.Length); + A.Assert(colEnd >= colStart); + return new(colStart, colEnd - colStart); + } + else if (_colSpanFlags == UnescapeFlag) // Unquote/Unescape + { + ref var colInfos = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(_colEndsOrColInfos)); + var colStart = Unsafe.Add(ref colInfos, index).ColEnd + 1; // +1 since previous end + ref var colInfo = ref Unsafe.Add(ref colInfos, index + 1); + var (colEnd, quoteCountOrNegativeUnescapedLength) = colInfo; + A.Assert(colStart >= 0); + A.Assert(colEnd < _chars.Length); + A.Assert(colEnd >= colStart); + return new(colStart, colEnd - colStart); + } + else + { + return GetColSpanTrimmedRange(index); + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal ReadOnlySpan GetColSpan(int index) { @@ -415,6 +448,16 @@ ReadOnlySpan GetColSpanTrimmed(int index) } } + //[MethodImpl(MethodImplOptions.NoInlining)] + SepRange GetColSpanTrimmedRange(int index) + { + var colSpan = GetColSpanTrimmed(index); + var byteOffset = Unsafe.ByteOffset(ref MemoryMarshal.GetArrayDataReference(_chars), + ref MemoryMarshal.GetReference(colSpan)); + var colStart = (int)(byteOffset >> 1); + return new(colStart, colSpan.Length); + } + // Only trim the default space character no other whitespace characters [MethodImpl(MethodImplOptions.AggressiveInlining)] static ref char TrimSpace(ref char col, ref int length) @@ -637,6 +680,38 @@ internal unsafe Span ColsSelect(ReadOnlySpan colIndices, delegate* Join(ReadOnlySpan colIndices, scoped ReadOnlySpan separator) + { + var length = colIndices.Length; + if (length == 0) { return []; } + if (length == 1) { return GetColSpan(colIndices[0]); } + // Assume col count never so high stackalloc is not possible + Span colRanges = stackalloc SepRange[colIndices.Length]; + GetColRanges(colIndices, colRanges); + return Join(colRanges, separator); + } + [SkipLocalsInit] + internal string JoinToString(ReadOnlySpan colIndices, scoped ReadOnlySpan separator) + { + var length = colIndices.Length; + if (length == 0) { return string.Empty; } + if (length == 1) { return ToStringDefault(colIndices[0]); } + // Assume col count never so high stackalloc is not possible + Span colRanges = stackalloc SepRange[colIndices.Length]; + GetColRanges(colIndices, colRanges); + return JoinToString(colRanges, separator); + } + + void GetColRanges(ReadOnlySpan colIndices, Span colRanges) + { + A.Assert(colIndices.Length == colRanges.Length); + for (var i = 0; i < colIndices.Length; i++) + { + colRanges[i] = GetColRange(colIndices[i]); + } + } #endregion #region Cols Range @@ -727,6 +802,102 @@ internal unsafe Span ColsSelect(int colStart, int colCount, delegate* Join(int colStart, int colCount, scoped ReadOnlySpan separator) + { + if (colCount == 0) { return []; } + if (colCount == 1) { return GetColSpan(colStart); } + // Assume col count never so high stackalloc is not possible + Span colRanges = stackalloc SepRange[colCount]; + GetColRanges(colStart, colRanges); + return Join(colRanges, separator); + } + [SkipLocalsInit] + internal string JoinToString(int colStart, int colCount, scoped ReadOnlySpan separator) + { + if (colCount == 0) { return string.Empty; } + if (colCount == 1) { return ToStringDefault(colStart); } + // Assume col count never so high stackalloc is not possible + Span colRanges = stackalloc SepRange[colCount]; + GetColRanges(colStart, colRanges); + return JoinToString(colRanges, separator); + } + + void GetColRanges(int colStart, Span colRanges) + { + for (var i = 0; i < colRanges.Length; i++) + { + colRanges[i] = GetColRange(colStart + i); + } + } + #endregion + + #region Join + ReadOnlySpan Join(scoped Span colRanges, scoped ReadOnlySpan separator) + { + var totalLength = JoinTotalLength(colRanges, separator.Length); + var join = _arrayPool.RentUniqueArrayAsSpan(totalLength); + Join(_chars.AsSpan(), colRanges, separator, join); + return join; + } + + readonly ref struct JoinToStringState(ReadOnlySpan colRanges, ReadOnlySpan separator) + { + public ReadOnlySpan ColRanges { get; } = colRanges; + public ReadOnlySpan Separator { get; } = separator; + } + + string JoinToString(scoped ReadOnlySpan colRanges, scoped ReadOnlySpan separator) + { + var totalLength = JoinTotalLength(colRanges, separator.Length); +#if NET9_0_OR_GREATER + var state = new JoinToStringState(colRanges, separator); + return string.Create(totalLength, state, (join, state) => + { + Join(_chars.AsSpan(), state.ColRanges, state.Separator, join); + }); +#else + // Before .NET 9 no `allows ref struct`, so create uninitialized string, + // and get mutable span for that and join into that. + var s = new string('\0', totalLength); + var join = MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(s), s.Length); + Join(_chars.AsSpan(), colRanges, separator, join); + return s; +#endif + } + + static void Join(ReadOnlySpan chars, + ReadOnlySpan colRanges, ReadOnlySpan separator, + Span join) + { + var separatorLength = separator.Length; + var spanIndex = 0; + for (var i = 0; i < colRanges.Length; i++) + { + var colRange = colRanges[i]; + var colSpan = chars.Slice(colRange.Start, colRange.Length); + colSpan.CopyTo(join.Slice(spanIndex)); + spanIndex += colSpan.Length; + if (i < colRanges.Length - 1) + { + separator.CopyTo(join.Slice(spanIndex)); + spanIndex += separatorLength; + } + } + A.Assert(spanIndex == join.Length); + } + + static int JoinTotalLength(ReadOnlySpan colRanges, int separatorLength) + { + var totalLength = 0; + for (var i = 0; i < colRanges.Length; i++) + { + totalLength += colRanges[i].Length; + } + totalLength += separatorLength * (colRanges.Length - 1); + return totalLength; + } #endregion [ExcludeFromCodeCoverage]