diff --git a/BrightData/BrightData.xml b/BrightData/BrightData.xml index 8cb6583f..27c1a2f4 100644 --- a/BrightData/BrightData.xml +++ b/BrightData/BrightData.xml @@ -715,6 +715,56 @@ + + + A range (with an offset and size) of a buffer + + Start position within the buffer + Length within the buffer + + + + A range (with an offset and size) of a buffer + + Start position within the buffer + Length within the buffer + + + Start position within the buffer + + + Length within the buffer + + + + Returns the section referenced by this offset and length + + + Base buffer + + + + + The end offset + + + + + Checks if a position is within the range + + + + + + + Checks if another range intersects + + + + + + + Notification from an aggregate operation @@ -2836,6 +2886,14 @@ + + + Writes the item (that can be represented as a byte array) to disk + + + + + Converts a type code to a type @@ -6552,51 +6610,6 @@ Returns all strings by indexed order - - - File based string table that has been created with InMemoryStringTableBuilder - - - - - Creates a string table from the file path - - - - - - - - - - - - - - - - - - - - - - Creates a string indexer - - Type of string indexer to create - Max string size to index - - - - - - - Creates a string indexer from a tokenizer (characters are mapped to a series of integers, for example from Byte Pair Encoding) - - - - - Builds utf-8 based string data in memory and writes to file @@ -6662,6 +6675,49 @@ Additional strings that were not in the underlying string indexer + + + File based string table that has been created with InMemoryStringTableBuilder + + + + + File based string table that has been created with InMemoryStringTableBuilder + + + + + + + + + + + + + + + + + + + + Creates a string indexer + + Type of string indexer to create + Max string size to index + + + + + + + Creates a string indexer from a tokenizer (characters are mapped to a series of integers, for example from Byte Pair Encoding) + + + + + @@ -7385,32 +7441,6 @@ Max string size - - - Asynchronous string table - - - - - Gets a string as utf-8 - - - - - - - Gets a string by index - - - - - - - Gets all strings - - Max string size - - Notifies of operations and messages @@ -7649,6 +7679,11 @@ Size of each of the spans in the tuple + + + Byte size of each of the spans in the tuple + + Invokes a callback on each span in the tuple @@ -7656,6 +7691,16 @@ + + + Indicates that the type has an offset into a buffer + + + + + Offset into a buffer + + Typed data analyser @@ -19866,6 +19911,9 @@ + + + @@ -19892,6 +19940,9 @@ + + + @@ -19923,6 +19974,9 @@ + + + @@ -19959,6 +20013,9 @@ + + + @@ -20000,6 +20057,9 @@ + + + @@ -20046,6 +20106,9 @@ + + + @@ -20097,6 +20160,9 @@ + + + @@ -20153,6 +20219,9 @@ + + + @@ -20214,6 +20283,9 @@ + + + @@ -20280,6 +20352,9 @@ + + + @@ -20351,6 +20426,9 @@ + + + @@ -20429,6 +20507,9 @@ Item 2 in the tuple + + + @@ -20455,6 +20536,9 @@ Item 3 in the tuple + + + @@ -20486,6 +20570,9 @@ Item 4 in the tuple + + + @@ -20522,6 +20609,9 @@ Item 5 in the tuple + + + @@ -20563,6 +20653,9 @@ Item 6 in the tuple + + + @@ -20609,6 +20702,9 @@ Item 7 in the tuple + + + @@ -20660,6 +20756,9 @@ Item 8 in the tuple + + + @@ -20716,6 +20815,9 @@ Item 9 in the tuple + + + @@ -20777,6 +20879,9 @@ Item 10 in the tuple + + + @@ -20843,6 +20948,9 @@ Item 11 in the tuple + + + @@ -20914,6 +21022,9 @@ Item 12 in the tuple + + + @@ -21058,6 +21169,9 @@ + + + @@ -21079,6 +21193,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21109,6 +21226,9 @@ + + + @@ -21135,6 +21255,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21170,6 +21293,9 @@ + + + @@ -21201,6 +21327,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21241,6 +21370,9 @@ + + + @@ -21277,6 +21409,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21322,6 +21457,9 @@ + + + @@ -21363,6 +21501,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21413,6 +21554,9 @@ + + + @@ -21459,6 +21603,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21514,6 +21661,9 @@ + + + @@ -21565,6 +21715,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21625,6 +21778,9 @@ + + + @@ -21681,6 +21837,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21746,6 +21905,9 @@ + + + @@ -21807,6 +21969,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -21877,6 +22042,9 @@ + + + @@ -21943,6 +22111,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -22018,6 +22189,9 @@ + + + @@ -22089,6 +22263,9 @@ + + + Casts each item to T and then invokes a callback on the new span @@ -22141,22 +22318,10 @@ An item within a weighted index list - - - Index of item - - - - - Weight of item - - - Constructor + An item within a weighted index list - Index of item - Weight of item @@ -22264,6 +22429,14 @@ How to merge item weights + + + Merges a sequence of weighted index items into one list + + Lists to merge + How to merge item weights + + Writes the data to an XML writer diff --git a/BrightData/Buffer/BlockHeader.cs b/BrightData/Buffer/BlockHeader.cs index b027398d..5285ae2e 100644 --- a/BrightData/Buffer/BlockHeader.cs +++ b/BrightData/Buffer/BlockHeader.cs @@ -11,7 +11,7 @@ namespace BrightData.Buffer /// /// The start offset of the block in the buffer /// The size of the block in the buffer - public readonly partial record struct BlockHeader(int Offset, int Size) : IHaveSize + public readonly record struct BlockHeader(int Offset, int Size) : IHaveSize { /// /// The end offset of the block in the buffer @@ -83,13 +83,13 @@ public static ReadOnlyMemory Create(params int[] blockSizes) /// public static ReadOnlyMemory Combine(in T tuple) where T: IAmTupleOfSpans, allows ref struct { - var sizes = tuple.Sizes; + var sizes = tuple.ByteSizes; var header = Create(sizes); var headerBytes = header.Span.AsBytes(); var totalSize = headerBytes.Length + sizes.Sum(); Memory ret = new byte[totalSize]; - headerBytes.CopyTo(header.Span[0].Get(ret).Span); + headerBytes.CopyTo(ret.Span[..headerBytes.Length]); tuple.ForEach((x, i) => x.CopyTo(header.Span[i].Get(ret).Span)); return ret; } diff --git a/BrightData/Buffer/OffsetAndSize.cs b/BrightData/Buffer/OffsetAndSize.cs new file mode 100644 index 00000000..94bd9842 --- /dev/null +++ b/BrightData/Buffer/OffsetAndSize.cs @@ -0,0 +1,49 @@ +using System; + +namespace BrightData.Buffer +{ + /// + /// A range (with an offset and size) of a buffer + /// + /// Start position within the buffer + /// Length within the buffer + public readonly record struct OffsetAndSize(uint StartOffset, uint Size) : IHaveOffset, IHaveSize, IComparable + { + /// + /// Returns the section referenced by this offset and length + /// + /// + /// Base buffer + /// + public ReadOnlySpan GetSpan(ReadOnlySpan span) => span.Slice((int)StartOffset, (int)Size); + + /// + /// The end offset + /// + public uint EndOffset => StartOffset + Size; + + /// + /// Checks if a position is within the range + /// + /// + /// + public bool Intersects(uint position) => position >= StartOffset && position < EndOffset; + + /// + /// Checks if another range intersects + /// + /// + /// + public bool Intersects(in OffsetAndSize other) => Intersects(other.StartOffset) || Intersects(other.EndOffset); + + /// + public int CompareTo(OffsetAndSize other) + { + var ret = StartOffset.CompareTo(other.StartOffset); + if(ret != 0) return ret; + return Size.CompareTo(other.Size); + } + + uint IHaveOffset.Offset => StartOffset; + } +} diff --git a/BrightData/ExtensionMethods.Buffers.cs b/BrightData/ExtensionMethods.Buffers.cs index 58c575e2..21edf34a 100644 --- a/BrightData/ExtensionMethods.Buffers.cs +++ b/BrightData/ExtensionMethods.Buffers.cs @@ -1104,6 +1104,12 @@ public static async Task GetVectoriser(this IReadOnlyBufferW return new VectorisationModel(vectorisers); } + /// + /// Writes the item (that can be represented as a byte array) to disk + /// + /// + /// + /// public static Task WriteTo(this IHaveMemory itemWithMemory, string filePath) { return File.WriteAllBytesAsync(filePath, itemWithMemory.ReadOnlyMemory); diff --git a/BrightData/Helper/StringTables/FileBasedStringTable.cs b/BrightData/Helper/StringTables/FileBasedStringTable.cs deleted file mode 100644 index 04fa8018..00000000 --- a/BrightData/Helper/StringTables/FileBasedStringTable.cs +++ /dev/null @@ -1,202 +0,0 @@ -using System; -using System.Buffers.Binary; -using System.IO; -using System.Runtime.CompilerServices; -using System.Text; -using System.Threading.Tasks; -using CommunityToolkit.HighPerformance; -using CommunityToolkit.HighPerformance.Buffers; -using Microsoft.Win32.SafeHandles; - -namespace BrightData.Helper.StringTables -{ - /// - /// File based string table that has been created with InMemoryStringTableBuilder - /// - public class FileBasedStringTable : IDisposable, IStringTableInMemory, IAsyncStringTable - { - readonly SafeFileHandle _file; - readonly InMemoryStringTableBuilder.OffsetAndLength[] _stringTable; - readonly long _stringDataOffset; - - FileBasedStringTable(SafeFileHandle file, InMemoryStringTableBuilder.OffsetAndLength[] stringTable, long stringDataOffset) - { - _file = file; - _stringTable = stringTable; - _stringDataOffset = stringDataOffset; - } - - /// - /// Creates a string table from the file path - /// - /// - /// - public static async Task Create(string filePath) - { - var sizeBuffer = new byte[12]; - var file = File.OpenHandle(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, FileOptions.Asynchronous | FileOptions.RandomAccess); - await RandomAccess.ReadAsync(file, sizeBuffer, 0); - var sizeBufferSpan = sizeBuffer.AsSpan(); - var size = BinaryPrimitives.ReadUInt32LittleEndian(sizeBufferSpan[..4]); - var stringTableOffset = BinaryPrimitives.ReadUInt32LittleEndian(sizeBufferSpan[4..8]); - var stringDataOffset = BinaryPrimitives.ReadUInt32LittleEndian(sizeBufferSpan[8..12]); - - var stringTable = new InMemoryStringTableBuilder.OffsetAndLength[size]; - await RandomAccess.ReadAsync(file, stringTable.AsMemory().AsBytes(), stringTableOffset); - return new(file, stringTable, stringDataOffset); - } - - /// - public void Dispose() - { - GC.SuppressFinalize(this); - _file.Dispose(); - } - - async Task> IAsyncStringTable.GetUtf8(uint index) - { - var (offset, size) = _stringTable[index]; - var buffer = new byte[(int)size]; - await RandomAccess.ReadAsync(_file, buffer, _stringDataOffset + offset); - return buffer; - } - - async Task IAsyncStringTable.GetString(uint index) - { - var (offset, size) = _stringTable[index]; - var buffer = new byte[(int)size]; - await RandomAccess.ReadAsync(_file, buffer, _stringDataOffset + offset); - return Encoding.UTF8.GetString(buffer); - } - - async Task IAsyncStringTable.GetAll(int maxStringSize) - { - using var buffer = MemoryOwner.Allocate(maxStringSize); - - // read entire string block - var dataSize = RandomAccess.GetLength(_file) - _stringDataOffset; - using var data = MemoryOwner.Allocate((int)dataSize); - await RandomAccess.ReadAsync(_file, data.Memory, _stringDataOffset); - - var ret = new string[Size]; - var span = data.Span; - for (var i = 0U; i < Size; i++) { - var (offset, size) = _stringTable[i]; - ret[i] = Encoding.UTF8.GetString(span.Slice((int)offset, (int)size)); - } - return ret; - } - - /// - [SkipLocalsInit] - public string GetString(uint stringIndex) - { - var (offset, size) = _stringTable[stringIndex]; - Span buffer = stackalloc byte[(int)size]; - RandomAccess.Read(_file, buffer, _stringDataOffset + offset); - return Encoding.UTF8.GetString(buffer); - } - - /// - public ReadOnlySpan GetUtf8(uint stringIndex) - { - var (offset, size) = _stringTable[stringIndex]; - var buffer = new byte[(int)size]; - RandomAccess.Read(_file, buffer, _stringDataOffset + offset); - return buffer; - } - - /// - public string[] GetAll(int maxStringSize = 1024) - { - using var buffer = SpanOwner.Allocate(maxStringSize); - - // read entire string block - var dataSize = RandomAccess.GetLength(_file) - _stringDataOffset; - using var data = SpanOwner.Allocate((int)dataSize); - var span = data.Span; - RandomAccess.Read(_file, span, _stringDataOffset); - - var ret = new string[Size]; - for (var i = 0U; i < Size; i++) { - var (offset, size) = _stringTable[i]; - ret[i] = Encoding.UTF8.GetString(span.Slice((int)offset, (int)size)); - } - return ret; - } - - /// - public uint Size => (uint)_stringTable.Length; - - /// - /// Creates a string indexer - /// - /// Type of string indexer to create - /// Max string size to index - /// - /// - /// - public async Task GetStringIndexer(StringIndexType type = StringIndexType.Dictionary, int maxStringSize = 1024) - { - // read entire string block - var dataSize = RandomAccess.GetLength(_file) - _stringDataOffset; - using var data = MemoryOwner.Allocate((int)dataSize); - await RandomAccess.ReadAsync(_file, data.Memory, _stringDataOffset); - - var span = data.Span; - using var buffer = SpanOwner.Allocate(maxStringSize); - switch (type) { - case StringIndexType.Dictionary: { - var ret = new DictionaryStringIndexer(); - for (var i = 0U; i < Size; i++) { - var (offset, size) = _stringTable[i]; - var str = Encoding.UTF8.GetString(span.Slice((int)offset, (int)size)); - if (ret.GetIndex(str) != i) - throw new Exception("Indices did not align"); - } - return ret; - } - case StringIndexType.Trie: { - var bufferSpan = buffer.Span; - var trieBuilder = new UniqueIndexedStringTrie.Builder(); - for (var i = 0U; i < Size; i++) { - var (offset, size) = _stringTable[i]; - var bufferSize = Encoding.UTF8.GetChars(span.Slice((int)offset, (int)size), bufferSpan); - trieBuilder.Add(bufferSpan[..bufferSize], i); - } - - return new TrieStringIndexer(trieBuilder.Build(), this); - } - default: - throw new NotImplementedException(type.ToString()); - } - } - - /// - /// Creates a string indexer from a tokenizer (characters are mapped to a series of integers, for example from Byte Pair Encoding) - /// - /// - /// - /// - public async Task GetStringIndexer(Func, ReadOnlySpan> tokenizer, int maxStringSize = 1024) - { - // read entire string block - var dataSize = RandomAccess.GetLength(_file) - _stringDataOffset; - using var data = MemoryOwner.Allocate((int)dataSize); - await RandomAccess.ReadAsync(_file, data.Memory, _stringDataOffset); - - // build the tokenized trie - using var buffer = SpanOwner.Allocate(maxStringSize); - var bufferSpan = buffer.Span; - var span = data.Span; - var trieBuilder = new UniqueIndexedStringTrie.Builder(); - for (var i = 0U; i < Size; i++) { - var (offset, size) = _stringTable[i]; - var bufferSize = Encoding.UTF8.GetChars(span.Slice((int)offset, (int)size), bufferSpan); - trieBuilder.Add(tokenizer(bufferSpan[..bufferSize]), i); - } - - return new TokenizedTrieStringIndexer(trieBuilder.Build(), this, tokenizer); - } - } -} diff --git a/BrightData/Helper/StringTables/InMemoryStringTableBuilder.cs b/BrightData/Helper/StringTables/InMemoryStringTableBuilder.cs index 82bfe7f3..4256e262 100644 --- a/BrightData/Helper/StringTables/InMemoryStringTableBuilder.cs +++ b/BrightData/Helper/StringTables/InMemoryStringTableBuilder.cs @@ -6,6 +6,7 @@ using System.Runtime.CompilerServices; using System.Text; using System.Threading.Tasks; +using BrightData.Buffer; using CommunityToolkit.HighPerformance; using CommunityToolkit.HighPerformance.Buffers; @@ -16,9 +17,8 @@ namespace BrightData.Helper.StringTables /// public class InMemoryStringTableBuilder : IDisposable, IStringTableInMemory { - internal readonly record struct OffsetAndLength(uint Offset, uint Length); - readonly ArrayPoolBufferWriter _writer = new(); - readonly List _stringTable = []; + readonly ArrayPoolBufferWriter _dataWriter = new(); + readonly ArrayPoolBufferWriter _stringTable = new(); /// /// Creates a string table builder from a string indexer @@ -31,11 +31,11 @@ public InMemoryStringTableBuilder(IIndexStrings stringIndexer, int maxStringSize { Span buffer = stackalloc byte[maxStringSizeInBytes]; foreach (var str in stringIndexer.OrderedStrings) { - var offset = (uint)_writer.WrittenCount; + var offset = (uint)_dataWriter.WrittenCount; if (!Encoding.UTF8.TryGetBytes(str, buffer, out var size)) throw new Exception($"String was too large to encode in {maxStringSizeInBytes:N0} bytes: \"{str[..32]}...\" ({str.Length} characters)"); - _writer.Write(buffer[..size]); - _stringTable.Add(new(offset, (uint)size)); + _dataWriter.Write(buffer[..size]); + _stringTable.Write(new OffsetAndSize(offset, (uint)size)); } } @@ -43,18 +43,15 @@ public InMemoryStringTableBuilder(IIndexStrings stringIndexer, int maxStringSize public void Dispose() { GC.SuppressFinalize(this); - _writer.Dispose(); + _dataWriter.Dispose(); + _stringTable.Dispose(); } /// - public uint Size => (uint)_stringTable.Count; + public uint Size => (uint)_stringTable.WrittenCount; /// - public ReadOnlySpan GetUtf8(uint index) - { - var (offset, size) = _stringTable[(int)index]; - return _writer.WrittenSpan.Slice((int)offset, (int)size); - } + public ReadOnlySpan GetUtf8(uint index) => _stringTable.WrittenSpan[(int)index].GetSpan(_dataWriter.WrittenSpan); /// public string GetString(uint index) => Encoding.UTF8.GetString(GetUtf8(index)); @@ -74,29 +71,8 @@ public string[] GetAll(int maxStringSize) /// public async Task WriteTo(string outputPath) { - const uint HeaderSize = 12U; - var sizeHeader = new byte[HeaderSize]; - var strings = _stringTable.ToArray().AsMemory().Cast(); - var header = WriteHeader(sizeHeader, [ - Size, - HeaderSize, - HeaderSize + (uint)strings.Length - ]); - - // write to file - using var file = File.OpenHandle(outputPath, FileMode.Create, FileAccess.Write, FileShare.None, FileOptions.Asynchronous | FileOptions.SequentialScan); - await RandomAccess.WriteAsync(file, sizeHeader, 0); - await RandomAccess.WriteAsync(file, strings, header[1]); - await RandomAccess.WriteAsync(file, _writer.WrittenMemory, header[2]); - return; - - static uint[] WriteHeader(Span span, uint[] header) - { - BinaryPrimitives.WriteUInt32LittleEndian(span[..4], header[0]); - BinaryPrimitives.WriteUInt32LittleEndian(span[4..8], header[1]); - BinaryPrimitives.WriteUInt32LittleEndian(span[8..12], header[2]); - return header; - } + var stringTable = new StringTable(_stringTable.WrittenMemory, _dataWriter.WrittenMemory); + await File.WriteAllBytesAsync(outputPath, stringTable.ReadOnlyMemory); } } } diff --git a/BrightData/Helper/StringTables/StringTable.cs b/BrightData/Helper/StringTables/StringTable.cs new file mode 100644 index 00000000..ca2eb57f --- /dev/null +++ b/BrightData/Helper/StringTables/StringTable.cs @@ -0,0 +1,123 @@ +using System; +using System.IO; +using System.Text; +using System.Threading.Tasks; +using BrightData.Buffer; +using BrightData.Types; +using CommunityToolkit.HighPerformance.Buffers; + +namespace BrightData.Helper.StringTables +{ + /// + /// File based string table that has been created with InMemoryStringTableBuilder + /// + public class StringTable(ReadOnlyMemory stringTable, ReadOnlyMemory stringData) : IStringTableInMemory, IHaveMemory + { + readonly ReadOnlyMemory _stringTable = stringTable; + readonly ReadOnlyMemory _stringData = stringData; + + public static StringTable Create(ReadOnlyMemory data) + { + var blocks = data.GetTupleFromBlockHeader(); + return new StringTable( + blocks.Item1, + blocks.Item2 + ); + } + + public static async Task Create(string filePath) + { + return Create(await File.ReadAllBytesAsync(filePath)); + } + + /// + public ReadOnlyMemory ReadOnlyMemory => BlockHeader.Combine(ReadOnlyMultiTypeSpanTuple.Create( + _stringTable.Span, + _stringData.Span + )); + + /// + public string GetString(uint stringIndex) => Encoding.UTF8.GetString(GetUtf8(stringIndex)); + + /// + public ReadOnlySpan GetUtf8(uint stringIndex) => _stringTable.Span[(int)stringIndex].GetSpan(_stringData.Span); + + /// + public string[] GetAll(int maxStringSize = 1024) + { + var span = _stringTable.Span; + var dataSpan = _stringData.Span; + + var ret = new string[Size]; + for (var i = 0U; i < Size; i++) + ret[i] = Encoding.UTF8.GetString(span[(int)i].GetSpan(dataSpan)); + return ret; + } + + /// + public uint Size => (uint)_stringTable.Length; + + /// + /// Creates a string indexer + /// + /// Type of string indexer to create + /// Max string size to index + /// + /// + /// + public async Task GetStringIndexer(StringIndexType type = StringIndexType.Dictionary, int maxStringSize = 1024) + { + var span = _stringTable.Span; + var dataSpan = _stringData.Span; + using var buffer = SpanOwner.Allocate(maxStringSize); + switch (type) { + case StringIndexType.Dictionary: { + var ret = new DictionaryStringIndexer(); + for (var i = 0U; i < Size; i++) { + var utf8 = span[(int)i].GetSpan(dataSpan); + var str = Encoding.UTF8.GetString(utf8); + if (ret.GetIndex(str) != i) + throw new Exception("Indices did not align"); + } + return ret; + } + case StringIndexType.Trie: { + var bufferSpan = buffer.Span; + var trieBuilder = new UniqueIndexedStringTrie.Builder(); + for (var i = 0U; i < Size; i++) { + var utf8 = span[(int)i].GetSpan(dataSpan); + var bufferSize = Encoding.UTF8.GetChars(utf8, bufferSpan); + trieBuilder.Add(bufferSpan[..bufferSize], i); + } + + return new TrieStringIndexer(trieBuilder.Build(), this); + } + default: + throw new NotImplementedException(type.ToString()); + } + } + + /// + /// Creates a string indexer from a tokenizer (characters are mapped to a series of integers, for example from Byte Pair Encoding) + /// + /// + /// + /// + public async Task GetStringIndexer(Func, ReadOnlySpan> tokenizer, int maxStringSize = 1024) + { + // build the tokenized trie + using var buffer = SpanOwner.Allocate(maxStringSize); + var bufferSpan = buffer.Span; + var span = _stringTable.Span; + var dataSpan = _stringData.Span; + var trieBuilder = new UniqueIndexedStringTrie.Builder(); + for (var i = 0U; i < Size; i++) { + var utf8 = span[(int)i].GetSpan(dataSpan); + var bufferSize = Encoding.UTF8.GetChars(utf8, bufferSpan); + trieBuilder.Add(tokenizer(bufferSpan[..bufferSize]), i); + } + + return new TokenizedTrieStringIndexer(trieBuilder.Build(), this, tokenizer); + } + } +} diff --git a/BrightData/Interfaces.cs b/BrightData/Interfaces.cs index ee0c0564..0b3ec4c7 100644 --- a/BrightData/Interfaces.cs +++ b/BrightData/Interfaces.cs @@ -364,33 +364,6 @@ public interface IStringTableInMemory : IHaveSize string[] GetAll(int maxStringSize = 1024); } - /// - /// Asynchronous string table - /// - public interface IAsyncStringTable : IHaveSize - { - /// - /// Gets a string as utf-8 - /// - /// - /// - Task> GetUtf8(uint index); - - /// - /// Gets a string by index - /// - /// - /// - Task GetString(uint index); - - /// - /// Gets all strings - /// - /// Max string size - /// - Task GetAll(int maxStringSize = 1024); - } - /// /// Notifies of operations and messages /// @@ -618,6 +591,11 @@ public interface IAmTupleOfSpans /// int[] Sizes { get; } + /// + /// Byte size of each of the spans in the tuple + /// + int[] ByteSizes { get; } + /// /// Invokes a callback on each span in the tuple /// @@ -625,4 +603,15 @@ public interface IAmTupleOfSpans /// void ForEach(ForEachSpanCallback callback) where T : unmanaged; } + + /// + /// Indicates that the type has an offset into a buffer + /// + public interface IHaveOffset + { + /// + /// Offset into a buffer + /// + uint Offset { get; } + } } diff --git a/BrightData/LinearAlgebra/MutableMatrix.cs b/BrightData/LinearAlgebra/MutableMatrix.cs index 1547624c..15191405 100644 --- a/BrightData/LinearAlgebra/MutableMatrix.cs +++ b/BrightData/LinearAlgebra/MutableMatrix.cs @@ -319,7 +319,7 @@ static unsafe IMatrix MultiplyWithThisTransposed(LinearAlgebraProvider lap fixed (T* otherPtr = otherSpan) fixed (T* retPtr = retSpan) { //MatrixMultiplyChunked(matrixPtr, otherPtr, lda, rowCount, columnCount, retPtr); - MatrixMultiplyTiled2(matrixPtr, otherPtr, lda, rowCount, columnCount, retPtr); + MatrixMultiplyTiled3(matrixPtr, otherPtr, lda, rowCount, columnCount, retPtr); } } finally { @@ -478,6 +478,55 @@ void MultiplyBlock(uint rowStart, uint colStart, uint rowEnd, uint colEnd) } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static unsafe void MatrixMultiplyTiled3(T* a, T* b, int size, uint rows, uint cols, T* ret) + { + const int L1BlockSize = 32; + const int L2BlockSize = 64; + var vectorSize = Vector.Count; + var numVectors = size / vectorSize; + var ceiling = numVectors * vectorSize; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + void MultiplyBlock(uint rowStart, uint colStart, uint rowEnd, uint colEnd) + { + for (var i = rowStart; i < rowEnd; i += L1BlockSize) { + for (var j = colStart; j < colEnd; j += L1BlockSize) { + for (uint ii = i, iLen = Math.Min(i + L1BlockSize, rows); ii < iLen; ii++) { + var xPtr = &a[ii * size]; + for (uint jj = j, jLen = Math.Min(j + L1BlockSize, cols); jj < jLen; jj++) { + var yPtr = &b[jj * size]; + var vSum = Vector.Zero; + for (var z = 0; z < numVectors; z++) + vSum += Vector.Load(xPtr + z * vectorSize) * Vector.Load(yPtr + z * vectorSize); + + var sum = Vector.Dot(vSum, Vector.One); + for (var z = ceiling; z < size; z++) + sum += xPtr[z] * yPtr[z]; + ret[jj * rows + ii] = sum; + } + } + } + } + } + + if (rows * cols >= Consts.MinimumSizeForParallel) { + Parallel.For(0, (int)Math.Ceiling((double)rows / L2BlockSize), rowTile => + { + var rowStart = (uint)rowTile * L2BlockSize; + var rowEnd = rowStart + L2BlockSize; + for (var colTile = 0U; colTile < cols; colTile += L2BlockSize) + MultiplyBlock(rowStart, colTile, rowEnd, colTile + L2BlockSize); + }); + } + else { + for (var rowTile = 0U; rowTile < rows; rowTile += L2BlockSize) { + for (var colTile = 0U; colTile < cols; colTile += L2BlockSize) + MultiplyBlock(rowTile, colTile, rowTile + L2BlockSize, colTile + L2BlockSize); + } + } + } + /// public override string ToString() { diff --git a/BrightData/Types/SpanTuples/SpanTuple.cs b/BrightData/Types/SpanTuples/SpanTuple.cs index 15d50533..3e98ad86 100644 --- a/BrightData/Types/SpanTuples/SpanTuple.cs +++ b/BrightData/Types/SpanTuples/SpanTuple.cs @@ -1,6 +1,7 @@ using System; using BrightData.Buffer; using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; using CommunityToolkit.HighPerformance; namespace BrightData.Types @@ -204,6 +205,12 @@ internal SingleTypeSpanTuple2(Span item1, Span item2) Item2.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -245,6 +252,13 @@ internal SingleTypeSpanTuple3(Span item1, Span item2, Span item3) Item3.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -294,6 +308,14 @@ internal SingleTypeSpanTuple4(Span item1, Span item2, Span item3, Sp Item4.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -351,6 +373,15 @@ internal SingleTypeSpanTuple5(Span item1, Span item2, Span item3, Sp Item5.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -416,6 +447,16 @@ internal SingleTypeSpanTuple6(Span item1, Span item2, Span item3, Sp Item6.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -489,6 +530,17 @@ internal SingleTypeSpanTuple7(Span item1, Span item2, Span item3, Sp Item7.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -570,6 +622,18 @@ internal SingleTypeSpanTuple8(Span item1, Span item2, Span item3, Sp Item8.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -659,6 +723,19 @@ internal SingleTypeSpanTuple9(Span item1, Span item2, Span item3, Sp Item9.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -756,6 +833,20 @@ internal SingleTypeSpanTuple10(Span item1, Span item2, Span item3, S Item10.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -861,6 +952,21 @@ internal SingleTypeSpanTuple11(Span item1, Span item2, Span item3, S Item11.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -974,6 +1080,22 @@ internal SingleTypeSpanTuple12(Span item1, Span item2, Span item3, S Item12.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + Item12.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -1185,6 +1307,12 @@ internal ReadOnlySingleTypeSpanTuple2(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item2 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1225,6 +1353,13 @@ internal ReadOnlySingleTypeSpanTuple3(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item3 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1273,6 +1408,14 @@ internal ReadOnlySingleTypeSpanTuple4(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item4 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1329,6 +1472,15 @@ internal ReadOnlySingleTypeSpanTuple5(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item5 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1393,6 +1545,16 @@ internal ReadOnlySingleTypeSpanTuple6(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item6 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1465,6 +1627,17 @@ internal ReadOnlySingleTypeSpanTuple7(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item7 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1545,6 +1718,18 @@ internal ReadOnlySingleTypeSpanTuple8(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item8 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1633,6 +1818,19 @@ internal ReadOnlySingleTypeSpanTuple9(ReadOnlySpan item1, ReadOnlySpan i /// public ReadOnlySpan Item9 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1729,6 +1927,20 @@ internal ReadOnlySingleTypeSpanTuple10(ReadOnlySpan item1, ReadOnlySpan /// public ReadOnlySpan Item10 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1833,6 +2045,21 @@ internal ReadOnlySingleTypeSpanTuple11(ReadOnlySpan item1, ReadOnlySpan /// public ReadOnlySpan Item11 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -1945,6 +2172,22 @@ internal ReadOnlySingleTypeSpanTuple12(ReadOnlySpan item1, ReadOnlySpan /// public ReadOnlySpan Item12 { get; } + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + Item12.Length * Unsafe.SizeOf(), + ]; + /// public int[] Sizes => [ Item1.Length, @@ -2506,6 +2749,12 @@ internal MultiTypeSpanTuple2(Span item1, Span item2) Item2.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -2543,6 +2792,12 @@ internal ReadOnlyMultiTypeSpanTuple2(ReadOnlySpan item1, ReadOnlySpan it Item2.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -2591,6 +2846,13 @@ internal MultiTypeSpanTuple3(Span item1, Span item2, Span item3) Item3.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -2637,6 +2899,13 @@ internal ReadOnlyMultiTypeSpanTuple3(ReadOnlySpan item1, ReadOnlySpan it Item3.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -2694,6 +2963,14 @@ internal MultiTypeSpanTuple4(Span item1, Span item2, Span item3, Spa Item4.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -2749,6 +3026,14 @@ internal ReadOnlyMultiTypeSpanTuple4(ReadOnlySpan item1, ReadOnlySpan it Item4.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -2815,6 +3100,15 @@ internal MultiTypeSpanTuple5(Span item1, Span item2, Span item3, Spa Item5.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -2879,6 +3173,15 @@ internal ReadOnlyMultiTypeSpanTuple5(ReadOnlySpan item1, ReadOnlySpan it Item5.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -2954,6 +3257,16 @@ internal MultiTypeSpanTuple6(Span item1, Span item2, Span item3, Spa Item6.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -3027,6 +3340,16 @@ internal ReadOnlyMultiTypeSpanTuple6(ReadOnlySpan item1, ReadOnlySpan it Item6.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -3111,6 +3434,17 @@ internal MultiTypeSpanTuple7(Span item1, Span item2, Span item3, Spa Item7.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -3193,6 +3527,17 @@ internal ReadOnlyMultiTypeSpanTuple7(ReadOnlySpan item1, ReadOnlySpan it Item7.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -3286,6 +3631,18 @@ internal MultiTypeSpanTuple8(Span item1, Span item2, Span item3, Spa Item8.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -3377,6 +3734,18 @@ internal ReadOnlyMultiTypeSpanTuple8(ReadOnlySpan item1, ReadOnlySpan it Item8.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -3479,6 +3848,19 @@ internal MultiTypeSpanTuple9(Span item1, Span item2, Span item3, Spa Item9.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -3579,6 +3961,19 @@ internal ReadOnlyMultiTypeSpanTuple9(ReadOnlySpan item1, ReadOnlySpan it Item9.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -3690,6 +4085,20 @@ internal MultiTypeSpanTuple10(Span item1, Span item2, Span item3, Sp Item10.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -3799,6 +4208,20 @@ internal ReadOnlyMultiTypeSpanTuple10(ReadOnlySpan item1, ReadOnlySpan i Item10.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -3919,6 +4342,21 @@ internal MultiTypeSpanTuple11(Span item1, Span item2, Span item3, Sp Item11.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -4037,6 +4475,21 @@ internal ReadOnlyMultiTypeSpanTuple11(ReadOnlySpan item1, ReadOnlySpan i Item11.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// @@ -4166,6 +4619,22 @@ internal MultiTypeSpanTuple12(Span item1, Span item2, Span item3, Sp Item12.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + Item12.Length * Unsafe.SizeOf(), + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -4293,6 +4762,22 @@ internal ReadOnlyMultiTypeSpanTuple12(ReadOnlySpan item1, ReadOnlySpan i Item12.Length, ]; + /// + public int[] ByteSizes => [ + Item1.Length * Unsafe.SizeOf(), + Item2.Length * Unsafe.SizeOf(), + Item3.Length * Unsafe.SizeOf(), + Item4.Length * Unsafe.SizeOf(), + Item5.Length * Unsafe.SizeOf(), + Item6.Length * Unsafe.SizeOf(), + Item7.Length * Unsafe.SizeOf(), + Item8.Length * Unsafe.SizeOf(), + Item9.Length * Unsafe.SizeOf(), + Item10.Length * Unsafe.SizeOf(), + Item11.Length * Unsafe.SizeOf(), + Item12.Length * Unsafe.SizeOf(), + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// diff --git a/BrightData/Types/SpanTuples/SpanTuple.tt b/BrightData/Types/SpanTuples/SpanTuple.tt index 65403117..740a94f0 100644 --- a/BrightData/Types/SpanTuples/SpanTuple.tt +++ b/BrightData/Types/SpanTuples/SpanTuple.tt @@ -7,6 +7,7 @@ using System; using BrightData.Buffer; using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; using CommunityToolkit.HighPerformance; namespace BrightData.Types @@ -58,6 +59,13 @@ namespace BrightData.Types <# } #> ]; + /// + public int[] ByteSizes => [ +<# for(var j = 1; j <= i; j++) { #> + Item<#= j #>.Length * Unsafe.SizeOf(), +<# } #> + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -108,6 +116,13 @@ namespace BrightData.Types public ReadOnlySpan Item<#= j #> { get; } <# } #> + /// + public int[] ByteSizes => [ +<# for(var j = 1; j <= i; j++) { #> + Item<#= j #>.Length * Unsafe.SizeOf(), +<# } #> + ]; + /// public int[] Sizes => [ <# for(var j = 1; j <= i; j++) { #> @@ -201,6 +216,13 @@ namespace BrightData.Types <# } #> ]; + /// + public int[] ByteSizes => [ +<# for(var j = 1; j <= i; j++) { #> + Item<#= j #>.Length * Unsafe.SizeOf>(), +<# } #> + ]; + /// public void ForEach(ForEachSpanCallback callback) where T: unmanaged { @@ -239,6 +261,13 @@ namespace BrightData.Types <# } #> ]; + /// + public int[] ByteSizes => [ +<# for(var j = 1; j <= i; j++) { #> + Item<#= j #>.Length * Unsafe.SizeOf>(), +<# } #> + ]; + /// /// Casts each item to T and then invokes a callback on the new span /// diff --git a/BrightData/Types/WeightedIndexList.cs b/BrightData/Types/WeightedIndexList.cs index 363f1097..a56c28e8 100644 --- a/BrightData/Types/WeightedIndexList.cs +++ b/BrightData/Types/WeightedIndexList.cs @@ -88,29 +88,8 @@ public IEnumerable Indices /// An item within a weighted index list /// //[StructLayout(LayoutKind.Sequential, Pack=0)] - public readonly record struct Item + public readonly record struct Item(uint Index, float Weight) { - /// - /// Index of item - /// - public uint Index { get; } - - /// - /// Weight of item - /// - public float Weight { get; } - - /// - /// Constructor - /// - /// Index of item - /// Weight of item - public Item(uint index, float weight) - { - Index = index; - Weight = weight; - } - /// public override string ToString() => $"{Index}:{Weight}"; @@ -168,14 +147,14 @@ public readonly ref readonly Item Current /// /// Weighted indices /// - public static WeightedIndexList Create(params Item[] indexList) => Merge(indexList); + public static WeightedIndexList Create(params Item[] indexList) => Merge(indexList.AsSpan()); /// /// Creates a new weighted index list /// /// Weighted indices /// - public static WeightedIndexList Create(ReadOnlySpan indexList) => Merge(indexList.ToArray()); + public static WeightedIndexList Create(ReadOnlySpan indexList) => Merge(indexList); /// /// Creates a new weighted index list @@ -190,7 +169,7 @@ public readonly ref readonly Item Current /// Weighted indices /// public static WeightedIndexList Create(params (uint Index, float Weight)[] indexList) => - Merge(indexList.Select(d => new Item(d.Index, d.Weight)).ToArray()); + Merge(indexList.Select(d => new Item(d.Index, d.Weight))); /// /// Creates a new weighted index list @@ -198,7 +177,7 @@ public static WeightedIndexList Create(params (uint Index, float Weight)[] index /// Weighted indices /// public static WeightedIndexList Create(IEnumerable<(uint Index, float Weight)> indexList) => - Merge(indexList.Select(d => new Item(d.Index, d.Weight)).ToArray()); + Merge(indexList.Select(d => new Item(d.Index, d.Weight))); /// /// The number of items in the list @@ -245,14 +224,35 @@ public void Initialize(BrightDataContext context, BinaryReader reader) /// Lists to merge /// How to merge item weights /// + [OverloadResolutionPriority(2)] public static WeightedIndexList Merge(IEnumerable items, AggregationType mergeOperation = AggregationType.Sum) { var itemWeights = new Dictionary>(); - foreach (var index in items) - { - if (!itemWeights.TryGetValue(index.Index, out var weights)) - itemWeights.Add(index.Index, weights = []); - weights.Add(index.Weight); + foreach (var (index, weight) in items) { + if (!itemWeights.TryGetValue(index, out var weights)) + itemWeights.Add(index, weights = []); + weights.Add(weight); + } + + return new WeightedIndexList( + itemWeights.Select(d => new Item(d.Key, mergeOperation.Aggregate(d.Value))).ToArray() + ); + } + + /// + /// Merges a sequence of weighted index items into one list + /// + /// Lists to merge + /// How to merge item weights + /// + [OverloadResolutionPriority(1)] + public static WeightedIndexList Merge(ReadOnlySpan items, AggregationType mergeOperation = AggregationType.Sum) + { + var itemWeights = new Dictionary>(); + foreach (var (index, weight) in items) { + if (!itemWeights.TryGetValue(index, out var weights)) + itemWeights.Add(index, weights = []); + weights.Add(weight); } return new WeightedIndexList( diff --git a/ExampleCode/DataTableTrainers/SentimentDataTrainer.cs b/ExampleCode/DataTableTrainers/SentimentDataTrainer.cs index f3106ca6..9ebe9f71 100644 --- a/ExampleCode/DataTableTrainers/SentimentDataTrainer.cs +++ b/ExampleCode/DataTableTrainers/SentimentDataTrainer.cs @@ -54,8 +54,6 @@ public SentimentDataTrainer(BrightDataContext context, DirectoryInfo directory) _context = context; } - public StringTable StringTable => _stringTable.StringTable; - public BernoulliNaiveBayes TrainBernoulli() { var bernoulli = _indexedSentencesTraining.TrainBernoulliNaiveBayes();