Skip to content

Commit

Permalink
small fixes and added OffsetAndSize
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack Dermody committed Dec 13, 2024
1 parent ebe9fcc commit 9392b75
Show file tree
Hide file tree
Showing 13 changed files with 1,061 additions and 386 deletions.
341 changes: 257 additions & 84 deletions BrightData/BrightData.xml

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions BrightData/Buffer/BlockHeader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace BrightData.Buffer
/// </summary>
/// <param name="Offset">The start offset of the block in the buffer</param>
/// <param name="Size">The size of the block in the buffer</param>
public readonly partial record struct BlockHeader(int Offset, int Size) : IHaveSize
public readonly record struct BlockHeader(int Offset, int Size) : IHaveSize
{
/// <summary>
/// The end offset of the block in the buffer
Expand Down Expand Up @@ -83,13 +83,13 @@ public static ReadOnlyMemory<BlockHeader> Create(params int[] blockSizes)
/// <returns></returns>
public static ReadOnlyMemory<byte> Combine<T>(in T tuple) where T: IAmTupleOfSpans, allows ref struct
{
var sizes = tuple.Sizes;
var sizes = tuple.ByteSizes;
var header = Create(sizes);
var headerBytes = header.Span.AsBytes();
var totalSize = headerBytes.Length + sizes.Sum();

Memory<byte> ret = new byte[totalSize];
headerBytes.CopyTo(header.Span[0].Get(ret).Span);
headerBytes.CopyTo(ret.Span[..headerBytes.Length]);
tuple.ForEach<byte>((x, i) => x.CopyTo(header.Span[i].Get(ret).Span));
return ret;
}
Expand Down
49 changes: 49 additions & 0 deletions BrightData/Buffer/OffsetAndSize.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using System;

namespace BrightData.Buffer
{
/// <summary>
/// A range (with an offset and size) of a buffer.
/// The range is half-open: it covers the positions from <see cref="StartOffset"/> up to,
/// but not including, <see cref="EndOffset"/>.
/// </summary>
/// <param name="StartOffset">Start position within the buffer</param>
/// <param name="Size">Length within the buffer</param>
public readonly record struct OffsetAndSize(uint StartOffset, uint Size) : IHaveOffset, IHaveSize, IComparable<OffsetAndSize>
{
    /// <summary>
    /// Returns the section referenced by this offset and length
    /// </summary>
    /// <typeparam name="T">Element type of the buffer</typeparam>
    /// <param name="span">Base buffer</param>
    /// <returns>The slice of <paramref name="span"/> that starts at <see cref="StartOffset"/> and contains <see cref="Size"/> items</returns>
    /// <exception cref="ArgumentOutOfRangeException">Thrown by the slice operation if the range does not fit inside <paramref name="span"/></exception>
    public ReadOnlySpan<T> GetSpan<T>(ReadOnlySpan<T> span) => span.Slice((int)StartOffset, (int)Size);

    /// <summary>
    /// The end offset (exclusive): the first position after the range.
    /// This is a plain 32-bit sum of <see cref="StartOffset"/> and <see cref="Size"/> and is not widened.
    /// </summary>
    public uint EndOffset => StartOffset + Size;

    /// <summary>
    /// Checks if a position is within the range
    /// </summary>
    /// <param name="position">Position to check</param>
    /// <returns>True if the position is at or after <see cref="StartOffset"/> and before the (exclusive) end of the range</returns>
    public bool Intersects(uint position)
    {
        // Compare the distance from the start against the size rather than comparing against
        // StartOffset + Size, so a range that ends near uint.MaxValue cannot wrap around.
        return position >= StartOffset && position - StartOffset < Size;
    }

    /// <summary>
    /// Checks if another range intersects, i.e. if the two ranges share at least one position.
    /// Ranges that merely touch (one ends exactly where the other starts) do not intersect,
    /// and an empty range never intersects anything because it contains no positions.
    /// </summary>
    /// <param name="other">Range to compare against</param>
    /// <returns>True if at least one position lies within both ranges</returns>
    public bool Intersects(in OffsetAndSize other)
    {
        // The shared region starts at the later of the two starts and stops at the earlier of
        // the two (exclusive) ends; the ranges overlap only if that region is non-empty.
        // This also covers the case where one range completely encloses the other.
        var sharedStart = Math.Max(StartOffset, other.StartOffset);

        // Widen to 64 bits so that start + size cannot overflow
        var sharedEnd = Math.Min((ulong)StartOffset + Size, (ulong)other.StartOffset + other.Size);
        return sharedStart < sharedEnd;
    }

    /// <summary>
    /// Orders ranges by start offset and then, for equal start offsets, by size
    /// </summary>
    /// <param name="other">Range to compare against</param>
    /// <returns>Negative, zero or positive if this range sorts before, equal to or after <paramref name="other"/></returns>
    public int CompareTo(OffsetAndSize other)
    {
        var byStart = StartOffset.CompareTo(other.StartOffset);
        return byStart != 0 ? byStart : Size.CompareTo(other.Size);
    }

    // IHaveOffset exposes the start offset under the name "Offset"
    uint IHaveOffset.Offset => StartOffset;
}
}
6 changes: 6 additions & 0 deletions BrightData/ExtensionMethods.Buffers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,12 @@ public static async Task<VectorisationModel> GetVectoriser(this IReadOnlyBufferW
return new VectorisationModel(vectorisers);
}

/// <summary>
/// Writes the item (that can be represented as a byte array) to disk
/// </summary>
/// <param name="itemWithMemory"></param>
/// <param name="filePath"></param>
/// <returns></returns>
public static Task WriteTo(this IHaveMemory<byte> itemWithMemory, string filePath)
{
return File.WriteAllBytesAsync(filePath, itemWithMemory.ReadOnlyMemory);
Expand Down
202 changes: 0 additions & 202 deletions BrightData/Helper/StringTables/FileBasedStringTable.cs

This file was deleted.

48 changes: 12 additions & 36 deletions BrightData/Helper/StringTables/InMemoryStringTableBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
using BrightData.Buffer;
using CommunityToolkit.HighPerformance;
using CommunityToolkit.HighPerformance.Buffers;

Expand All @@ -16,9 +17,8 @@ namespace BrightData.Helper.StringTables
/// </summary>
public class InMemoryStringTableBuilder : IDisposable, IStringTableInMemory
{
internal readonly record struct OffsetAndLength(uint Offset, uint Length);
readonly ArrayPoolBufferWriter<byte> _writer = new();
readonly List<OffsetAndLength> _stringTable = [];
readonly ArrayPoolBufferWriter<byte> _dataWriter = new();
readonly ArrayPoolBufferWriter<OffsetAndSize> _stringTable = new();

/// <summary>
/// Creates a string table builder from a string indexer
Expand All @@ -31,30 +31,27 @@ public InMemoryStringTableBuilder(IIndexStrings stringIndexer, int maxStringSize
{
Span<byte> buffer = stackalloc byte[maxStringSizeInBytes];
foreach (var str in stringIndexer.OrderedStrings) {
var offset = (uint)_writer.WrittenCount;
var offset = (uint)_dataWriter.WrittenCount;
if (!Encoding.UTF8.TryGetBytes(str, buffer, out var size))
throw new Exception($"String was too large to encode in {maxStringSizeInBytes:N0} bytes: \"{str[..32]}...\" ({str.Length} characters)");
_writer.Write(buffer[..size]);
_stringTable.Add(new(offset, (uint)size));
_dataWriter.Write(buffer[..size]);
_stringTable.Write(new OffsetAndSize(offset, (uint)size));
}
}

/// <inheritdoc />
public void Dispose()
{
GC.SuppressFinalize(this);
_writer.Dispose();
_dataWriter.Dispose();
_stringTable.Dispose();
}

/// <inheritdoc />
public uint Size => (uint)_stringTable.Count;
public uint Size => (uint)_stringTable.WrittenCount;

/// <inheritdoc />
public ReadOnlySpan<byte> GetUtf8(uint index)
{
var (offset, size) = _stringTable[(int)index];
return _writer.WrittenSpan.Slice((int)offset, (int)size);
}
public ReadOnlySpan<byte> GetUtf8(uint index) => _stringTable.WrittenSpan[(int)index].GetSpan(_dataWriter.WrittenSpan);

/// <inheritdoc />
public string GetString(uint index)
{
    // Look up the UTF-8 bytes stored for this index and decode them into a string
    var utf8Bytes = GetUtf8(index);
    return Encoding.UTF8.GetString(utf8Bytes);
}
Expand All @@ -74,29 +71,8 @@ public string[] GetAll(int maxStringSize)
/// <param name="outputPath"></param>
public async Task WriteTo(string outputPath)
{
const uint HeaderSize = 12U;
var sizeHeader = new byte[HeaderSize];
var strings = _stringTable.ToArray().AsMemory().Cast<OffsetAndLength, byte>();
var header = WriteHeader(sizeHeader, [
Size,
HeaderSize,
HeaderSize + (uint)strings.Length
]);

// write to file
using var file = File.OpenHandle(outputPath, FileMode.Create, FileAccess.Write, FileShare.None, FileOptions.Asynchronous | FileOptions.SequentialScan);
await RandomAccess.WriteAsync(file, sizeHeader, 0);
await RandomAccess.WriteAsync(file, strings, header[1]);
await RandomAccess.WriteAsync(file, _writer.WrittenMemory, header[2]);
return;

static uint[] WriteHeader(Span<byte> span, uint[] header)
{
BinaryPrimitives.WriteUInt32LittleEndian(span[..4], header[0]);
BinaryPrimitives.WriteUInt32LittleEndian(span[4..8], header[1]);
BinaryPrimitives.WriteUInt32LittleEndian(span[8..12], header[2]);
return header;
}
var stringTable = new StringTable(_stringTable.WrittenMemory, _dataWriter.WrittenMemory);
await File.WriteAllBytesAsync(outputPath, stringTable.ReadOnlyMemory);
}
}
}
Loading

0 comments on commit 9392b75

Please sign in to comment.