diff --git a/src/checkpoint.zig b/src/checkpoint.zig index c699510..af8a534 100644 --- a/src/checkpoint.zig +++ b/src/checkpoint.zig @@ -6,7 +6,6 @@ const Tensor = @import("./tensor.zig").Tensor; const vector = @import("./vector.zig"); allocator: std.mem.Allocator, - embedding_size: usize, hidden_size: usize, n_layers: usize, @@ -18,156 +17,141 @@ shared_final_classifier_matrix: bool, weights: struct { token_embedding_vectors: Tensor(2), - attention_pre_norm_vectors: Tensor(2), attention_query_matrices: Tensor(3), attention_key_matrices: Tensor(3), attention_value_matrices: Tensor(3), attention_output_matrices: Tensor(3), - ffn_pre_norm_vectors: Tensor(2), ffn_pre_activation_matrices: Tensor(3), ffn_output_matrices: Tensor(3), ffn_gate_matrices: Tensor(3), - final_norm_vector: Tensor(1), final_classifier_matrix: Tensor(2), }, -data: []const u8, - pub fn init(allocator: std.mem.Allocator, cli: *const Cli) !Self { - const data = try readFile(allocator, cli.checkpoint_path); + const file = try std.fs.cwd().openFile(cli.checkpoint_path, .{}); - errdefer allocator.free(data); - - const config_data: [*]i32 = @alignCast(@ptrCast(data[0..28])); + defer file.close(); - const embedding_size: usize = @intCast(config_data[0]); - const hidden_size: usize = @intCast(config_data[1]); - const n_layers: usize = @intCast(config_data[2]); - const n_heads: usize = @intCast(config_data[3]); - const n_query_groups: usize = @intCast(config_data[4]); + const embedding_size: usize = @intCast(try file.reader().readIntLittle(i32)); + const hidden_size: usize = @intCast(try file.reader().readIntLittle(i32)); + const n_layers: usize = @intCast(try file.reader().readIntLittle(i32)); + const n_heads: usize = @intCast(try file.reader().readIntLittle(i32)); + const n_query_groups: usize = @intCast(try file.reader().readIntLittle(i32)); // https://github.com/karpathy/llama2.c/blob/35deb5e0fa55f0a257040bcf1624ed8386e63dc7/run.c#L153 - const signed_vocab_size: i32 = config_data[5]; + const signed_vocab_size = try file.reader().readIntLittle(i32); + const shared_final_classifier_matrix = signed_vocab_size > 0; const vocab_size: usize = std.math.absCast(signed_vocab_size); - const max_sequence_length: usize = @intCast(config_data[6]); - - var weights_data: [*]f32 = @alignCast(@ptrCast(data[28..])); + const max_sequence_length: usize = @intCast(try file.reader().readIntLittle(i32)); - const token_embedding_vectors = try Tensor(2).initView( + const token_embedding_vectors = try Tensor(2).init( allocator, - readFloatSlice(&weights_data, vocab_size * embedding_size), [_]usize{ vocab_size, embedding_size }, ); errdefer token_embedding_vectors.deinit(); + try token_embedding_vectors.read(file); - const attention_pre_norm_vectors = try Tensor(2).initView( + const attention_pre_norm_vectors = try Tensor(2).init( allocator, - readFloatSlice(&weights_data, n_layers * embedding_size), [_]usize{ n_layers, embedding_size }, ); errdefer attention_pre_norm_vectors.deinit(); + try attention_pre_norm_vectors.read(file); - const attention_query_matrices = try Tensor(3).initView( + const attention_query_matrices = try Tensor(3).init( allocator, - readFloatSlice(&weights_data, n_layers * embedding_size * embedding_size), [_]usize{ n_layers, embedding_size, embedding_size }, ); errdefer attention_query_matrices.deinit(); + try attention_query_matrices.read(file); const head_size: usize = embedding_size / n_heads; - const attention_key_matrices = try Tensor(3).initView( + const attention_key_matrices = try Tensor(3).init( allocator, - readFloatSlice(&weights_data, n_layers * (n_query_groups * head_size) * embedding_size), [_]usize{ n_layers, n_query_groups * head_size, embedding_size }, ); errdefer attention_key_matrices.deinit(); + try attention_key_matrices.read(file); - const attention_value_matrices = try Tensor(3).initView( + const attention_value_matrices = try Tensor(3).init( allocator, - readFloatSlice(&weights_data, n_layers * (n_query_groups * head_size) * embedding_size), [_]usize{ n_layers, n_query_groups * head_size, embedding_size }, ); errdefer attention_value_matrices.deinit(); + try attention_value_matrices.read(file); - const attention_output_matrices = try Tensor(3).initView( + const attention_output_matrices = try Tensor(3).init( allocator, - readFloatSlice(&weights_data, n_layers * embedding_size * embedding_size), [_]usize{ n_layers, embedding_size, embedding_size }, ); errdefer attention_output_matrices.deinit(); + try attention_output_matrices.read(file); - const ffn_pre_norm_vectors = try Tensor(2).initView( + const ffn_pre_norm_vectors = try Tensor(2).init( allocator, - readFloatSlice(&weights_data, n_layers * embedding_size), [_]usize{ n_layers, embedding_size }, ); errdefer ffn_pre_norm_vectors.deinit(); + try ffn_pre_norm_vectors.read(file); - const ffn_pre_activation_matrices = try Tensor(3).initView( + const ffn_pre_activation_matrices = try Tensor(3).init( allocator, - readFloatSlice(&weights_data, n_layers * hidden_size * embedding_size), [_]usize{ n_layers, hidden_size, embedding_size }, ); errdefer ffn_pre_activation_matrices.deinit(); + try ffn_pre_activation_matrices.read(file); - const ffn_output_matrices = try Tensor(3).initView( + const ffn_output_matrices = try Tensor(3).init( allocator, - readFloatSlice(&weights_data, n_layers * embedding_size * hidden_size), [_]usize{ n_layers, embedding_size, hidden_size }, ); errdefer ffn_output_matrices.deinit(); + try ffn_output_matrices.read(file); - const ffn_gate_matrices = try Tensor(3).initView( + const ffn_gate_matrices = try Tensor(3).init( allocator, - readFloatSlice(&weights_data, n_layers * hidden_size * embedding_size), [_]usize{ n_layers, hidden_size, embedding_size }, ); errdefer ffn_gate_matrices.deinit(); + try ffn_gate_matrices.read(file); - const final_norm_vector = try Tensor(1).initView( - allocator, - readFloatSlice(&weights_data, embedding_size), - [_]usize{embedding_size}, - ); + const final_norm_vector = try Tensor(1).init(allocator, [_]usize{embedding_size}); errdefer final_norm_vector.deinit(); + try final_norm_vector.read(file); - _ = readFloatSlice(&weights_data, max_sequence_length * head_size / 2); - _ = readFloatSlice(&weights_data, max_sequence_length * head_size / 2); - - const shared_final_classifier_matrix = signed_vocab_size > 0; + try file.seekBy(@intCast(max_sequence_length * head_size * @sizeOf(f32))); const final_classifier_matrix = if (shared_final_classifier_matrix) token_embedding_vectors else - try Tensor(2).initView( - allocator, - readFloatSlice(&weights_data, vocab_size * embedding_size), - [_]usize{ vocab_size, embedding_size }, - ); + try Tensor(2).init(allocator, [_]usize{ vocab_size, embedding_size }); errdefer if (!shared_final_classifier_matrix) { final_classifier_matrix.deinit(); }; + if (!shared_final_classifier_matrix) { + try final_classifier_matrix.read(file); + } + return Self{ .allocator = allocator, - .embedding_size = embedding_size, .hidden_size = hidden_size, .n_layers = n_layers, @@ -191,8 +175,6 @@ pub fn init(allocator: std.mem.Allocator, cli: *const Cli) !Self { .final_norm_vector = final_norm_vector, .final_classifier_matrix = final_classifier_matrix, }, - - .data = data, }; } @@ -212,34 +194,4 @@ pub fn deinit(self: *const Self) void { if (!self.shared_final_classifier_matrix) { self.weights.final_classifier_matrix.deinit(); } - - self.allocator.free(self.data); -} - -fn readFile(allocator: std.mem.Allocator, path: []const u8) ![]u8 { - const file = try std.fs.cwd().openFile(path, .{}); - - defer file.close(); - - const stat = try file.stat(); - - var data = try allocator.alloc(u8, stat.size); - - errdefer allocator.free(data); - - const n_bytes_read = try file.readAll(data); - - if (n_bytes_read != data.len) { - return error.UnexpectedEndOfFile; - } - - return data; -} - -fn readFloatSlice(data: *[*]f32, len: usize) []f32 { - const slice = data.*[0..len]; - - data.* += len; - - return slice; } diff --git a/src/sampler.zig b/src/sampler.zig index c400bb4..4f4be92 100644 --- a/src/sampler.zig +++ b/src/sampler.zig @@ -117,7 +117,7 @@ fn sampleNucleus( cumulative_probability += probability_index_pair.probability; if (cumulative_probability > top_p) { - probability_index_pairs = probability_index_pairs[0..(index + 1)]; + probability_index_pairs = probability_index_pairs[0 .. index + 1]; break; } diff --git a/src/tensor.zig b/src/tensor.zig index 41dd315..a6c453f 100644 --- a/src/tensor.zig +++ b/src/tensor.zig @@ -2,12 +2,11 @@ const std = @import("std"); const vector = @import("./vector.zig"); pub fn Tensor(comptime n_dims: comptime_int) type { - comptime if (n_dims < 1) @compileError("TODO"); + comptime if (n_dims < 1) @compileError("n_dims < 1"); return struct { const Self = @This(); - view: bool, allocator: ?std.mem.Allocator, data: []f32, sub_tensor_sizes: []const usize, @@ -26,47 +25,35 @@ pub fn Tensor(comptime n_dims: comptime_int) type { } return .{ - .view = false, .allocator = allocator, - .data = try allocator.alignedAlloc(f32, std.atomic.cache_line, tensor_size), + .data = try allocator.alloc(f32, tensor_size), .sub_tensor_sizes = sub_tensor_sizes, }; } - pub fn initView(allocator: std.mem.Allocator, data: []f32, dims: [n_dims]usize) !Self { - const sub_tensor_sizes = try allocator.alloc(usize, n_dims - 1); - - for (sub_tensor_sizes, 1..) |*sub_tensor_size, dims_offset| { - sub_tensor_size.* = 1; - - for (dims[dims_offset..]) |dim| sub_tensor_size.* *= dim; + pub fn deinit(self: *const Self) void { + if (self.allocator) |allocator| { + allocator.free(self.data); + allocator.free(self.sub_tensor_sizes); } - - return .{ - .view = true, - .allocator = allocator, - .data = data, - .sub_tensor_sizes = sub_tensor_sizes, - }; } - pub fn deinit(self: *const Self) void { - if (self.allocator) |allocator| { - if (!self.view) { - allocator.free(@as([]align(std.atomic.cache_line) f32, @alignCast(self.data))); - } + pub fn read(self: *const Self, file: std.fs.File) !void { + const buffer: [*]u8 = @ptrCast(self.data); + const n_bytes = self.data.len * @sizeOf(f32); + const n_bytes_read = try file.reader().readAll(buffer[0..n_bytes]); - allocator.free(self.sub_tensor_sizes); + if (n_bytes_read != n_bytes) { + return error.UnexpectedEndOfFile; } } pub fn slice(self: *const Self, index: usize) Tensor(n_dims - 1) { - comptime if (n_dims < 2) @compileError("TODO"); + comptime if (n_dims < 2) @compileError("n_dims < 2"); const sub_tensor_size = self.sub_tensor_sizes[0]; return Tensor(n_dims - 1){ - .view = self.view, .allocator = null, .data = self.data[(index * sub_tensor_size)..][0..sub_tensor_size], .sub_tensor_sizes = self.sub_tensor_sizes[1..], @@ -74,7 +61,7 @@ pub fn Tensor(comptime n_dims: comptime_int) type { } pub fn multiplyVector(self: *const Self, input_data: []const f32, output_data: []f32) void { - comptime if (n_dims < 2) @compileError("TODO"); + comptime if (n_dims < 2) @compileError("n_dims < 2"); const data = self.data; const sub_tensor_size = self.sub_tensor_sizes[0]; diff --git a/src/tokenizer.zig b/src/tokenizer.zig index 718da3f..a727693 100644 --- a/src/tokenizer.zig +++ b/src/tokenizer.zig @@ -135,10 +135,10 @@ fn mergeBestWordPair(self: *const Self, tokens: []usize, double_word_buffer: []u const word2 = self.vocab[tokens[token_index + 1]]; @memcpy(double_word_buffer[0..word1.len], word1); - @memcpy(double_word_buffer[word1.len..(word1.len + word2.len)], word2); + @memcpy(double_word_buffer[word1.len .. word1.len + word2.len], word2); const token = - self.lookupToken(double_word_buffer[0..(word1.len + word2.len)]) orelse continue; + self.lookupToken(double_word_buffer[0 .. word1.len + word2.len]) orelse continue; const word_score = self.word_scores[token];