Introduce Tensor.read
clebert committed Oct 12, 2023
1 parent 5395f45 commit 7825892
Showing 4 changed files with 55 additions and 116 deletions.
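Summary: instead of buffering the whole checkpoint (readFile) and handing out readFloatSlice views into it, each tensor now owns its memory and fills itself directly from the file. A minimal sketch of the new pattern, assuming the Tensor API from src/tensor.zig below; loadMatrix is a hypothetical helper, not part of this commit:

const std = @import("std");
const Tensor = @import("./tensor.zig").Tensor;

// Hypothetical helper illustrating the new loading pattern:
// allocate an owned tensor, then fill it from the current file offset.
fn loadMatrix(
    allocator: std.mem.Allocator,
    file: std.fs.File,
    rows: usize,
    cols: usize,
) !Tensor(2) {
    const matrix = try Tensor(2).init(allocator, [_]usize{ rows, cols });

    errdefer matrix.deinit();

    try matrix.read(file); // reads rows * cols raw f32 values

    return matrix;
}

The trade-off: startup now performs real reads instead of pointer arithmetic over one big allocation, but ownership and error cleanup (an errdefer per tensor) become local and explicit.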
124 changes: 38 additions & 86 deletions src/checkpoint.zig
@@ -6,7 +6,6 @@ const Tensor = @import("./tensor.zig").Tensor;
 const vector = @import("./vector.zig");

 allocator: std.mem.Allocator,
-
 embedding_size: usize,
 hidden_size: usize,
 n_layers: usize,
@@ -18,156 +17,141 @@ shared_final_classifier_matrix: bool,

 weights: struct {
     token_embedding_vectors: Tensor(2),
-
     attention_pre_norm_vectors: Tensor(2),
     attention_query_matrices: Tensor(3),
     attention_key_matrices: Tensor(3),
     attention_value_matrices: Tensor(3),
     attention_output_matrices: Tensor(3),
-
     ffn_pre_norm_vectors: Tensor(2),
     ffn_pre_activation_matrices: Tensor(3),
     ffn_output_matrices: Tensor(3),
     ffn_gate_matrices: Tensor(3),
-
     final_norm_vector: Tensor(1),
     final_classifier_matrix: Tensor(2),
 },

-data: []const u8,
-
 pub fn init(allocator: std.mem.Allocator, cli: *const Cli) !Self {
-    const data = try readFile(allocator, cli.checkpoint_path);
+    const file = try std.fs.cwd().openFile(cli.checkpoint_path, .{});

-    errdefer allocator.free(data);
-
-    const config_data: [*]i32 = @alignCast(@ptrCast(data[0..28]));
+    defer file.close();

-    const embedding_size: usize = @intCast(config_data[0]);
-    const hidden_size: usize = @intCast(config_data[1]);
-    const n_layers: usize = @intCast(config_data[2]);
-    const n_heads: usize = @intCast(config_data[3]);
-    const n_query_groups: usize = @intCast(config_data[4]);
+    const embedding_size: usize = @intCast(try file.reader().readIntLittle(i32));
+    const hidden_size: usize = @intCast(try file.reader().readIntLittle(i32));
+    const n_layers: usize = @intCast(try file.reader().readIntLittle(i32));
+    const n_heads: usize = @intCast(try file.reader().readIntLittle(i32));
+    const n_query_groups: usize = @intCast(try file.reader().readIntLittle(i32));

     // https://github.com/karpathy/llama2.c/blob/35deb5e0fa55f0a257040bcf1624ed8386e63dc7/run.c#L153
-    const signed_vocab_size: i32 = config_data[5];
+    const signed_vocab_size = try file.reader().readIntLittle(i32);
+    const shared_final_classifier_matrix = signed_vocab_size > 0;

     const vocab_size: usize = std.math.absCast(signed_vocab_size);
-    const max_sequence_length: usize = @intCast(config_data[6]);
-
-    var weights_data: [*]f32 = @alignCast(@ptrCast(data[28..]));
+    const max_sequence_length: usize = @intCast(try file.reader().readIntLittle(i32));

-    const token_embedding_vectors = try Tensor(2).initView(
+    const token_embedding_vectors = try Tensor(2).init(
         allocator,
-        readFloatSlice(&weights_data, vocab_size * embedding_size),
         [_]usize{ vocab_size, embedding_size },
     );

     errdefer token_embedding_vectors.deinit();
+    try token_embedding_vectors.read(file);

-    const attention_pre_norm_vectors = try Tensor(2).initView(
+    const attention_pre_norm_vectors = try Tensor(2).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size),
         [_]usize{ n_layers, embedding_size },
     );

     errdefer attention_pre_norm_vectors.deinit();
+    try attention_pre_norm_vectors.read(file);

-    const attention_query_matrices = try Tensor(3).initView(
+    const attention_query_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size * embedding_size),
         [_]usize{ n_layers, embedding_size, embedding_size },
     );

     errdefer attention_query_matrices.deinit();
+    try attention_query_matrices.read(file);

     const head_size: usize = embedding_size / n_heads;

-    const attention_key_matrices = try Tensor(3).initView(
+    const attention_key_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * (n_query_groups * head_size) * embedding_size),
         [_]usize{ n_layers, n_query_groups * head_size, embedding_size },
     );

     errdefer attention_key_matrices.deinit();
+    try attention_key_matrices.read(file);

-    const attention_value_matrices = try Tensor(3).initView(
+    const attention_value_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * (n_query_groups * head_size) * embedding_size),
         [_]usize{ n_layers, n_query_groups * head_size, embedding_size },
     );

     errdefer attention_value_matrices.deinit();
+    try attention_value_matrices.read(file);

-    const attention_output_matrices = try Tensor(3).initView(
+    const attention_output_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size * embedding_size),
         [_]usize{ n_layers, embedding_size, embedding_size },
     );

     errdefer attention_output_matrices.deinit();
+    try attention_output_matrices.read(file);

-    const ffn_pre_norm_vectors = try Tensor(2).initView(
+    const ffn_pre_norm_vectors = try Tensor(2).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size),
         [_]usize{ n_layers, embedding_size },
     );

     errdefer ffn_pre_norm_vectors.deinit();
+    try ffn_pre_norm_vectors.read(file);

-    const ffn_pre_activation_matrices = try Tensor(3).initView(
+    const ffn_pre_activation_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * hidden_size * embedding_size),
         [_]usize{ n_layers, hidden_size, embedding_size },
     );

     errdefer ffn_pre_activation_matrices.deinit();
+    try ffn_pre_activation_matrices.read(file);

-    const ffn_output_matrices = try Tensor(3).initView(
+    const ffn_output_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size * hidden_size),
         [_]usize{ n_layers, embedding_size, hidden_size },
     );

     errdefer ffn_output_matrices.deinit();
+    try ffn_output_matrices.read(file);

-    const ffn_gate_matrices = try Tensor(3).initView(
+    const ffn_gate_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * hidden_size * embedding_size),
         [_]usize{ n_layers, hidden_size, embedding_size },
     );

     errdefer ffn_gate_matrices.deinit();
+    try ffn_gate_matrices.read(file);

-    const final_norm_vector = try Tensor(1).initView(
-        allocator,
-        readFloatSlice(&weights_data, embedding_size),
-        [_]usize{embedding_size},
-    );
+    const final_norm_vector = try Tensor(1).init(allocator, [_]usize{embedding_size});

     errdefer final_norm_vector.deinit();
+    try final_norm_vector.read(file);

-    _ = readFloatSlice(&weights_data, max_sequence_length * head_size / 2);
-    _ = readFloatSlice(&weights_data, max_sequence_length * head_size / 2);
-
-    const shared_final_classifier_matrix = signed_vocab_size > 0;
+    try file.seekBy(@intCast(max_sequence_length * head_size * @sizeOf(f32)));

     const final_classifier_matrix = if (shared_final_classifier_matrix)
         token_embedding_vectors
     else
-        try Tensor(2).initView(
-            allocator,
-            readFloatSlice(&weights_data, vocab_size * embedding_size),
-            [_]usize{ vocab_size, embedding_size },
-        );
+        try Tensor(2).init(allocator, [_]usize{ vocab_size, embedding_size });

     errdefer if (!shared_final_classifier_matrix) {
         final_classifier_matrix.deinit();
     };

+    if (!shared_final_classifier_matrix) {
+        try final_classifier_matrix.read(file);
+    }
+
     return Self{
         .allocator = allocator,
-
         .embedding_size = embedding_size,
         .hidden_size = hidden_size,
         .n_layers = n_layers,
@@ -191,8 +175,6 @@ pub fn init(allocator: std.mem.Allocator, cli: *const Cli) !Self {
             .final_norm_vector = final_norm_vector,
             .final_classifier_matrix = final_classifier_matrix,
         },
-
-        .data = data,
     };
 }

@@ -212,34 +194,4 @@ pub fn deinit(self: *const Self) void {
     if (!self.shared_final_classifier_matrix) {
         self.weights.final_classifier_matrix.deinit();
     }
-
-    self.allocator.free(self.data);
 }
-
-fn readFile(allocator: std.mem.Allocator, path: []const u8) ![]u8 {
-    const file = try std.fs.cwd().openFile(path, .{});
-
-    defer file.close();
-
-    const stat = try file.stat();
-
-    var data = try allocator.alloc(u8, stat.size);
-
-    errdefer allocator.free(data);
-
-    const n_bytes_read = try file.readAll(data);
-
-    if (n_bytes_read != data.len) {
-        return error.UnexpectedEndOfFile;
-    }
-
-    return data;
-}
-
-fn readFloatSlice(data: *[*]f32, len: usize) []f32 {
-    const slice = data.*[0..len];
-
-    data.* += len;
-
-    return slice;
-}
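Note on the seekBy line above: the legacy llama2.c format stores two unused RoPE frequency tables (freq_cis_real and freq_cis_imag, each max_sequence_length * head_size / 2 floats) after the final norm vector. The old code consumed them with two readFloatSlice calls; the new code skips the same byte range in one relative seek. A standalone sketch of that equivalence (hypothetical helper, not in the commit):

const std = @import("std");

// Sketch: skip the two unused RoPE frequency tables. Together they span
// 2 * (max_sequence_length * head_size / 2) == max_sequence_length * head_size
// f32 values (head_size is even in these models), which is exactly the byte
// count the seekBy in the hunk above advances past.
fn skipRopeTables(file: std.fs.File, max_sequence_length: usize, head_size: usize) !void {
    try file.seekBy(@intCast(max_sequence_length * head_size * @sizeOf(f32)));
}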
2 changes: 1 addition & 1 deletion src/sampler.zig
@@ -117,7 +117,7 @@ fn sampleNucleus(
         cumulative_probability += probability_index_pair.probability;

         if (cumulative_probability > top_p) {
-            probability_index_pairs = probability_index_pairs[0..(index + 1)];
+            probability_index_pairs = probability_index_pairs[0 .. index + 1];

             break;
         }
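Context for this one-line hunk (a slice-bounds formatting fix): the surrounding loop performs top-p (nucleus) truncation — scan the descending-sorted probability/index pairs and cut at the smallest prefix whose cumulative probability exceeds top_p. A self-contained sketch; the ProbabilityIndexPair struct is inferred from the field names in the hunk:

const ProbabilityIndexPair = struct { probability: f32, index: usize };

// Sketch: keep the smallest prefix of descending-sorted pairs whose
// cumulative probability exceeds top_p; fall back to the whole list.
fn truncateNucleus(pairs: []ProbabilityIndexPair, top_p: f32) []ProbabilityIndexPair {
    var cumulative_probability: f32 = 0;

    for (pairs, 0..) |pair, index| {
        cumulative_probability += pair.probability;

        if (cumulative_probability > top_p) {
            return pairs[0 .. index + 1];
        }
    }

    return pairs;
}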
41 changes: 14 additions & 27 deletions src/tensor.zig
@@ -2,12 +2,11 @@ const std = @import("std");
 const vector = @import("./vector.zig");

 pub fn Tensor(comptime n_dims: comptime_int) type {
-    comptime if (n_dims < 1) @compileError("TODO");
+    comptime if (n_dims < 1) @compileError("n_dims < 1");

     return struct {
         const Self = @This();

-        view: bool,
         allocator: ?std.mem.Allocator,
         data: []f32,
         sub_tensor_sizes: []const usize,
@@ -26,55 +25,43 @@ pub fn Tensor(comptime n_dims: comptime_int) type {
         }

         return .{
-            .view = false,
             .allocator = allocator,
-            .data = try allocator.alignedAlloc(f32, std.atomic.cache_line, tensor_size),
+            .data = try allocator.alloc(f32, tensor_size),
             .sub_tensor_sizes = sub_tensor_sizes,
         };
     }

-    pub fn initView(allocator: std.mem.Allocator, data: []f32, dims: [n_dims]usize) !Self {
-        const sub_tensor_sizes = try allocator.alloc(usize, n_dims - 1);
-
-        for (sub_tensor_sizes, 1..) |*sub_tensor_size, dims_offset| {
-            sub_tensor_size.* = 1;
-
-            for (dims[dims_offset..]) |dim| sub_tensor_size.* *= dim;
+    pub fn deinit(self: *const Self) void {
+        if (self.allocator) |allocator| {
+            allocator.free(self.data);
+            allocator.free(self.sub_tensor_sizes);
         }
-
-        return .{
-            .view = true,
-            .allocator = allocator,
-            .data = data,
-            .sub_tensor_sizes = sub_tensor_sizes,
-        };
     }

-    pub fn deinit(self: *const Self) void {
-        if (self.allocator) |allocator| {
-            if (!self.view) {
-                allocator.free(@as([]align(std.atomic.cache_line) f32, @alignCast(self.data)));
-            }
+    pub fn read(self: *const Self, file: std.fs.File) !void {
+        const buffer: [*]u8 = @ptrCast(self.data);
+        const n_bytes = self.data.len * @sizeOf(f32);
+        const n_bytes_read = try file.reader().readAll(buffer[0..n_bytes]);

-            allocator.free(self.sub_tensor_sizes);
+        if (n_bytes_read != n_bytes) {
+            return error.UnexpectedEndOfFile;
         }
     }

     pub fn slice(self: *const Self, index: usize) Tensor(n_dims - 1) {
-        comptime if (n_dims < 2) @compileError("TODO");
+        comptime if (n_dims < 2) @compileError("n_dims < 2");

         const sub_tensor_size = self.sub_tensor_sizes[0];

         return Tensor(n_dims - 1){
-            .view = self.view,
             .allocator = null,
             .data = self.data[(index * sub_tensor_size)..][0..sub_tensor_size],
             .sub_tensor_sizes = self.sub_tensor_sizes[1..],
         };
     }

     pub fn multiplyVector(self: *const Self, input_data: []const f32, output_data: []f32) void {
-        comptime if (n_dims < 2) @compileError("TODO");
+        comptime if (n_dims < 2) @compileError("n_dims < 2");

         const data = self.data;
         const sub_tensor_size = self.sub_tensor_sizes[0];
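The resulting Tensor surface: init allocates owned f32 storage, read fills it from a file (error.UnexpectedEndOfFile on a short read), and slice returns a non-owning view whose allocator is null, so deinit on it is a no-op. A usage sketch; "weights.bin" is a hypothetical file holding at least six f32 values:

const std = @import("std");
const Tensor = @import("./tensor.zig").Tensor;

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    const matrix = try Tensor(2).init(gpa.allocator(), [_]usize{ 2, 3 });

    defer matrix.deinit();

    const file = try std.fs.cwd().openFile("weights.bin", .{});

    defer file.close();

    try matrix.read(file); // fills all 2 * 3 = 6 floats or fails

    const row = matrix.slice(1); // second row; a view, nothing to free

    std.debug.print("row[0] = {d}\n", .{row.data[0]});
}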
4 changes: 2 additions & 2 deletions src/tokenizer.zig
@@ -135,10 +135,10 @@ fn mergeBestWordPair(self: *const Self, tokens: []usize, double_word_buffer: []u
         const word2 = self.vocab[tokens[token_index + 1]];

         @memcpy(double_word_buffer[0..word1.len], word1);
-        @memcpy(double_word_buffer[word1.len..(word1.len + word2.len)], word2);
+        @memcpy(double_word_buffer[word1.len .. word1.len + word2.len], word2);

         const token =
-            self.lookupToken(double_word_buffer[0..(word1.len + word2.len)]) orelse continue;
+            self.lookupToken(double_word_buffer[0 .. word1.len + word2.len]) orelse continue;

         const word_score = self.word_scores[token];

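This hunk only reformats the slice bounds, but it sits in the BPE merge step: two neighboring vocabulary words are concatenated into a scratch buffer and looked up as a single token. A sketch with a plain hash map standing in for the tokenizer's vocab and lookupToken (hypothetical stand-ins):

const std = @import("std");

// Sketch: concatenate two words into `buffer` and look the merged string up.
// Returns null when the pair is not a known token.
fn mergedToken(
    vocab_lookup: *const std.StringHashMap(usize),
    word1: []const u8,
    word2: []const u8,
    buffer: []u8,
) ?usize {
    @memcpy(buffer[0..word1.len], word1);
    @memcpy(buffer[word1.len .. word1.len + word2.len], word2);

    return vocab_lookup.get(buffer[0 .. word1.len + word2.len]);
}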
