Introduce Tensor.read
clebert committed Oct 12, 2023
1 parent 5395f45 commit 7825892
Showing 4 changed files with 55 additions and 116 deletions.
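Summary: instead of buffering the whole checkpoint (readFile) and handing out readFloatSlice views into it, each tensor now owns its memory and fills itself directly from the file. A minimal sketch of the new pattern, assuming the Tensor API from src/tensor.zig below; loadMatrix is a hypothetical helper, not part of this commit:

const std = @import("std");
const Tensor = @import("./tensor.zig").Tensor;

// Hypothetical helper illustrating the new loading pattern:
// allocate an owned tensor, then fill it from the current file offset.
fn loadMatrix(
    allocator: std.mem.Allocator,
    file: std.fs.File,
    rows: usize,
    cols: usize,
) !Tensor(2) {
    const matrix = try Tensor(2).init(allocator, [_]usize{ rows, cols });

    errdefer matrix.deinit();

    try matrix.read(file); // reads rows * cols raw f32 values

    return matrix;
}

The trade-off: startup now performs real reads instead of pointer arithmetic over one big allocation, but ownership and error cleanup (an errdefer per tensor) become local and explicit.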
124 changes: 38 additions & 86 deletions src/checkpoint.zig
@@ -6,7 +6,6 @@ const Tensor = @import("./tensor.zig").Tensor;
 const vector = @import("./vector.zig");

 allocator: std.mem.Allocator,
-
 embedding_size: usize,
 hidden_size: usize,
 n_layers: usize,
@@ -18,156 +17,141 @@ shared_final_classifier_matrix: bool,

 weights: struct {
     token_embedding_vectors: Tensor(2),
-
     attention_pre_norm_vectors: Tensor(2),
     attention_query_matrices: Tensor(3),
     attention_key_matrices: Tensor(3),
     attention_value_matrices: Tensor(3),
     attention_output_matrices: Tensor(3),
-
     ffn_pre_norm_vectors: Tensor(2),
     ffn_pre_activation_matrices: Tensor(3),
     ffn_output_matrices: Tensor(3),
     ffn_gate_matrices: Tensor(3),
-
     final_norm_vector: Tensor(1),
     final_classifier_matrix: Tensor(2),
 },

-data: []const u8,
-
 pub fn init(allocator: std.mem.Allocator, cli: *const Cli) !Self {
-    const data = try readFile(allocator, cli.checkpoint_path);
+    const file = try std.fs.cwd().openFile(cli.checkpoint_path, .{});

-    errdefer allocator.free(data);
-
-    const config_data: [*]i32 = @alignCast(@ptrCast(data[0..28]));
+    defer file.close();

-    const embedding_size: usize = @intCast(config_data[0]);
-    const hidden_size: usize = @intCast(config_data[1]);
-    const n_layers: usize = @intCast(config_data[2]);
-    const n_heads: usize = @intCast(config_data[3]);
-    const n_query_groups: usize = @intCast(config_data[4]);
+    const embedding_size: usize = @intCast(try file.reader().readIntLittle(i32));
+    const hidden_size: usize = @intCast(try file.reader().readIntLittle(i32));
+    const n_layers: usize = @intCast(try file.reader().readIntLittle(i32));
+    const n_heads: usize = @intCast(try file.reader().readIntLittle(i32));
+    const n_query_groups: usize = @intCast(try file.reader().readIntLittle(i32));

     // https://github.com/karpathy/llama2.c/blob/35deb5e0fa55f0a257040bcf1624ed8386e63dc7/run.c#L153
-    const signed_vocab_size: i32 = config_data[5];
+    const signed_vocab_size = try file.reader().readIntLittle(i32);
+    const shared_final_classifier_matrix = signed_vocab_size > 0;

     const vocab_size: usize = std.math.absCast(signed_vocab_size);
-    const max_sequence_length: usize = @intCast(config_data[6]);
-
-    var weights_data: [*]f32 = @alignCast(@ptrCast(data[28..]));
+    const max_sequence_length: usize = @intCast(try file.reader().readIntLittle(i32));

-    const token_embedding_vectors = try Tensor(2).initView(
+    const token_embedding_vectors = try Tensor(2).init(
         allocator,
-        readFloatSlice(&weights_data, vocab_size * embedding_size),
         [_]usize{ vocab_size, embedding_size },
     );

     errdefer token_embedding_vectors.deinit();
+    try token_embedding_vectors.read(file);

-    const attention_pre_norm_vectors = try Tensor(2).initView(
+    const attention_pre_norm_vectors = try Tensor(2).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size),
         [_]usize{ n_layers, embedding_size },
     );

     errdefer attention_pre_norm_vectors.deinit();
+    try attention_pre_norm_vectors.read(file);

-    const attention_query_matrices = try Tensor(3).initView(
+    const attention_query_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size * embedding_size),
         [_]usize{ n_layers, embedding_size, embedding_size },
     );

     errdefer attention_query_matrices.deinit();
+    try attention_query_matrices.read(file);

     const head_size: usize = embedding_size / n_heads;

-    const attention_key_matrices = try Tensor(3).initView(
+    const attention_key_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * (n_query_groups * head_size) * embedding_size),
         [_]usize{ n_layers, n_query_groups * head_size, embedding_size },
     );

     errdefer attention_key_matrices.deinit();
+    try attention_key_matrices.read(file);

-    const attention_value_matrices = try Tensor(3).initView(
+    const attention_value_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * (n_query_groups * head_size) * embedding_size),
         [_]usize{ n_layers, n_query_groups * head_size, embedding_size },
     );

     errdefer attention_value_matrices.deinit();
+    try attention_value_matrices.read(file);

-    const attention_output_matrices = try Tensor(3).initView(
+    const attention_output_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size * embedding_size),
         [_]usize{ n_layers, embedding_size, embedding_size },
     );

     errdefer attention_output_matrices.deinit();
+    try attention_output_matrices.read(file);

-    const ffn_pre_norm_vectors = try Tensor(2).initView(
+    const ffn_pre_norm_vectors = try Tensor(2).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size),
         [_]usize{ n_layers, embedding_size },
     );

     errdefer ffn_pre_norm_vectors.deinit();
+    try ffn_pre_norm_vectors.read(file);

-    const ffn_pre_activation_matrices = try Tensor(3).initView(
+    const ffn_pre_activation_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * hidden_size * embedding_size),
         [_]usize{ n_layers, hidden_size, embedding_size },
     );

     errdefer ffn_pre_activation_matrices.deinit();
+    try ffn_pre_activation_matrices.read(file);

-    const ffn_output_matrices = try Tensor(3).initView(
+    const ffn_output_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * embedding_size * hidden_size),
         [_]usize{ n_layers, embedding_size, hidden_size },
     );

     errdefer ffn_output_matrices.deinit();
+    try ffn_output_matrices.read(file);

-    const ffn_gate_matrices = try Tensor(3).initView(
+    const ffn_gate_matrices = try Tensor(3).init(
         allocator,
-        readFloatSlice(&weights_data, n_layers * hidden_size * embedding_size),
         [_]usize{ n_layers, hidden_size, embedding_size },
     );

     errdefer ffn_gate_matrices.deinit();
+    try ffn_gate_matrices.read(file);

-    const final_norm_vector = try Tensor(1).initView(
-        allocator,
-        readFloatSlice(&weights_data, embedding_size),
-        [_]usize{embedding_size},
-    );
+    const final_norm_vector = try Tensor(1).init(allocator, [_]usize{embedding_size});

     errdefer final_norm_vector.deinit();
+    try final_norm_vector.read(file);

-    _ = readFloatSlice(&weights_data, max_sequence_length * head_size / 2);
-    _ = readFloatSlice(&weights_data, max_sequence_length * head_size / 2);
-
-    const shared_final_classifier_matrix = signed_vocab_size > 0;
+    try file.seekBy(@intCast(max_sequence_length * head_size * @sizeOf(f32)));

     const final_classifier_matrix = if (shared_final_classifier_matrix)
         token_embedding_vectors
     else
-        try Tensor(2).initView(
-            allocator,
-            readFloatSlice(&weights_data, vocab_size * embedding_size),
-            [_]usize{ vocab_size, embedding_size },
-        );
+        try Tensor(2).init(allocator, [_]usize{ vocab_size, embedding_size });

     errdefer if (!shared_final_classifier_matrix) {
         final_classifier_matrix.deinit();
     };

+    if (!shared_final_classifier_matrix) {
+        try final_classifier_matrix.read(file);
+    }
+
     return Self{
         .allocator = allocator,
-
         .embedding_size = embedding_size,
         .hidden_size = hidden_size,
         .n_layers = n_layers,
@@ -191,8 +175,6 @@ pub fn init(allocator: std.mem.Allocator, cli: *const Cli) !Self {
             .final_norm_vector = final_norm_vector,
             .final_classifier_matrix = final_classifier_matrix,
         },
-
-        .data = data,
     };
 }

@@ -212,34 +194,4 @@ pub fn deinit(self: *const Self) void {
     if (!self.shared_final_classifier_matrix) {
         self.weights.final_classifier_matrix.deinit();
     }
-
-    self.allocator.free(self.data);
 }
-
-fn readFile(allocator: std.mem.Allocator, path: []const u8) ![]u8 {
-    const file = try std.fs.cwd().openFile(path, .{});
-
-    defer file.close();
-
-    const stat = try file.stat();
-
-    var data = try allocator.alloc(u8, stat.size);
-
-    errdefer allocator.free(data);
-
-    const n_bytes_read = try file.readAll(data);
-
-    if (n_bytes_read != data.len) {
-        return error.UnexpectedEndOfFile;
-    }
-
-    return data;
-}
-
-fn readFloatSlice(data: *[*]f32, len: usize) []f32 {
-    const slice = data.*[0..len];
-
-    data.* += len;
-
-    return slice;
-}
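Note on the seekBy line above: the legacy llama2.c format stores two unused RoPE frequency tables (freq_cis_real and freq_cis_imag, each max_sequence_length * head_size / 2 floats) after the final norm vector. The old code consumed them with two readFloatSlice calls; the new code skips the same byte range in one relative seek. A standalone sketch of that equivalence (hypothetical helper, not in the commit):

const std = @import("std");

// Sketch: skip the two unused RoPE frequency tables. Together they span
// 2 * (max_sequence_length * head_size / 2) == max_sequence_length * head_size
// f32 values (head_size is even in these models), which is exactly the byte
// count the seekBy in the hunk above advances past.
fn skipRopeTables(file: std.fs.File, max_sequence_length: usize, head_size: usize) !void {
    try file.seekBy(@intCast(max_sequence_length * head_size * @sizeOf(f32)));
}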
2 changes: 1 addition & 1 deletion src/sampler.zig
@@ -117,7 +117,7 @@ fn sampleNucleus(
         cumulative_probability += probability_index_pair.probability;

         if (cumulative_probability > top_p) {
-            probability_index_pairs = probability_index_pairs[0..(index + 1)];
+            probability_index_pairs = probability_index_pairs[0 .. index + 1];

             break;
         }
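Context for this one-line hunk (a slice-bounds formatting fix): the surrounding loop performs top-p (nucleus) truncation — scan the descending-sorted probability/index pairs and cut at the smallest prefix whose cumulative probability exceeds top_p. A self-contained sketch; the ProbabilityIndexPair struct is inferred from the field names in the hunk:

const ProbabilityIndexPair = struct { probability: f32, index: usize };

// Sketch: keep the smallest prefix of descending-sorted pairs whose
// cumulative probability exceeds top_p; fall back to the whole list.
fn truncateNucleus(pairs: []ProbabilityIndexPair, top_p: f32) []ProbabilityIndexPair {
    var cumulative_probability: f32 = 0;

    for (pairs, 0..) |pair, index| {
        cumulative_probability += pair.probability;

        if (cumulative_probability > top_p) {
            return pairs[0 .. index + 1];
        }
    }

    return pairs;
}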
41 changes: 14 additions & 27 deletions src/tensor.zig
@@ -2,12 +2,11 @@ const std = @import("std");
 const vector = @import("./vector.zig");

 pub fn Tensor(comptime n_dims: comptime_int) type {
-    comptime if (n_dims < 1) @compileError("TODO");
+    comptime if (n_dims < 1) @compileError("n_dims < 1");

     return struct {
         const Self = @This();

-        view: bool,
         allocator: ?std.mem.Allocator,
         data: []f32,
         sub_tensor_sizes: []const usize,
@@ -26,55 +25,43 @@ pub fn Tensor(comptime n_dims: comptime_int) type {
         }

         return .{
-            .view = false,
             .allocator = allocator,
-            .data = try allocator.alignedAlloc(f32, std.atomic.cache_line, tensor_size),
+            .data = try allocator.alloc(f32, tensor_size),
             .sub_tensor_sizes = sub_tensor_sizes,
         };
     }

-    pub fn initView(allocator: std.mem.Allocator, data: []f32, dims: [n_dims]usize) !Self {
-        const sub_tensor_sizes = try allocator.alloc(usize, n_dims - 1);
-
-        for (sub_tensor_sizes, 1..) |*sub_tensor_size, dims_offset| {
-            sub_tensor_size.* = 1;
-
-            for (dims[dims_offset..]) |dim| sub_tensor_size.* *= dim;
+    pub fn deinit(self: *const Self) void {
+        if (self.allocator) |allocator| {
+            allocator.free(self.data);
+            allocator.free(self.sub_tensor_sizes);
         }
-
-        return .{
-            .view = true,
-            .allocator = allocator,
-            .data = data,
-            .sub_tensor_sizes = sub_tensor_sizes,
-        };
     }

-    pub fn deinit(self: *const Self) void {
-        if (self.allocator) |allocator| {
-            if (!self.view) {
-                allocator.free(@as([]align(std.atomic.cache_line) f32, @alignCast(self.data)));
-            }
+    pub fn read(self: *const Self, file: std.fs.File) !void {
+        const buffer: [*]u8 = @ptrCast(self.data);
+        const n_bytes = self.data.len * @sizeOf(f32);
+        const n_bytes_read = try file.reader().readAll(buffer[0..n_bytes]);

-            allocator.free(self.sub_tensor_sizes);
+        if (n_bytes_read != n_bytes) {
+            return error.UnexpectedEndOfFile;
         }
     }

     pub fn slice(self: *const Self, index: usize) Tensor(n_dims - 1) {
-        comptime if (n_dims < 2) @compileError("TODO");
+        comptime if (n_dims < 2) @compileError("n_dims < 2");

         const sub_tensor_size = self.sub_tensor_sizes[0];

         return Tensor(n_dims - 1){
-            .view = self.view,
             .allocator = null,
             .data = self.data[(index * sub_tensor_size)..][0..sub_tensor_size],
             .sub_tensor_sizes = self.sub_tensor_sizes[1..],
         };
     }

     pub fn multiplyVector(self: *const Self, input_data: []const f32, output_data: []f32) void {
-        comptime if (n_dims < 2) @compileError("TODO");
+        comptime if (n_dims < 2) @compileError("n_dims < 2");

         const data = self.data;
         const sub_tensor_size = self.sub_tensor_sizes[0];
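The resulting Tensor surface: init allocates owned f32 storage, read fills it from a file (error.UnexpectedEndOfFile on a short read), and slice returns a non-owning view whose allocator is null, so deinit on it is a no-op. A usage sketch; "weights.bin" is a hypothetical file holding at least six f32 values:

const std = @import("std");
const Tensor = @import("./tensor.zig").Tensor;

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    const matrix = try Tensor(2).init(gpa.allocator(), [_]usize{ 2, 3 });

    defer matrix.deinit();

    const file = try std.fs.cwd().openFile("weights.bin", .{});

    defer file.close();

    try matrix.read(file); // fills all 2 * 3 = 6 floats or fails

    const row = matrix.slice(1); // second row; a view, nothing to free

    std.debug.print("row[0] = {d}\n", .{row.data[0]});
}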
4 changes: 2 additions & 2 deletions src/tokenizer.zig
@@ -135,10 +135,10 @@ fn mergeBestWordPair(self: *const Self, tokens: []usize, double_word_buffer: []u
         const word2 = self.vocab[tokens[token_index + 1]];

         @memcpy(double_word_buffer[0..word1.len], word1);
-        @memcpy(double_word_buffer[word1.len..(word1.len + word2.len)], word2);
+        @memcpy(double_word_buffer[word1.len .. word1.len + word2.len], word2);

         const token =
-            self.lookupToken(double_word_buffer[0..(word1.len + word2.len)]) orelse continue;
+            self.lookupToken(double_word_buffer[0 .. word1.len + word2.len]) orelse continue;

         const word_score = self.word_scores[token];

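This hunk only reformats the slice bounds, but it sits in the BPE merge step: two neighboring vocabulary words are concatenated into a scratch buffer and looked up as a single token. A sketch with a plain hash map standing in for the tokenizer's vocab and lookupToken (hypothetical stand-ins):

const std = @import("std");

// Sketch: concatenate two words into `buffer` and look the merged string up.
// Returns null when the pair is not a known token.
fn mergedToken(
    vocab_lookup: *const std.StringHashMap(usize),
    word1: []const u8,
    word2: []const u8,
    buffer: []u8,
) ?usize {
    @memcpy(buffer[0..word1.len], word1);
    @memcpy(buffer[word1.len .. word1.len + word2.len], word2);

    return vocab_lookup.get(buffer[0 .. word1.len + word2.len]);
}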
