From b930f618240b1704f0be2ffb96f96900ad03a91e Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Fri, 25 Oct 2024 13:18:06 +0000 Subject: [PATCH] basic sort functionality, need performance improvement --- .github/workflows/test-zig-files.yml | 10 +- .gitignore | 21 +--- Project0-word/build.zig | 80 +++++++++++++- Project0-word/build.zig.zon | 72 +++++++++++++ Project0-word/plain.txt | 10 -- Project0-word/{ => src}/P0-tests.zig | 12 ++- Project0-word/{ => src}/count_words.zig | 24 ++--- Project0-word/src/data-structures.zig | 1 + Project0-word/{ => src}/main.zig | 5 +- Project0-word/{ => src}/utils.zig | 132 +++++++++++++++++++++--- Project0-word/test_plain.txt | 57 ++++++++++ ignore_files/.codespellignore | 1 + testing_helper.js | 20 ++++ 13 files changed, 376 insertions(+), 69 deletions(-) create mode 100644 Project0-word/build.zig.zon delete mode 100644 Project0-word/plain.txt rename Project0-word/{ => src}/P0-tests.zig (57%) rename Project0-word/{ => src}/count_words.zig (51%) create mode 100755 Project0-word/src/data-structures.zig rename Project0-word/{ => src}/main.zig (97%) rename Project0-word/{ => src}/utils.zig (57%) create mode 100644 Project0-word/test_plain.txt create mode 100644 testing_helper.js diff --git a/.github/workflows/test-zig-files.yml b/.github/workflows/test-zig-files.yml index b4e8dd2..6f416b1 100644 --- a/.github/workflows/test-zig-files.yml +++ b/.github/workflows/test-zig-files.yml @@ -5,9 +5,9 @@ on: branches: - main paths: - - "**.zig" - - "**/**.zig" - - ".github/workflows/test-zig-files.yml" + - '**.zig' + - '**/**.zig' + - '.github/workflows/test-zig-files.yml' jobs: Simple-tests: @@ -40,9 +40,9 @@ jobs: - uses: goto-bus-stop/setup-zig@v2 - run: | echo 'testing P0-tests.zig' - zig test P0-tests.zig + zig test src/P0-tests.zig echo 'testing utils.zig' - zig test utils.zig + zig test src/utils.zig working-directory: Project0-word shell: bash diff --git a/.gitignore b/.gitignore index 96c6f81..98abf7b 100644 --- a/.gitignore +++ 
b/.gitignore @@ -1,25 +1,8 @@ -# Ignore all -* - -# Unignore all with extensions -!*.* - -# Unignore all dirs -!*/ - -!.* - -*.o -.typo - -# Ignore `bin` dir -bin/ -*/bin/* - .history .vscode *zig-cache zig-out -test_file* +test_f*.txt** + diff --git a/Project0-word/build.zig b/Project0-word/build.zig index 28c3fd5..a3ab942 100644 --- a/Project0-word/build.zig +++ b/Project0-word/build.zig @@ -1,15 +1,91 @@ const std = @import("std"); +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. const optimize = b.standardOptimizeOption(.{}); + // const lib = b.addStaticLibrary(.{ + // .name = "Project0-word", + // // In this case the main source file is merely a path, however, in more + // // complicated build scripts, this could be a generated file. + // .root_source_file = b.path("src/root.zig"), + // .target = target, + // .optimize = optimize, + // }); + + // This declares intent for the library to be installed into the standard + // location when the user invokes the "install" step (the default step when + // running `zig build`). 
+ // b.installArtifact(lib); + const exe = b.addExecutable(.{ - .name = "project0part1", - .root_source_file = b.path("main.zig"), + .name = "Project0-word", + .root_source_file = b.path("src/main.zig"), .target = target, .optimize = optimize, }); + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`). b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary, however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. + run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it. 
+ const lib_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); + + const exe_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_lib_unit_tests.step); + test_step.dependOn(&run_exe_unit_tests.step); } diff --git a/Project0-word/build.zig.zon b/Project0-word/build.zig.zon new file mode 100644 index 0000000..962a4f2 --- /dev/null +++ b/Project0-word/build.zig.zon @@ -0,0 +1,72 @@ +.{ + // This is the default name used by packages depending on this one. For + // example, when a user runs `zig fetch --save `, this field is used + // as the key in the `dependencies` table. Although the user can choose a + // different name, most users will stick with this provided value. + // + // It is redundant to include "zig" in this name because it is already + // within the Zig package namespace. + .name = "Project0-word", + + // This is a [Semantic Version](https://semver.org/). + // In a future version of Zig it will be used for package deduplication. + .version = "0.0.0", + + // This field is optional. + // This is currently advisory only; Zig does not yet do anything + // with this value. + //.minimum_zig_version = "0.11.0", + + // This field is optional. + // Each dependency must either provide a `url` and `hash`, or a `path`. + // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. + // Once all dependencies are fetched, `zig build` no longer requires + // internet connectivity. 
+ .dependencies = .{ + // See `zig fetch --save ` for a command-line interface for adding dependencies. + //.example = .{ + // // When updating this field to a new URL, be sure to delete the corresponding + // // `hash`, otherwise you are communicating that you expect to find the old hash at + // // the new URL. + // .url = "https://example.com/foo.tar.gz", + // + // // This is computed from the file contents of the directory of files that is + // // obtained after fetching `url` and applying the inclusion rules given by + // // `paths`. + // // + // // This field is the source of truth; packages do not come from a `url`; they + // // come from a `hash`. `url` is just one of many possible mirrors for how to + // // obtain a package matching this `hash`. + // // + // // Uses the [multihash](https://multiformats.io/multihash/) format. + // .hash = "...", + // + // // When this is provided, the package is found in a directory relative to the + // // build root. In this case the package's hash is irrelevant and therefore not + // // computed. This field and `url` are mutually exclusive. + // .path = "foo", + + // // When this is set to `true`, a package is declared to be lazily + // // fetched. This makes the dependency only get fetched if it is + // // actually used. + // .lazy = false, + //}, + }, + + // Specifies the set of files and directories that are included in this package. + // Only files and directories listed here are included in the `hash` that + // is computed for this package. Only files listed here will remain on disk + // when using the zig package manager. As a rule of thumb, one should list + // files required for compilation plus any license(s). + // Paths are relative to the build root. Use the empty string (`""`) to refer to + // the build root itself. + // A directory listed here means that all files within, recursively, are included. + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + // For example... 
+ //"LICENSE", + //"README.md", + }, +} diff --git a/Project0-word/plain.txt b/Project0-word/plain.txt deleted file mode 100644 index 4941a2f..0000000 --- a/Project0-word/plain.txt +++ /dev/null @@ -1,10 +0,0 @@ -fuisdhyfsi sdfiujisdokjf sdofijhoi f s`df`iouujsoi v jufes df -fiok`sj vkvjdsjvdl -fjhjs`dhv -jkdfj -dscvsv -sdfsfd`sdfiujisdokjf` -defsg -vvvv -vvve -eeee kkf fll fppf fppf fllf diff --git a/Project0-word/P0-tests.zig b/Project0-word/src/P0-tests.zig similarity index 57% rename from Project0-word/P0-tests.zig rename to Project0-word/src/P0-tests.zig index 576f5d4..7aea71e 100644 --- a/Project0-word/P0-tests.zig +++ b/Project0-word/src/P0-tests.zig @@ -3,17 +3,19 @@ const count_words = @import("count_words.zig"); const utils = @import("utils.zig"); const expect = std.testing.expect; const eql = std.mem.eql; +const log = std.log; +const dir = std.fs.cwd(); test "parse_args" { - var args = [_][]const u8{ "-cw", "file.txt" }; - const expected = "file.txt"; + var args = [_][]const u8{ "-cw", "test_file.txt" }; + const expected = "test_file.txt"; const result = try utils.parseArgs(&args); try expect(eql(u8, expected, result)); } test "count_words" { - const path = "plain.txt"; - const expected = 26; - const result = count_words.countWords(path); + const path = "test_plain.txt"; + const expected = 423; + const result = try count_words.countWords(path); try expect(result == expected); } diff --git a/Project0-word/count_words.zig b/Project0-word/src/count_words.zig similarity index 51% rename from Project0-word/count_words.zig rename to Project0-word/src/count_words.zig index aa35c28..80f12f1 100644 --- a/Project0-word/count_words.zig +++ b/Project0-word/src/count_words.zig @@ -2,8 +2,10 @@ const std = @import("std"); const print = std.debug.print; const split = std.mem.splitScalar; const utils = @import("utils.zig"); +const log = std.log; +const continueToNextLine = utils.continueToNextLine; -pub fn countWords(file_path: []const u8) u64 { +pub fn 
countWords(file_path: []const u8) !u64 { const file = std.fs.cwd().openFile(file_path, .{ .mode = .read_only }) catch |err| { print("Error opening file: {}\n", .{err}); return 0; @@ -12,20 +14,16 @@ pub fn countWords(file_path: []const u8) u64 { var buffer: [1024]u8 = undefined; var words_count: u64 = 0; const reader = file.reader(); - while (true) { - var line: []const u8 = undefined; - const line_options = utils.nextLine(reader, &buffer) catch |err| { - print("Error reading line: {}\n", .{err}); - break; - }; - if (line_options) |value| { - line = value; - } else { - break; + + var line_options = try utils.nextLine(reader, &buffer); + while (line_options != null) { + if (continueToNextLine(line_options.?)) { + line_options = try utils.nextLine(reader, &buffer) orelse return words_count; + continue; } - var words = split(u8, line, ' '); + var words = split(u8, line_options.?, ' '); while (words.next() != null) words_count += 1; + line_options = try utils.nextLine(reader, &buffer); } - print("Words count: {}\n", .{words_count}); return words_count; } diff --git a/Project0-word/src/data-structures.zig b/Project0-word/src/data-structures.zig new file mode 100755 index 0000000..95a0b68 --- /dev/null +++ b/Project0-word/src/data-structures.zig @@ -0,0 +1 @@ +const std = @import("std"); diff --git a/Project0-word/main.zig b/Project0-word/src/main.zig similarity index 97% rename from Project0-word/main.zig rename to Project0-word/src/main.zig index 53202b6..ca5121b 100644 --- a/Project0-word/main.zig +++ b/Project0-word/src/main.zig @@ -1,8 +1,9 @@ const std = @import("std"); -const heap = std.heap; const count_words = @import("count_words.zig"); const utils = @import("utils.zig"); const print = std.debug.print; +pub const log_level: std.log.Level = .debug; +const heap = std.heap; pub fn main() !void { const stdout = std.io.getStdOut(); @@ -37,7 +38,7 @@ pub fn main() !void { line = value; } else { try stdout.writeAll( - \\ + \\ \\ No arguments provided. 
\\ ); diff --git a/Project0-word/utils.zig b/Project0-word/src/utils.zig similarity index 57% rename from Project0-word/utils.zig rename to Project0-word/src/utils.zig index 1c02e1f..37f0d57 100644 --- a/Project0-word/utils.zig +++ b/Project0-word/src/utils.zig @@ -1,7 +1,9 @@ const std = @import("std"); const Reader = std.fs.File.Reader; const print = std.debug.print; -const os_tag = @import("builtin").os.tag; +const log = std.log; +const builtin = @import("builtin"); +const os_tag = builtin.os.tag; const File = std.fs.File; const CreateFlags = File.CreateFlags; const isAlphabetic = std.ascii.isAlphabetic; @@ -14,8 +16,7 @@ const mem = std.mem; const eql = mem.eql; const GPA = std.heap.GeneralPurposeAllocator; const Allocator = std.mem.Allocator; - -pub fn nextLine(reader: anytype, buffer: []u8) !?[]const u8 { +pub fn nextLine(reader: File.Reader, buffer: []u8) !?[]const u8 { const line = (try reader.readUntilDelimiterOrEof(buffer, '\n')) orelse return null; if (os_tag == .windows) { return mem.trimRight(u8, line, "\r"); @@ -89,6 +90,57 @@ pub fn cleanFile(path: []u8, buffer: []u8) ![]u8 { return new_file_sub_path; } +pub fn sortFile(file_path: []u8, buffer: []u8) ![]u8 { + var general_purpose_allocator = GPA(.{}){}; + const gpa = general_purpose_allocator.allocator(); + var encoded_path_buffer = gpa.alloc(u8, file_path.len) catch unreachable; + const encoded_file_sub_path = try encodePathForOs(file_path, encoded_path_buffer); + var file_read: File = try cwd().openFile(encoded_file_sub_path, .{}); + defer file_read.close(); + const file_reader = file_read.reader(); + const new_file_sub_path = mem.concat(gpa, u8, &.{ file_path, ".sorted" }) catch unreachable; + gpa.free(encoded_path_buffer); + encoded_path_buffer = gpa.alloc(u8, new_file_sub_path.len) catch unreachable; + const encoded_new_file_sub_path = try encodePathForOs(new_file_sub_path, encoded_path_buffer); + var new_file: File = try cwd().createFile(encoded_new_file_sub_path, CreateFlags{ .truncate = true 
}); + const writer = new_file.writer(); + var lines = std.ArrayList([]const u8).init(gpa); + while (try file_reader.readUntilDelimiterOrEof(buffer, '\n')) |line| { + const new_line: []u8 = try gpa.alloc(u8, line.len); + @memcpy(new_line, line); + try lines.append(new_line); + } + const items = lines.items; + std.mem.sort([]const u8, items, {}, lessThan); + for (items) |line| { + writer.writeAll(line) catch |err| { + return err; + }; + writer.writeAll("\n") catch |err| { + return err; + }; + } + defer new_file.close(); + defer gpa.free(encoded_path_buffer); + defer lines.deinit(); + return new_file_sub_path; +} + +fn lessThan(_: void, a: []const u8, b: []const u8) bool { + var i: usize = 0; + while (i < a.len) : (i += 1) { + if (i >= b.len) { + return false; + } + if (a[i] < b[i]) { + return true; + } else if (a[i] > b[i]) { + return false; + } + } + return a.len < b.len; +} + fn encodePathForOs(path: []u8, encoded_path_buffer: []u8) ![]u8 { if (os_tag == .windows) { var i: usize = 0; @@ -102,7 +154,7 @@ fn encodePathForOs(path: []u8, encoded_path_buffer: []u8) ![]u8 { } } -fn continueToNextLine(line: []u8) bool { +pub fn continueToNextLine(line: []const u8) bool { if (line.len == 0) { return true; } @@ -127,7 +179,7 @@ fn continueToNextLine(line: []u8) bool { return false; } -test "clean file" { +fn createTestFile() ![]u8 { var file_path = "test_file.txt".*; var file: File = try std.fs.cwd().createFile(&file_path, CreateFlags{ .truncate = true, .read = true }); defer file.close(); @@ -148,8 +200,13 @@ test "clean file" { print("Error writing to file: {any}\n", .{err}); return err; }; + return &file_path; +} + +test "clean file" { + const file_path = try createTestFile(); var buffer: [1024]u8 = undefined; - const cleaned_file: []const u8 = cleanFile(&file_path, &buffer) catch |err| { + const cleaned_file: []const u8 = cleanFile(file_path, &buffer) catch |err| { print("Error cleaning file: {any}\n", .{err}); return err; }; @@ -157,21 +214,70 @@ test "clean file" { 
defer cleaned_file_read.close(); const cleaned_file_reader = cleaned_file_read.reader(); var cleaned_buffer: [1024]u8 = undefined; - const expected: [6][:0]u8 = .{ @constCast("hello"), @constCast("this"), @constCast("is"), @constCast("a"), @constCast("test"), @constCast("file") }; - print("expected: {s}\n", .{expected[0..expected.len]}); var foundExpected = false; - while (try nextLine(cleaned_file_reader, &cleaned_buffer)) |line| { - print("line: {s}\n", .{line}); + var next_line = try nextLine(cleaned_file_reader, &cleaned_buffer); + while (next_line != null) { inner: for (&expected) |expectedValue| { - print("{s}\n", .{expectedValue}); - if (eql(u8, line, expectedValue)) { - print("found expected: {s}\n", .{expectedValue}); + if (eql(u8, next_line.?, expectedValue)) { foundExpected = true; break :inner; } } try expect(foundExpected); foundExpected = false; + next_line = try nextLine(cleaned_file_reader, &cleaned_buffer); + } +} + +test "sort file" { + const file_path = try createTestFile(); + var buffer: [1024]u8 = undefined; + const cleaned_file_path: []u8 = try cleanFile(file_path, &buffer); + buffer = undefined; + const sorted_file_path = try sortFile(cleaned_file_path, &buffer); + buffer = undefined; + const sorted_file_read = try std.fs.cwd().openFile(sorted_file_path, .{}); + defer sorted_file_read.close(); + const sorted_file_reader = sorted_file_read.reader(); + var sorted_buffer: [1024]u8 = undefined; + var i: usize = 0; + while (try sorted_file_reader.readUntilDelimiterOrEof(&sorted_buffer, '\n')) |line| { + if (i < 4) { + const expected: []const u8 = "a"; + try std.testing.expectEqualStrings(expected, line); + i += 1; + continue; + } + if (i == 4) { + const expected: []const u8 = "hello"; + try std.testing.expectEqualStrings(expected, line); + i += 1; + continue; + } + + if (i < 9) { + const expected: []const u8 = "is"; + try std.testing.expectEqualStrings(expected, line); + i += 1; + continue; + } + + if (i < 13) { + const expected: []const u8 = 
"test"; + try std.testing.expectEqualStrings(expected, line); + i += 1; + continue; + } + if (i < 17) { + const expected: []const u8 = "this"; + try std.testing.expectEqualStrings(expected, line); + i += 1; + continue; + } + if (i > 16) { + std.debug.print("unexpected line: {s}\n", .{line}); + try std.testing.expect(false); + } } } diff --git a/Project0-word/test_plain.txt b/Project0-word/test_plain.txt new file mode 100644 index 0000000..5c256c6 --- /dev/null +++ b/Project0-word/test_plain.txt @@ -0,0 +1,57 @@ +Pellentesque convallis feugiat vehicula. +Praesent convallis non mi in consectetur. +Ut nibh ipsum, tincidunt vel lacus vitae, rutrum maximus dui. +Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. +Quisque eu porttitor quam, et semper leo. +Sed sed mauris justo. +Suspendisse potenti. +Cras ultrices lacus id porttitor euismod. +Sed a cursus arcu. +Interdum et malesuada fames ac ante ipsum primis in faucibus. +Aliquam pretium consequat pellentesque. +Curabitur a ante ut leo sodales faucibus. +Nulla ut suscipit nulla. +Phasellus sodales in felis nec mollis. +Sed nec eros non velit euismod varius. +Interdum et malesuada fames ac ante ipsum primis in faucibus. +Nam malesuada est sit amet ultrices venenatis. +Donec eu volutpat dolor. +Vestibulum et nisl in turpis pellentesque hendrerit facilisis non metus. +Nunc quam libero, bibendum ut lorem sed, molestie egestas elit. +Nam convallis ante vitae massa ornare ultrices. +Donec laoreet, quam aliquam sollicitudin cursus, ipsum velit viverra justo, eget eleifend mauris risus vel mi. +Etiam mi urna, aliquet ac ligula a, finibus elementum ex. +Pellentesque sit amet eros eget massa feugiat aliquam. +Morbi ex arcu, fringilla sed porttitor vel, ullamcorper quis nisi. +Pellentesque placerat velit odio, in tempus sapien porttitor vel. +Integer efficitur egestas ipsum suscipit aliquam. +Donec sagittis ipsum ac finibus hendrerit. +Aliquam erat volutpat. 
+Duis fermentum tempus lacus eu tristique. +Curabitur cursus lacus vel turpis laoreet malesuada. +In id ornare neque. +Ut nec congue leo. +Phasellus tortor est, finibus pretium imperdiet id, varius in lacus. +Praesent at metus a quam tempor venenatis. +Aenean euismod erat lectus, non pretium sem vehicula ut. +Sed tellus risus, consectetur et rutrum in, ornare at nunc. +Donec suscipit nisi metus, nec imperdiet sem pretium et. +Curabitur ut metus eget magna bibendum porttitor eget id nisi. +Nullam tristique sodales urna ac viverra. +Fusce ut tortor vitae arcu viverra mollis et ut ipsum. +Donec ornare eget tellus ut congue. +Nam maximus, nunc eleifend bibendum vestibulum, turpis justo cursus arcu, at tincidunt mauris ligula vel est. +Pellentesque vitae egestas leo. +Donec at sem tristique magna interdum posuere. +Pellentesque euismod vitae massa sed tincidunt. +Integer eros ipsum, vestibulum sed pharetra non, interdum et ex. +Mauris sodales luctus neque, sed aliquam erat feugiat vel. +Suspendisse ut tortor enim. +Aliquam suscipit urna mi, id accumsan lectus sollicitudin sed. +Aliquam ornare nisi nibh. +Donec elementum enim mi, eu gravida turpis sagittis non. +In arcu purus, vestibulum quis viverra nec, pellentesque sed nunc. +Phasellus ut elit non augue gravida commodo id quis urna. +Vivamus semper et nulla vel egestas. +Nulla facilisi. +Aenean laoreet leo urna, ac euismod turpis ultricies blandit. 
diff --git a/ignore_files/.codespellignore b/ignore_files/.codespellignore index 7c49c60..4dd1096 100644 --- a/ignore_files/.codespellignore +++ b/ignore_files/.codespellignore @@ -1,3 +1,4 @@ .gitignore .vscode/* node_modules/* +Project0-word/test_plain.txt diff --git a/testing_helper.js b/testing_helper.js new file mode 100644 index 0000000..5ee3b42 --- /dev/null +++ b/testing_helper.js @@ -0,0 +1,20 @@ +// get file name as arg from command line and return the count of the words in the file +// if file not found return -1 +// if file is empty return 0 +// if file is not a text file return -2 + +import { readFileSync } from 'fs'; +import { join, extname } from 'path'; +try { + const filePath = join(process.cwd(), process.argv[2]); + if (extname(filePath) !== '.txt') console.log(-2); + else { + console.log( + readFileSync(filePath, 'utf-8') + .split(/(\s+)/) + .filter((e) => e.trim().length > 0).length + ); + } +} catch (err) { + console.log(err); +}