From b257a5d704aacf7e1de5568d957ec85c305b2c39 Mon Sep 17 00:00:00 2001 From: Clayton Gentry Date: Mon, 11 Nov 2024 11:45:03 -0500 Subject: [PATCH] Vectors (#9) * Add Entities * turn off rules * Rename to vectors and improve cli interface * fix vector writes * nest dimensions * update readme * add usage * snip extra docs * more bdd * logger spy * Generate rubocop todo * split tests * load yaml * more yaml parsing * clean up cops * rm empty todo * snip * squash test log output * absolute * more absolute * more absolute * more absolute * improve test legibility * Respect example length rule * Respect nesting rule * enforce docs * absolute * dry * api extraction * vector extraction * remove redundant FakeFS * cleaner * more assertion singularization * snip * further API extrapolation * Add spec persistence * pass tests again * snip * 3.0 is past EOL * fix coverage * refactor CLI spec * remove unnecessary context * more review feedback * more simplification * more reduction * safe load * one more * no need to manually activate / deactivate fakefs * Add more coverage to with-directory project specs * force pathname * more pathname * rename to fields * a million pathnames * use pathname methods * path not a file * improve test log * Implement Project.create as a class method * shorter line * pathname .freeze * turn back on * improve memoized helpers * Pass rubocops * delete unused * vectors do not create routines or tables directories * no todos * tighten * tighten * snip * abstract ugly template setup * nicen * read and write * more comments * bump version --------- Co-authored-by: tyler --- .github/workflows/ci.yml | 7 +- .gitignore | 1 + .rubocop.yml | 2 + .rubocop_todo.yml | 7 + Gemfile | 2 + Gemfile.lock | 17 +++ README.md | 17 ++- lib/manifolds.rb | 9 +- lib/manifolds/api.rb | 7 + lib/manifolds/cli.rb | 63 ++++----- lib/manifolds/project/project.rb | 33 +++++ lib/manifolds/project/vector.rb | 37 ++++++ lib/manifolds/project/workspace.rb | 51 ++++++++ 
lib/manifolds/services/big_query_service.rb | 53 ++++---- lib/manifolds/services/vector_service.rb | 40 ++++++ lib/manifolds/templates/vector_template.yml | 7 + ...ig_template.yml => workspace_template.yml} | 25 ++-- lib/manifolds/version.rb | 2 +- spec/manifolds/api/project_spec.rb | 47 +++++++ spec/manifolds/api/vector_spec.rb | 25 ++++ spec/manifolds/api/workspace_spec.rb | 59 +++++++++ spec/manifolds/cli_spec.rb | 122 ++++++++++-------- .../services/big_query_service_spec.rb | 73 +++++------ .../manifolds/services/vector_service_spec.rb | 67 ++++++++++ spec/spec_helper.rb | 14 +- .../shared_contexts/with_template_files.rb | 22 ++++ 26 files changed, 617 insertions(+), 192 deletions(-) create mode 100644 .rubocop_todo.yml create mode 100644 lib/manifolds/api.rb create mode 100644 lib/manifolds/project/project.rb create mode 100644 lib/manifolds/project/vector.rb create mode 100644 lib/manifolds/project/workspace.rb create mode 100644 lib/manifolds/services/vector_service.rb create mode 100644 lib/manifolds/templates/vector_template.yml rename lib/manifolds/templates/{config_template.yml => workspace_template.yml} (52%) create mode 100644 spec/manifolds/api/project_spec.rb create mode 100644 spec/manifolds/api/vector_spec.rb create mode 100644 spec/manifolds/api/workspace_spec.rb create mode 100644 spec/manifolds/services/vector_service_spec.rb create mode 100644 spec/support/shared_contexts/with_template_files.rb diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f8d71ac..ca5d5d3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,10 +14,9 @@ jobs: strategy: matrix: ruby: - - '3.0.6' - - '3.1.4' - - '3.2.2' - - '3.3.5' + - '3.1.6' + - '3.2.6' + - '3.3.6' steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index 226bef0..1e05b9b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ /doc/ /pkg/ /spec/reports/ +/spec/examples.txt /tmp/ *.gem diff --git a/.rubocop.yml b/.rubocop.yml index 
af3621a..fec8967 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,3 +1,5 @@ +inherit_from: .rubocop_todo.yml + require: rubocop-rspec AllCops: diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml new file mode 100644 index 0000000..7139704 --- /dev/null +++ b/.rubocop_todo.yml @@ -0,0 +1,7 @@ +# This configuration was generated by +# `rubocop --auto-gen-config` +# on 2024-11-09 13:21:19 UTC using RuboCop version 1.68.0. +# The point is for the user to remove these configuration records +# one by one as the offenses are removed from the code base. +# Note that changes in the inspected code, or installation of new +# versions of RuboCop, may require this file to be generated again. diff --git a/Gemfile b/Gemfile index 2fb3d8c..5ca3f39 100644 --- a/Gemfile +++ b/Gemfile @@ -11,6 +11,8 @@ group :development do end group :development, :test do + gem "debug" + gem "fakefs" gem "rspec" gem "simplecov" gem "simplecov-json" diff --git a/Gemfile.lock b/Gemfile.lock index 82b93e5..65aa532 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,8 +2,16 @@ GEM remote: https://rubygems.org/ specs: ast (2.4.2) + debug (1.9.2) + irb (~> 1.10) + reline (>= 0.3.8) diff-lcs (1.5.1) docile (1.4.1) + fakefs (2.5.0) + io-console (0.7.2) + irb (1.14.1) + rdoc (>= 4.0.0) + reline (>= 0.4.2) json (2.7.2) language_server-protocol (3.17.0.3) logger (1.6.1) @@ -11,9 +19,15 @@ GEM parser (3.3.1.0) ast (~> 2.4.1) racc + psych (5.1.2) + stringio racc (1.7.3) rainbow (3.1.1) + rdoc (6.7.0) + psych (>= 4.0.0) regexp_parser (2.9.0) + reline (0.5.10) + io-console (~> 0.5) rexml (3.2.6) rspec (3.13.0) rspec-core (~> 3.13.0) @@ -63,6 +77,7 @@ GEM simplecov simplecov-lcov (0.8.0) simplecov_json_formatter (0.1.4) + stringio (3.1.1) thor (1.3.1) unicode-display_width (2.5.0) @@ -71,6 +86,8 @@ PLATFORMS ruby DEPENDENCIES + debug + fakefs logger rspec rubocop diff --git a/README.md b/README.md index c5e38e3..2ed5ec5 100644 --- a/README.md +++ b/README.md @@ -66,16 +66,19 @@ manifolds generate bq ## Manifolds 
Configuration -### Dimensions +### Vectors -Dimensions are fields that describe the context of the data. They are typically used to segment and filter data in reports. +Vectors are the entities you can roll up data for. Each vector has a set of dimensions defined in its `vectors/VECTOR_NAME.yml` configuration file. ```yaml -dimensions: - - name: user_id - type: STRING - - name: date - type: DATE +vectors: + - page +``` + +#### Add a vector to your project + +```bash +manifolds vectors add page ``` ### Metrics diff --git a/lib/manifolds.rb b/lib/manifolds.rb index 7ae5a60..95574e5 100644 --- a/lib/manifolds.rb +++ b/lib/manifolds.rb @@ -1,7 +1,12 @@ # frozen_string_literal: true -require_relative "manifolds/cli" -require_relative "manifolds/version" +require "pathname" +require "thor" +require "yaml" + +Dir[File.join(__dir__, "manifolds", "**", "*.rb")].sort.each do |file| + require file +end module Manifolds class Error < StandardError; end diff --git a/lib/manifolds/api.rb b/lib/manifolds/api.rb new file mode 100644 index 0000000..26a01e0 --- /dev/null +++ b/lib/manifolds/api.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module Manifolds + # API for interacting with project folders and file structures. + module API + end +end diff --git a/lib/manifolds/cli.rb b/lib/manifolds/cli.rb index d29cccd..83bd456 100644 --- a/lib/manifolds/cli.rb +++ b/lib/manifolds/cli.rb @@ -1,59 +1,60 @@ # frozen_string_literal: true -require "thor" -require "fileutils" -require "logger" - -require_relative "services/big_query_service" - module Manifolds # CLI provides command line interface functionality # for creating and managing umbrella projects for data management. 
class CLI < Thor + attr_accessor :logger, :bq_service + def initialize(*args, logger: Logger.new($stdout)) super(*args) - @logger = logger - @logger.level = Logger::INFO - @bq_service = Services::BigQueryService.new(@logger) + self.logger = logger + logger.level = Logger::INFO + + self.bq_service = Services::BigQueryService.new(logger) end desc "init NAME", "Generate a new umbrella project for data management" def init(name) - directory_path = "./#{name}/projects" - FileUtils.mkdir_p(directory_path) - @logger.info "Created umbrella project '#{name}' with a projects directory." + Manifolds::API::Project.create(name) + logger.info "Created umbrella project '#{name}' with projects and vectors directories." end - desc "add PROJECT_NAME", "Add a new project within the current umbrella project" - def add(project_name) - project_path = "./projects/#{project_name}" - unless Dir.exist?("./projects") - @logger.error("Not inside a Manifolds umbrella project.") - return + desc "vectors SUBCOMMAND ...ARGS", "Manage vectors" + subcommand "vectors", Class.new(Thor) { + namespace :vectors + + attr_accessor :logger + + def initialize(*args, logger: Logger.new($stdout)) + super(*args) + self.logger = logger end - FileUtils.mkdir_p("#{project_path}/tables") - FileUtils.mkdir_p("#{project_path}/routines") - copy_config_template(project_path) - @logger.info "Added project '#{project_name}' with tables and routines directories." + desc "add VECTOR_NAME", "Add a new vector configuration" + def add(name, project: API::Project.new(File.basename(Dir.getwd))) + vector = API::Vector.new(name, project: project) + vector.add + logger.info "Created vector configuration for '#{name}'." + end + } + + desc "add WORKSPACE_NAME", "Add a new workspace to a project" + def add(name, project: API::Project.new(File.basename(Dir.getwd))) + workspace = API::Workspace.new(name, project: project) + workspace.add + logger.info "Added workspace '#{name}' with tables and routines directories." 
end desc "generate PROJECT_NAME SERVICE", "Generate services for a project" def generate(project_name, service) case service when "bq" - @bq_service.generate_dimensions_schema(project_name) + bq_service.generate_dimensions_schema(project_name) else - @logger.error("Unsupported service: #{service}") + logger.error("Unsupported service: #{service}") end end - - private - - def copy_config_template(project_path) - template_path = File.join(File.dirname(__FILE__), "templates", "config_template.yml") - FileUtils.cp(template_path, "#{project_path}/manifold.yml") - end end end diff --git a/lib/manifolds/project/project.rb b/lib/manifolds/project/project.rb new file mode 100644 index 0000000..e8f191b --- /dev/null +++ b/lib/manifolds/project/project.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Manifolds + module API + # Projects API + class Project + attr_reader :name, :directory + + def initialize(name, directory: Pathname.pwd.join(name)) + self.name = name + self.directory = Pathname(directory) + end + + def self.create(name, directory: Pathname.pwd.join(name)) + new(name, directory: directory).tap do |project| + [project.workspaces_directory, project.vectors_directory].each(&:mkpath) + end + end + + def workspaces_directory + directory.join("workspaces") + end + + def vectors_directory + directory.join("vectors") + end + + private + + attr_writer :name, :directory + end + end +end diff --git a/lib/manifolds/project/vector.rb b/lib/manifolds/project/vector.rb new file mode 100644 index 0000000..ef59b29 --- /dev/null +++ b/lib/manifolds/project/vector.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Manifolds + module API + # Describes the entities for whom metrics are calculated. 
+ class Vector + attr_reader :name, :project, :template_path + + DEFAULT_TEMPLATE_PATH = Pathname.pwd.join( + "lib", "manifolds", "templates", "vector_template.yml" + ).freeze + + def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH) + self.name = name + self.project = project + self.template_path = Pathname(template_path) + end + + def add + directory.mkpath + FileUtils.cp(template_path, config_path) + end + + private + + attr_writer :name, :project, :template_path + + def directory + project.directory.join("vectors") + end + + def config_path + directory.join("#{name.downcase}.yml") + end + end + end +end diff --git a/lib/manifolds/project/workspace.rb b/lib/manifolds/project/workspace.rb new file mode 100644 index 0000000..c2dffb4 --- /dev/null +++ b/lib/manifolds/project/workspace.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +module Manifolds + module API + # Encapsulates a single manifold. + class Workspace + attr_reader :name, :project, :template_path + + DEFAULT_TEMPLATE_PATH = Pathname.pwd.join( + "lib", "manifolds", "templates", "workspace_template.yml" + ) + + def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH) + self.name = name + self.project = project + self.template_path = template_path + end + + def add + [tables_directory, routines_directory].each(&:mkpath) + FileUtils.cp(template_path, manifold_path) + end + + def tables_directory + project.workspaces_directory.join(name, "tables") + end + + def routines_directory + project.workspaces_directory.join(name, "routines") + end + + def manifold_file + return nil unless manifold_exists? + + File.new(manifold_path) + end + + def manifold_exists? + manifold_path.file? 
+ end + + def manifold_path + project.workspaces_directory.join(name, "manifold.yml") + end + + private + + attr_writer :name, :project, :template_path + end + end +end diff --git a/lib/manifolds/services/big_query_service.rb b/lib/manifolds/services/big_query_service.rb index 6faf3d7..f434ecf 100644 --- a/lib/manifolds/services/big_query_service.rb +++ b/lib/manifolds/services/big_query_service.rb @@ -1,64 +1,61 @@ # frozen_string_literal: true -require "yaml" -require "json" - module Manifolds module Services # Handles the generation of BigQuery schemas based on project configurations class BigQueryService def initialize(logger) @logger = logger + @vector_service = Manifolds::Services::VectorService.new(logger) end def generate_dimensions_schema(project_name) - config_path = "./projects/#{project_name}/manifold.yml" + config_path = Pathname.pwd.join("projects", project_name, "manifold.yml") return unless validate_config_exists(config_path, project_name) - config = YAML.load_file(config_path) - dimensions = extract_dimensions(config["dimensions"]) - create_dimensions_file(project_name, dimensions) + config = YAML.safe_load_file(config_path) + + fields = config["vectors"].reduce([]) do |list, vector| + @logger.info("Loading vector schema for '#{vector}'.") + [*@vector_service.load_vector_schema(vector), *list] + end + + create_dimensions_file(project_name, fields) end private def validate_config_exists(config_path, project_name) - unless File.exist?(config_path) + unless config_path.file? 
@logger.error("Config file missing for project '#{project_name}'.") return false end true end - def extract_dimensions(dimensions_hash) - dimensions_hash.map do |dimension| - extract_fields(dimension) - end - end + def create_dimensions_file(project_name, fields) + tables_directory(project_name).mkpath + dimensions = dimensions_schema(fields) - def extract_fields(fields_hash, mode = "NULLABLE") - fields_hash.map do |name, type| - if type.is_a?(Hash) - { "type" => "RECORD", "name" => name, "fields" => extract_fields(type) } - elsif type - { "type" => type.upcase, "name" => name, "mode" => mode } - end - end - end - - def create_dimensions_file(project_name, dimensions) - FileUtils.mkdir_p("./projects/#{project_name}/bq/tables") - File.write("./projects/#{project_name}/bq/tables/dimensions.json", dimensions_schema(dimensions)) + dimensions_path(project_name).write(dimensions) @logger.info("Generated BigQuery dimensions table schema for '#{project_name}'.") end - def dimensions_schema(dimensions) + def dimensions_schema(fields) JSON.pretty_generate([ { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED", - "fields" => dimensions } + "fields" => fields } ]).concat("\n") end + + def tables_directory(project_name) + Pathname.pwd.join("projects", project_name, "bq", "tables") + end + + def dimensions_path(project_name) + tables_directory(project_name).join("dimensions.json") + end end end end diff --git a/lib/manifolds/services/vector_service.rb b/lib/manifolds/services/vector_service.rb new file mode 100644 index 0000000..bd16996 --- /dev/null +++ b/lib/manifolds/services/vector_service.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Manifolds + module Services + # Handles the loading of vector schemas from configuration files + class VectorService + def initialize(logger) + @logger = logger + end + + def load_vector_schema(vector_name) + path = config_path(vector_name) + unless 
path.file? + @logger.error("Vector configuration not found: #{path}") + return nil + end + + config = YAML.safe_load_file(path) + fields = transform_attributes_to_schema(config["attributes"]) + { "name" => vector_name.downcase, "type" => "RECORD", "fields" => fields } + end + + private + + def transform_attributes_to_schema(attributes) + attributes.map do |name, type| + { + "name" => name, + "type" => type.upcase, + "mode" => "NULLABLE" + } + end + end + + def config_path(vector_name) + Pathname.pwd.join("vectors", "#{vector_name.downcase}.yml") + end + end + end +end diff --git a/lib/manifolds/templates/vector_template.yml b/lib/manifolds/templates/vector_template.yml new file mode 100644 index 0000000..ad9a1c2 --- /dev/null +++ b/lib/manifolds/templates/vector_template.yml @@ -0,0 +1,7 @@ +attributes: + # Add your vector attributes here + # Format: attribute_name: TYPE + # Example: + # id: STRING + # created_at: TIMESTAMP + # status: STRING diff --git a/lib/manifolds/templates/config_template.yml b/lib/manifolds/templates/workspace_template.yml similarity index 52% rename from lib/manifolds/templates/config_template.yml rename to lib/manifolds/templates/workspace_template.yml index ccbd056..f908302 100644 --- a/lib/manifolds/templates/config_template.yml +++ b/lib/manifolds/templates/workspace_template.yml @@ -1,13 +1,13 @@ -dimensions: - # Add your dimensions here, e.g. - # myName: STRING - # myFlag: BOOLEAN +vectors: + # List the vectors you want to include in this manifold + # Example: + # - User metrics: - name: # Add your metric name here, e.g. Pageviews id: - field: # Identify the field that uniquely identifies each manifold entity + field: # Identify the field that uniquely identifies each manifold vector type: # Specify the type of that field, e.g. INTEGER interval: @@ -16,19 +16,10 @@ metrics: aggregations: # Add any aggregations this metric should present - # e.g. 
- # - name: impressions - # method: count source: type: BIGQUERY_TABLE - project: # Add your project name here, e.g. my-project - dataset: # Add your dataset name here, e.g. events - table: # Add your table name, e.g. pageviews + project: # Add your project name here + dataset: # Add your dataset name here + table: # Add your table name filter: # (optional) Add your filter condition here - - segments: - # (optional) Add any custom breakouts for this metric - # e.g. - # - name: organic - # condition: paid IS FALSE diff --git a/lib/manifolds/version.rb b/lib/manifolds/version.rb index 21ca835..56bb89a 100644 --- a/lib/manifolds/version.rb +++ b/lib/manifolds/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Manifolds - VERSION = "0.0.5" + VERSION = "0.0.6" end diff --git a/spec/manifolds/api/project_spec.rb b/spec/manifolds/api/project_spec.rb new file mode 100644 index 0000000..a0d380a --- /dev/null +++ b/spec/manifolds/api/project_spec.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +RSpec.describe Manifolds::API::Project do + include FakeFS::SpecHelpers + + subject(:project) { described_class.new(name) } + + let(:name) { "wetland" } + + it { is_expected.to have_attributes(name: name) } + + describe ".create" do + before { described_class.create(name) } + + it "creates the vectors directory" do + expect(project.vectors_directory).to be_directory + end + + it "creates the workspaces directory" do + expect(project.workspaces_directory).to be_directory + end + end + + describe ".workspaces_directory" do + it { expect(project.workspaces_directory).to be_an_instance_of(Pathname) } + end + + describe ".vectors_directory" do + it { expect(project.vectors_directory).to be_an_instance_of(Pathname) } + end + + context "with directory" do + subject(:project) { described_class.new(name, directory: directory) } + + let(:directory) { Pathname.pwd.join("supplied_directory") } + + it { is_expected.to have_attributes(directory: directory) } + + it "uses it as the 
base for the vectors directory" do + expect(project.vectors_directory).to eq directory.join("vectors") + end + + it "uses it as the base for the workspaces directory" do + expect(project.workspaces_directory).to eq directory.join("workspaces") + end + end +end diff --git a/spec/manifolds/api/vector_spec.rb b/spec/manifolds/api/vector_spec.rb new file mode 100644 index 0000000..0448f5c --- /dev/null +++ b/spec/manifolds/api/vector_spec.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +RSpec.describe Manifolds::API::Vector do + include FakeFS::SpecHelpers + subject(:vector) { described_class.new(name, project: project) } + + include_context "with template files" + + let(:project) { Manifolds::API::Project.new("wetland") } + let(:name) { "page" } + + it { is_expected.to have_attributes(name: name, project: project) } + + describe ".add" do + before { vector.add } + + it "creates the config template file" do + expect(File).to exist(vector.template_path) + end + end + + describe ".template_path" do + it { expect(vector.template_path).to be_an_instance_of(Pathname) } + end +end diff --git a/spec/manifolds/api/workspace_spec.rb b/spec/manifolds/api/workspace_spec.rb new file mode 100644 index 0000000..e2dc02c --- /dev/null +++ b/spec/manifolds/api/workspace_spec.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +RSpec.describe Manifolds::API::Workspace do + include FakeFS::SpecHelpers + subject(:workspace) { described_class.new(name, project: project) } + + include_context "with template files" + + let(:project) { Manifolds::API::Project.new("wetland") } + let(:name) { "people" } + + it { is_expected.to have_attributes(name: name, project: project) } + + describe ".add" do + before { workspace.add } + + it "creates the routines directory" do + expect(workspace.routines_directory).to be_directory + end + + it "creates the tables directory" do + expect(workspace.tables_directory).to be_directory + end + + it "creates the manifold file" do + expect(File).to 
exist(workspace.manifold_path) + end + end + + describe ".routines_directory" do + it { expect(workspace.routines_directory).to be_an_instance_of(Pathname) } + end + + describe ".tables_directory" do + it { expect(workspace.tables_directory).to be_an_instance_of(Pathname) } + end + + context "when not created" do + describe ".manifold_exists?" do + it { expect(workspace.manifold_exists?).to be false } + end + + describe ".manifold_file" do + it { expect(workspace.manifold_file).to be_nil } + end + end + + context "when created" do + before { workspace.add } + + describe ".manifold_exists?" do + it { expect(workspace.manifold_exists?).to be true } + end + + describe ".manifold_file" do + it { expect(workspace.manifold_file).to be_an_instance_of(File) } + end + end +end diff --git a/spec/manifolds/cli_spec.rb b/spec/manifolds/cli_spec.rb index 84afed8..8d4026f 100644 --- a/spec/manifolds/cli_spec.rb +++ b/spec/manifolds/cli_spec.rb @@ -1,88 +1,98 @@ # frozen_string_literal: true -require_relative "../../lib/manifolds/cli" -require "fileutils" -require "logger" - RSpec.describe Manifolds::CLI do - let(:project_name) { "commerce" } - let(:sub_project_name) { "Pages" } - let(:null_logger) { Logger.new(File::NULL) } - let(:bq_service) { instance_double("Manifolds::Services::BigQueryService", "commerce") } + include FakeFS::SpecHelpers + + let(:null_logger) { instance_double(Logger) } + let(:mock_project) { instance_double(Manifolds::API::Project) } + let(:mock_workspace) { instance_double(Manifolds::API::Workspace) } + let(:mock_vector) { instance_double(Manifolds::API::Vector) } + + before do + allow(Manifolds::API::Project).to receive(:new).and_return(mock_project) + allow(Manifolds::API::Workspace).to receive(:new).and_return(mock_workspace) + allow(Manifolds::API::Vector).to receive(:new).and_return(mock_vector) + allow(null_logger).to receive(:info) + allow(null_logger).to receive(:level=) + end describe "#init" do subject(:cli) { described_class.new(logger: 
null_logger) } - before do - allow(FileUtils).to receive(:mkdir_p) - allow(File).to receive(:open) - cli.init(project_name) - end + let(:project_name) { "wetland" } - it "creates the projects directory" do - expect(FileUtils).to have_received(:mkdir_p).with("./#{project_name}/projects") - end - end - - describe "#add" do - let(:cli) { described_class.new(logger: null_logger) } - - context "when within an umbrella project" do + context "when initializing a new project" do before do - FileUtils.mkdir_p("#{project_name}/projects") # Simulate an umbrella project - Dir.chdir(project_name) - cli.add(sub_project_name) + allow(Manifolds::API::Project).to receive(:create).and_return(mock_project) end - after do - Dir.chdir("..") - FileUtils.rm_rf(project_name) + it "creates a new project through the API" do + cli.init(project_name) + expect(Manifolds::API::Project).to have_received(:create).with(project_name) end - it "creates a tables directory within the project" do - expect(Dir.exist?("./projects/#{sub_project_name}/tables")).to be true + it "logs the project creation" do + cli.init(project_name) + expect(null_logger).to have_received(:info) + .with("Created umbrella project '#{project_name}' with projects and vectors directories.") end + end + end + + describe "#add" do + subject(:cli) { described_class.new(logger: null_logger) } - it "creates a routines directory within the project" do - expect(Dir.exist?("./projects/#{sub_project_name}/routines")).to be true + let(:workspace_name) { "Commerce" } + + context "when adding a workspace" do + before do + allow(mock_workspace).to receive(:add) + cli.add(workspace_name) end - it "creates a manifold.yml file" do - expect(File.exist?("./projects/#{sub_project_name}/manifold.yml")).to be true + it "instantiates a new workspace through the API" do + expect(Manifolds::API::Workspace).to have_received(:new) + .with(workspace_name, project: mock_project) end - it "writes the manifold.yml file with dimensions" do - 
expect(File.read("./projects/#{sub_project_name}/manifold.yml")).to include("dimensions") + it "adds the workspace through the API" do + expect(mock_workspace).to have_received(:add) end - it "writes the manifold.yml file with metrics" do - config = File.read("./projects/#{sub_project_name}/manifold.yml") - expect(config).to include("metrics") + it "logs the workspace creation" do + expect(null_logger).to have_received(:info) + .with("Added workspace '#{workspace_name}' with tables and routines directories.") end end + end - context "when outside an umbrella project" do - subject(:cli_with_stdout) { described_class.new(logger: Logger.new($stdout)) } + describe "vectors#add" do + subject(:cli) do + subcommands = described_class.new.class.subcommand_classes + subcommands["vectors"].new(logger: null_logger) + end - it "does not allow adding projects and logs an error" do - expect do - cli_with_stdout.add("Pages") - end.to output(/Not inside a Manifolds umbrella project./).to_stdout + let(:vector_name) { "page" } + + context "when adding a vector" do + before do + allow(mock_vector).to receive(:add) + cli.add(vector_name) end - end - end - describe "#generate" do - subject(:cli) { described_class.new(logger: null_logger) } + it "instantiates a new vector through the API" do + expect(Manifolds::API::Vector).to have_received(:new) + .with(vector_name, project: mock_project) + end - before do - allow(Manifolds::Services::BigQueryService).to receive(:new).and_return(bq_service) - allow(bq_service).to receive(:generate_dimensions_schema) - end + it "adds the vector through the API" do + expect(mock_vector).to have_received(:add) + end - it "calls generate_dimensions_schema on bq service with correct project name" do - cli.generate("Pages", "bq") - expect(bq_service).to have_received(:generate_dimensions_schema).with("Pages") + it "logs the vector creation" do + expect(null_logger).to have_received(:info) + .with("Created vector configuration for '#{vector_name}'.") + end 
end end end diff --git a/spec/manifolds/services/big_query_service_spec.rb b/spec/manifolds/services/big_query_service_spec.rb index bf319b9..39c7a54 100644 --- a/spec/manifolds/services/big_query_service_spec.rb +++ b/spec/manifolds/services/big_query_service_spec.rb @@ -1,60 +1,55 @@ # frozen_string_literal: true +require "fakefs/spec_helpers" + RSpec.describe Manifolds::Services::BigQueryService do - let(:logger) { instance_double("Logger") } + include FakeFS::SpecHelpers + + let(:logger) { instance_spy(Logger) } let(:service) { described_class.new(logger) } let(:project_name) { "test_project" } - let(:config_path) { "./projects/#{project_name}/manifold.yml" } - let(:config) do - { - "dimensions" => { - "context" => { - "site" => "STRING", - "user" => { - "id" => "INTEGER", - "preferences" => { - "notifications" => "BOOLEAN" - } - } - } - } - } + let(:dimensions_path) do + Pathname.pwd.join("projects", project_name, "bq", "tables", "dimensions.json") end before do - allow(File).to receive(:exist?).with(config_path).and_return(true) - allow(YAML).to receive(:load_file).with(config_path).and_return(config) - allow(FileUtils).to receive(:mkdir_p) - allow(File).to receive(:write) - allow(logger).to receive(:info) # Allow 'info' to be called to avoid unexpected message errors + Pathname.pwd.join("projects", project_name).mkpath end describe "#generate_dimensions_schema" do - it "checks if the configuration file exists" do - service.generate_dimensions_schema(project_name) - expect(File).to have_received(:exist?).with(config_path) - end - - context "when configuration file does not exist" do + context "when the project configuration exists" do before do - allow(File).to receive(:exist?).with(config_path).and_return(false) - allow(logger).to receive(:error) - end + Pathname.pwd.join("vectors").mkpath + Pathname.pwd.join("vectors", "user.yml").write(<<~YAML) + attributes: + user_id: string + email: string + YAML + + Pathname.pwd.join("projects", project_name, 
"manifold.yml").write(<<~YAML) + vectors: + - User + YAML - it "logs an error message" do service.generate_dimensions_schema(project_name) - expect(logger).to have_received(:error).with("Config file missing for project 'test_project'.") end - end - it "writes the dimensions schema to a file" do - service.generate_dimensions_schema(project_name) - expect(File).to have_received(:write).with("./projects/#{project_name}/bq/tables/dimensions.json", anything) + it "generates a dimensions schema file" do + expect(dimensions_path.file?).to be true + end + + it "includes the expected schema structure" do + schema = JSON.parse(dimensions_path.read) + expect(schema).to include({ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }) + end end - it "logs success message" do - service.generate_dimensions_schema(project_name) - expect(logger).to have_received(:info).with("Generated BigQuery dimensions table schema for 'test_project'.") + context "when the project configuration is missing" do + it "indicates the configuration is missing" do + service.generate_dimensions_schema(project_name) + expect(logger).to have_received(:error) + .with(/Config file missing for project/) + end end end end diff --git a/spec/manifolds/services/vector_service_spec.rb b/spec/manifolds/services/vector_service_spec.rb new file mode 100644 index 0000000..a9b2e48 --- /dev/null +++ b/spec/manifolds/services/vector_service_spec.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +RSpec.describe Manifolds::Services::VectorService do + include FakeFS::SpecHelpers + + let(:logger) { instance_double(Logger) } + let(:service) { described_class.new(logger) } + + describe "#load_vector_schema" do + let(:vector_name) { "page" } + let(:vector_config) do + { + "attributes" => { + "id" => "string", + "url" => "string", + "created_at" => "timestamp" + } + } + end + + let(:expected_schema) do + { + "name" => "page", + "type" => "RECORD", + "fields" => [ + { "name" => "id", "type" => "STRING", "mode" => 
"NULLABLE" }, + { "name" => "url", "type" => "STRING", "mode" => "NULLABLE" }, + { "name" => "created_at", "type" => "TIMESTAMP", "mode" => "NULLABLE" } + ] + } + end + + context "when vector configuration exists" do + before do + Pathname.pwd.join("vectors").mkpath + config_path = Pathname.pwd.join("vectors", "#{vector_name}.yml") + config_path.write(YAML.dump(vector_config)) + end + + it "loads and transforms vector schema" do + expect(service.load_vector_schema(vector_name)).to eq(expected_schema) + end + + it "handles uppercase vector names" do + expect(service.load_vector_schema(vector_name.upcase)).to eq(expected_schema) + end + end + + context "when vector configuration doesn't exist" do + before do + allow(logger).to receive(:error) + end + + it "returns nil" do + expect(service.load_vector_schema(vector_name)).to be_nil + end + + it "logs an error message" do + path = Pathname.pwd.join("vectors", "#{vector_name}.yml") + service.load_vector_schema(vector_name) + + expect(logger).to have_received(:error) + .with("Vector configuration not found: #{path}") + end + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index e7c60bd..ac0a369 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,8 +1,13 @@ # frozen_string_literal: true +require "manifolds" +require "debug" +require "logger" require "simplecov" require "simplecov-json" require "simplecov-lcov" +require "fakefs/spec_helpers" +Dir[File.join(__dir__, "support", "**", "*.rb")].sort.each { |f| require f } SimpleCov::Formatter::LcovFormatter.config do |c| c.report_with_single_file = true @@ -16,13 +21,6 @@ ]) SimpleCov.start do - add_filter "/spec/" - add_filter "/vendor/" - - # Track all directories containing source code - add_group "Manifolds", "lib/manifolds" - add_group "Services", "lib/manifolds/services" - # Set the minimum coverage percentage minimum_coverage 95 minimum_coverage_by_file 90 @@ -40,6 +38,8 @@ mocks.verify_partial_doubles = true end + 
config.example_status_persistence_file_path = "spec/examples.txt" + config.shared_context_metadata_behavior = :apply_to_host_groups config.order = :random end diff --git a/spec/support/shared_contexts/with_template_files.rb b/spec/support/shared_contexts/with_template_files.rb new file mode 100644 index 0000000..2ba00fe --- /dev/null +++ b/spec/support/shared_contexts/with_template_files.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +RSpec.shared_context "with template files" do + before do + template_dir.mkpath + + workspace_template_path.write("vectors:\nmetrics:") + vector_template_path.write("attributes:") + end + + def template_dir + Pathname.new(File.dirname(__FILE__)).join("../../../lib/manifolds/templates") + end + + def vector_template_path + template_dir.join("vector_template.yml") + end + + def workspace_template_path + template_dir.join("workspace_template.yml") + end +end