Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AI insights #8384

Draft
wants to merge 13 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ jobs:
fail-fast: false
matrix:
include:
- { ruby: '3.1', postgres: 13.5 }
- { ruby: '3.2', postgres: 13.5 }
- { ruby: '3.1', postgres: 13 }
- { ruby: '3.2', postgres: 13 }

services:
postgres:
image: fixmystreet/postgres:${{ matrix.postgres }}
image: pgvector/pgvector:pg${{ matrix.postgres }}
env:
POSTGRES_PASSWORD: postgres
ports:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
/coverage/
/db/schema.rb
/db/structure.sql
/db/secondary_structure.sql
/files/
/lib/themes/*
/locale/model_attributes.rb
Expand Down
10 changes: 9 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,19 @@ gem 'alaveteli_features', path: 'gems/alaveteli_features'

# Storage backends
gem 'aws-sdk-s3', require: false
gem 'azure-storage', require: false
gem 'google-cloud-storage', '~> 1.47', require: false

# Storage content analyzers
gem 'excel_analyzer', path: 'gems/excel_analyzer', require: false

# AI
gem 'faraday', '~> 2.10'
gem 'langchainrb_rails', '~> 0.1.10'
gem 'pgvector', '~> 0.2'
gem 'sequel', '~> 5.68.0'
gem 'neighbor', '~> 0.4.3'
gem 'tiktoken_ruby', '~> 0.0.9'

group :test do
gem 'fivemat', '~> 1.3.7'
gem 'webmock', '~> 3.23.1'
Expand All @@ -193,6 +200,7 @@ group :test, :development do
gem 'rspec-activemodel-mocks', '~> 1.2.0'
gem 'rspec-rails', '~> 7.0.1'
gem 'pry', '~> 0.14.2'
gem 'vcr', '~> 6.3.1'
end

group :development do
Expand Down
61 changes: 46 additions & 15 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,8 @@ GEM
aws-sigv4 (~> 1.5)
aws-sigv4 (1.9.1)
aws-eventstream (~> 1, >= 1.0.2)
azure-core (0.1.15)
faraday (~> 0.9)
faraday_middleware (~> 0.10)
nokogiri (~> 1.6)
azure-storage (0.15.0.preview)
azure-core (~> 0.1)
faraday (~> 0.9)
faraday_middleware (~> 0.10)
nokogiri (~> 1.6, >= 1.6.8)
baran (0.1.12)
base64 (0.2.0)
bcrypt (3.1.20)
bigdecimal (3.1.8)
bindex (0.8.1)
Expand Down Expand Up @@ -194,6 +187,7 @@ GEM
charlock_holmes (0.7.9)
childprocess (5.0.0)
coderay (1.1.3)
colorize (1.1.0)
concurrent-ruby (1.3.4)
connection_pool (2.4.1)
crack (1.0.0)
Expand All @@ -220,10 +214,11 @@ GEM
railties (>= 5.0.0)
fancybox-rails (0.3.1)
railties (>= 3.1.0)
faraday (0.17.6)
multipart-post (>= 1.2, < 3)
faraday_middleware (0.14.0)
faraday (>= 0.7.4, < 1.0)
faraday (2.10.1)
faraday-net_http (>= 2.0, < 3.2)
logger
faraday-net_http (3.1.1)
net-http
fast_gettext (3.1.0)
prime
fivemat (1.3.7)
Expand Down Expand Up @@ -308,7 +303,19 @@ GEM
jquery-ui-rails (6.0.1)
railties (>= 3.2.16)
json (2.7.2)
json-schema (4.3.1)
addressable (>= 2.8)
jwt (2.7.1)
langchainrb (0.13.5)
baran (~> 0.1.9)
colorize (~> 1.1.0)
json-schema (~> 4)
matrix
pragmatic_segmenter (~> 0.3.0)
to_bool (~> 2.0.0)
zeitwerk (~> 2.5)
langchainrb_rails (0.1.10)
langchainrb (>= 0.7, < 0.14)
language_server-protocol (3.17.0.3)
launchy (3.0.1)
addressable (~> 2.8)
Expand All @@ -319,6 +326,7 @@ GEM
libv8-node (18.19.0.0-x86_64-darwin)
libv8-node (18.19.0.0-x86_64-linux)
locale (2.1.4)
logger (1.6.0)
loofah (2.22.0)
crass (~> 1.0.2)
nokogiri (>= 1.12.0)
Expand All @@ -344,7 +352,10 @@ GEM
money (6.19.0)
i18n (>= 0.6.4, <= 2)
multi_json (1.15.0)
multipart-post (2.3.0)
neighbor (0.4.3)
activerecord (>= 6.1)
net-http (0.4.1)
uri
net-imap (0.4.12)
date
net-protocol
Expand Down Expand Up @@ -383,6 +394,8 @@ GEM
ast (~> 2.4.1)
racc
pg (1.5.7)
pgvector (0.3.2)
pragmatic_segmenter (0.3.24)
prime (0.1.2)
forwardable
singleton
Expand Down Expand Up @@ -435,6 +448,7 @@ GEM
zeitwerk (~> 2.5)
rainbow (3.1.1)
rake (13.2.1)
rb_sys (0.9.87)
recaptcha (5.17.0)
redcarpet (3.6.0)
redis (4.8.1)
Expand Down Expand Up @@ -506,6 +520,7 @@ GEM
sprockets-rails (>= 2.0, < 4.0)
tilt (>= 1.1, < 3)
secure_headers (6.7.0)
sequel (5.68.0)
sidekiq (6.5.12)
connection_pool (>= 2.2.5, < 3)
rack (~> 2.0)
Expand Down Expand Up @@ -537,8 +552,15 @@ GEM
syslog_protocol (0.9.2)
text (1.3.1)
thor (1.3.2)
tiktoken_ruby (0.0.9)
rb_sys (= 0.9.87)
tiktoken_ruby (0.0.9-aarch64-linux)
tiktoken_ruby (0.0.9-arm64-darwin)
tiktoken_ruby (0.0.9-x86_64-darwin)
tiktoken_ruby (0.0.9-x86_64-linux)
tilt (2.0.10)
timeout (0.4.1)
to_bool (2.0.0)
trailblazer-option (0.1.2)
turbo-rails (2.0.6)
actionpack (>= 6.0.0)
Expand All @@ -553,6 +575,9 @@ GEM
unicode-display_width (2.5.0)
unidecoder (1.1.2)
uniform_notifier (1.16.0)
uri (0.13.0)
vcr (6.3.1)
base64
vpim (24.2.20)
web-console (4.2.1)
actionview (>= 6.0.0)
Expand Down Expand Up @@ -593,7 +618,6 @@ DEPENDENCIES
alaveteli_features!
annotate (< 3.2.1)
aws-sdk-s3
azure-storage
bcrypt (~> 3.1.20)
bootstrap-sass (~> 2.3.2.2)
bullet (~> 7.2.0)
Expand All @@ -606,6 +630,7 @@ DEPENDENCIES
exception_notification (~> 4.5.0)
factory_bot_rails (~> 6.4.3)
fancybox-rails (~> 0.3.0)
faraday (~> 2.10)
fast_gettext (~> 3.1.0)
fivemat (~> 1.3.7)
friendly_id (~> 5.5.1)
Expand All @@ -624,6 +649,7 @@ DEPENDENCIES
jquery-rails (~> 4.6.0)
jquery-ui-rails (~> 6.0.0)
json (~> 2.7.2)
langchainrb_rails (~> 0.1.10)
launchy (< 3.1.0)
locale (~> 2.1.4)
mahoro (~> 0.5)
Expand All @@ -634,13 +660,15 @@ DEPENDENCIES
mini_magick (~> 4.13.1)
mini_racer (~> 0.16.0)
money (~> 6.19.0)
neighbor (~> 0.4.3)
net-protocol (~> 0.1.3)
net-ssh (~> 7.2.3)
net-ssh-gateway (>= 1.1.0, < 3.0.0)
nokogiri (~> 1.16.7)
oink (~> 0.10.1)
open4 (~> 1.3.0)
pg (~> 1.5.7)
pgvector (~> 0.2)
pry (~> 0.14.2)
puma (~> 6.4.2)
rack (~> 2.2.9)
Expand All @@ -661,6 +689,7 @@ DEPENDENCIES
rubyzip (~> 2.3.2)
sass-rails (~> 5.0.8)
secure_headers (~> 6.7.0)
sequel (~> 5.68.0)
sidekiq (~> 6.5.12)
sidekiq-limit_fetch (~> 4.4.1)
simplecov (~> 0.22.0)
Expand All @@ -673,10 +702,12 @@ DEPENDENCIES
stripe-ruby-mock!
syck (~> 1.4.1)
syslog_protocol (~> 0.9.0)
tiktoken_ruby (~> 0.0.9)
turbo-rails (~> 2.0.6)
uglifier (~> 4.2.0)
unicode (~> 0.4.4)
unidecoder (~> 1.1.0)
vcr (~> 6.3.1)
vpim (~> 24.2.20)
web-console (>= 3.3.0)
webmock (~> 3.23.1)
Expand Down
12 changes: 11 additions & 1 deletion app/controllers/projects/extracts_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ def show
return
end

@insights = Project::Insight.find_by(
project: @project, info_request: @info_request
)

@main_insight = @insights.output.find do |i|
i[:answers].values.any?(&:present?)
end

@value_set = Dataset::ValueSet.new
end

Expand Down Expand Up @@ -59,7 +67,9 @@ def authenticate
end

def find_info_request
if params[:url_title]
if params[:id]
@info_request = @project.info_requests.find(params[:id])
elsif params[:url_title]
@info_request = @project.info_requests.extractable.find_by!(
url_title: params[:url_title]
)
Expand Down
18 changes: 18 additions & 0 deletions app/jobs/workflow_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
##
# WorkflowJob is a background job class that processes workflows.
# It inherits from ApplicationJob and is enqueued in the :workflows queue.
#
# The job takes a workflow object as its only argument and calls the
# workflow's perform! method.
#
# Usage:
#   WorkflowJob.perform_later(workflow)
#
class WorkflowJob < ApplicationJob
  queue_as :workflows

  # Runs the given workflow by delegating to its perform! method.
  #
  # @param workflow [Workflow] The workflow object to be processed; it must
  #   respond to perform!
  def perform(workflow)
    workflow.perform!
  end
end
32 changes: 32 additions & 0 deletions app/models/chunk.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# == Schema Information
# Schema version: 20240905062817
#
# Table name: chunks
#
# id :bigint not null, primary key
# info_request_id :bigint
# incoming_message_id :bigint
# foi_attachment_id :bigint
# text :text
# created_at :datetime not null
# updated_at :datetime not null
# embedding :vector(4096)
#

##
# A Chunk is a piece of text stored alongside an embedding vector (see the
# `text` and `embedding` columns in the schema annotation for this table).
#
# Each chunk is linked to the info request, incoming message and attachment
# it is associated with.
#
class Chunk < SecondaryRecord
  belongs_to :info_request
  belongs_to :incoming_message
  belongs_to :foi_attachment

  # `vectorsearch` and `upsert_to_vectorsearch` are defined outside this file
  # (presumably by langchainrb_rails - TODO confirm). The callback is invoked
  # after every save of a chunk.
  vectorsearch

  after_save :upsert_to_vectorsearch

  # The vector input for this record is simply the chunk's text.
  # NOTE(review): presumably this is the hook the vectorsearch integration
  # reads when generating the embedding - confirm against the library docs.
  def as_vector = text
end
Loading
Loading