Skip to content

Commit

Permalink
Commit initial
Browse files Browse the repository at this point in the history
  • Loading branch information
jplot committed Aug 3, 2019
0 parents commit a85367e
Show file tree
Hide file tree
Showing 11 changed files with 533 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/.bundle/
/.yardoc
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/
Empty file added CHANGELOG.md
Empty file.
5 changes: 5 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Resolve gems from the public RubyGems index.
source 'https://rubygems.org'

# Expand `github: 'owner/repo'` shorthands into HTTPS repository URLs.
git_source(:github) do |repo_name|
  "https://github.com/#{repo_name}"
end

# Pull the dependency list from the gemspec in this directory.
gemspec
427 changes: 427 additions & 0 deletions LICENSE.txt

Large diffs are not rendered by default.

Empty file added README.md
Empty file.
2 changes: 2 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
require 'bundler/gem_tasks'

# `bundler/gem_tasks` does not define a :spec task, so the default task below
# needs one to exist. Use RSpec's Rake task when the gem is available;
# otherwise define a placeholder that fails with an explicit message instead
# of Rake's generic "Don't know how to build task 'spec'".
begin
  require 'rspec/core/rake_task'
  RSpec::Core::RakeTask.new(:spec)
rescue LoadError
  desc 'Run the spec suite (requires the rspec gem)'
  task :spec do
    abort 'rspec is not installed; add it to the bundle to run the specs.'
  end
end

task default: :spec
7 changes: 7 additions & 0 deletions bin/console
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env ruby

# Interactive console with this gem's code preloaded.
require 'bundler/setup'
# Load the files this gem actually ships under lib/ (crawler/core/version.rb
# and crawler/base.rb). The previous `require 'crawler/movie'` named a file
# that is not part of this gem and made the console fail with LoadError.
require 'crawler/core/version'
require 'crawler/base'

require 'irb'
IRB.start(__FILE__)
6 changes: 6 additions & 0 deletions bin/setup
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Development bootstrap: installs the gems required by the bundle.

# Abort on a failing command, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Echo every line as it is read and every command as it is executed.
set -o verbose -o xtrace

bundle install
33 changes: 33 additions & 0 deletions crawler-core.gemspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Make lib/ loadable so the version constant can be read before the gem is
# installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'crawler/core/version'

Gem::Specification.new do |spec|
  spec.name          = 'crawler-core'
  spec.version       = Crawler::Core::VERSION
  spec.authors       = ['Jonathan PHILIPPE']
  spec.email         = ['jonathan@cinema.paris']

  # TODO: fill in a real summary and description; they are still empty, and
  # RubyGems is expected to warn about that when the gem is built.
  spec.summary       = %q{}
  spec.description   = %q{}
  spec.homepage      = 'https://crawler.cinema.paris'
  spec.license       = 'CC-BY-SA-4.0'

  # `metadata` is only available on newer RubyGems versions, hence the guard.
  if spec.respond_to?(:metadata)
    spec.metadata['homepage_uri']    = spec.homepage
    spec.metadata['source_code_uri'] = 'https://github.com/cinema-paris/crawler-core'
    # GitHub serves repository files under /blob/<branch>/; the bare
    # /CHANGELOG.md path used previously does not resolve.
    # NOTE(review): assumes the default branch is `master` - confirm.
    spec.metadata['changelog_uri']   = 'https://github.com/cinema-paris/crawler-core/blob/master/CHANGELOG.md'
  end

  # Package every file tracked by git except test/spec/feature files.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  # Executables are taken from exe/ (bin/ holds development-only scripts).
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.17'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_runtime_dependency 'activesupport', '>= 3.0'
  spec.add_runtime_dependency 'levenshtein-ffi', '>= 1.0'
end
40 changes: 40 additions & 0 deletions lib/crawler/base.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'active_support/concern'
require 'active_support/inflector'
require 'levenshtein-ffi'

module Crawler
  # Class-level behaviour shared by crawlers.
  #
  # Including this concern adds the methods below as class methods of the
  # including class. `transliterate` and `levenshtein_score` are ready to use;
  # `add_provider`, `search` and `best` are placeholders that raise
  # NotImplementedError until the including class overrides them.
  module Base
    extend ActiveSupport::Concern

    class_methods do
      # Placeholder; the including class is expected to override it.
      #
      # @param provider_name [Object] unused by this base implementation
      # @param options [Hash] unused by this base implementation
      # @raise [NotImplementedError] always
      def add_provider(provider_name, options = {})
        raise NotImplementedError, "#{self} must implement .add_provider"
      end

      # Yields the receiver to the given block so it can be configured.
      #
      # @yieldparam config [Class] the class this method is called on
      # @return [Object] the value returned by the block
      def configure
        yield self
      end

      # Normalises a string for comparison: the characters `: - . , ! ?` are
      # replaced by spaces, surrounding whitespace is stripped, runs of
      # whitespace are collapsed to a single space, and the result is passed
      # through ActiveSupport's transliteration and downcased.
      #
      # @param string [String] the text to normalise
      # @return [String] the normalised text
      def transliterate(string)
        cleaned = string.gsub(/[:\-.,!?]/, ' ').strip.gsub(/\s+/, ' ')
        # NOTE(review): nil is passed as the replacement argument, presumably
        # to fall back to the library default - confirm against the
        # ActiveSupport version in use.
        ActiveSupport::Inflector.transliterate(cleaned, nil).downcase
      end

      # Similarity of two strings based on the Levenshtein distance between
      # their normalised forms (see `transliterate`), relative to the length
      # of the longer one.
      #
      # @param string_1 [String]
      # @param string_2 [String]
      # @return [Float] 1.0 when the normalised strings are identical, lower
      #   values as the edit distance grows
      def levenshtein_score(string_1, string_2)
        string_1_transliterated = transliterate(string_1)
        string_2_transliterated = transliterate(string_2)
        max_size = [string_1_transliterated.size, string_2_transliterated.size].max

        # Both inputs normalised to the empty string: they are identical, and
        # dividing by the zero length below would otherwise produce NaN.
        return 1.0 if max_size.zero?

        levenshtein_distance = Levenshtein.distance(string_1_transliterated, string_2_transliterated)

        (max_size - levenshtein_distance) / max_size.to_f
      end

      # Placeholder; the including class is expected to override it.
      #
      # @raise [NotImplementedError] always
      def search(*args)
        raise NotImplementedError, "#{self} must implement .search"
      end

      # Placeholder; the including class is expected to override it.
      #
      # @raise [NotImplementedError] always
      def best(*args)
        raise NotImplementedError, "#{self} must implement .best"
      end
    end
  end
end
5 changes: 5 additions & 0 deletions lib/crawler/core/version.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module Crawler
  module Core
    # Gem version, read by the gemspec. Frozen so the constant cannot be
    # mutated in place at runtime.
    VERSION = '0.1.0'.freeze
  end
end

0 comments on commit a85367e

Please sign in to comment.