- Plugin type: input
- Resume supported: yes
- Cleanup supported: yes
- Guess supported: no
- nodes: nodes (array, required)
- host: host (string, required)
- port: port (integer, required)
- queries: lucene query array. (array, required)
- index: index (string, required)
- index_type: index_type (string)
- request_timeout: request timeout (integer)
- per_size: per size query. (integer, required, default:
1000
) - limit_size: limit size unit query. (integer, default: unlimit)
- num_threads: number of threads for queries. (integer, default: 1)
- retry_on_failure: retry on failure. set 0 is retry forever. (integer, default: 5)
- sort: sort order. (hash, default: nil)
- scroll: scroll. to keep the search context. (string, default: '1m')
- fields: fields (array, required)
- name: name (string, required)
- type: type (string, required)
- metadata: metadata (boolean, default: false)
- time_format: time_format (string)
in:
type: elasticsearch
nodes:
- {host: localhost, port: 9200}
queries:
- 'page_type: HP'
- 'page_type: GP'
index: crawl
index_type: m_corporation_page
request_timeout: 60
per_size: 1000
limit_size: 200000
num_threads: 2
sort:
m_corporation_id: desc
employee_range: asc
fields:
- { name: _id, type: string, metadata: true }
- { name: _type, type: string, metadata: true }
- { name: _index, type: string, metadata: true }
- { name: _score, type: double, metadata: true }
- { name: page_type, type: string }
- { name: corp_name, type: string }
- { name: corp_key, type: string }
- { name: title, type: string }
- { name: body, type: string }
- { name: url, type: string }
- { name: employee_range, type: long }
- { name: m_corporation_id, type: long }
- { name: cg_lv1, type: json }
- { name: cg_lv2, type: json }
- { name: cg_lv3, type: json }
- string
- long
- double
- timestamp
- json
- boolean
curl -o embulk.jar --create-dirs -L "http://dl.embulk.org/embulk-latest.jar"
chmod +x embulk.jar
./embulk.jar gem install bundler
./embulk.jar bundle install --path vendor/bundle
./embulk.jar bundle exec rake test
$ rake