- 1 - Download and install Solr.
- 2 - Download and install MongoDB.
- 3 - Download and install Java.
- 4 - Download and install Maven.
- 5 - Download and install git.
- 1 - org.mongodb.Bson - 3.6.3
- 2 - org.mongodb.mongo-java-driver - 3.6.3
- 3 - org.apache.solr.solr-dataimporthandler
cd ~
git clone https://github.com/GuilhermeViterboGalvao/solrMongoDBDataImporter
cd solrMongoDBDataImporter
mvn clean install
Example:
cd /opt/solr-4.7.2/example/lib
cp ~/.m2/repository/org/mongodb/mongo-java-driver/3.6.3/mongo-java-driver-3.6.3.jar .
cp ~/.m2/repository/org/mongodb/bson/3.6.3/bson-3.6.3.jar .
cp ~/.m2/repository/org/apache/solr/solr-dataimporthandler/4.7.0/solr-dataimporthandler-4.7.0.jar .
cp ~/solrMongoDBDataImporter/target/solrMongoDBDataImporter-1.0.jar .
- 1 - Change the file solrconfig.xml, and add the following content after config tag:
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<str name="config">data-config.xml</str>
</lst>
</requestHandler>
- 2 - Add the new libs on file solrconfig.xml. Add after config tag:
<lib path="../../lib/bson-3.6.3.jar" />
<lib path="../../lib/mongo-java-driver-3.6.3.jar" />
<lib path="../../lib/solr-dataimporthandler-4.7.0.jar" />
<lib path="../../lib/solrMongoDBDataImporter-1.0.jar" />
- 3 - Configure your schema.xml, for example:
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="hists" version="1.1">
<types>
<fieldType name="integer" class="solr.IntField" omitNorms="true"/>
<fieldType name="long" class="solr.LongField" omitNorms="true"/>
<fieldType name="float" class="solr.FloatField" omitNorms="true"/>
<fieldType name="double" class="solr.DoubleField" omitNorms="true"/>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sint" class="solr.TrieIntField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong" class="solr.TrieLongField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat" class="solr.TrieFloatField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.TrieDoubleField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="uuid" class="solr.UUIDField" indexed="true" />
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="Portuguese" />
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="Portuguese" />
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
</analyzer>
</fieldType>
<fieldType name="text_formated" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
</analyzer>
</fieldType>
<fieldType name="text_cust" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
</analyzer>
</fieldType>
</types>
<fields>
<!-- Your MongoDB document fields go here... -->
<field name="_id" type="string" indexed="true" stored="true" required="true" />
<field name="totalTime" type="long" indexed="true" stored="true" required="true" />
<field name="token" type="string" indexed="true" stored="true" required="true" />
<field name="statusCode" type="integer" indexed="true" stored="true" required="true" />
<field name="requestId" type="string" indexed="true" stored="true" required="true" />
<field name="date" type="date" indexed="true" stored="true" required="true" />
<field name="month" type="integer" indexed="true" stored="true" required="true" />
<field name="day" type="integer" indexed="true" stored="true" required="true" />
<field name="hour" type="integer" indexed="true" stored="true" required="true" />
<field name="text" type="text_pt" indexed="true" stored="false" multiValued="true"/>
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
<dynamicField name="*_i" type="sint" indexed="true" stored="true" />
<dynamicField name="*_s" type="string" indexed="true" stored="true" />
<dynamicField name="*_l" type="slong" indexed="true" stored="true" />
<dynamicField name="*_b" type="boolean" indexed="true" stored="true" />
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true" />
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true" />
<dynamicField name="*_dt" type="date" indexed="true" stored="true" />
<dynamicField name="*_cust" type="text_cust" indexed="true" stored="true" />
<dynamicField name="*_pt" type="text_pt" indexed="true" stored="true" multiValued="true" />
<dynamicField name="*_formated" type="text_formated" indexed="true" stored="true" multiValued="true" />
</fields>
<uniqueKey>_id</uniqueKey>
<defaultSearchField>_id</defaultSearchField>
<solrQueryParser defaultOperator="OR"/>
</schema>
- 4 - Now create the file data-config.xml in the same path of file solrconfig.xml:
<?xml version="1.0"?>
<dataConfig>
<dataSource
name="MyMongoDBDataSourceName"
type="MongoDBDataSource"
database="myDatabase"
host="localhost"
port="27017"
username="admin"
password="admin123"/>
<document name="hists">
<entity
processor="MongoDBEntityProcessor"
collection="myCollection"
fullDumpQuery="[ { '$project' :{ totalTime: 1, token: 1, statusCode: 1, requestId: 1, date: 1, month: { $month: '$date' }, day: { $dayOfMonth: '$date' }, hour: { $hour: '$date' } } } ]"
findDeltaQuery="[ { '$match':{ 'date': { '$gt': ISODate('${dih.hists.last_index_time}') } } }, { '$project' :{ totalTime: 1, token: 1, statusCode: 1, requestId: 1, date: 1, month: { $month: '$date' }, day: { $dayOfMonth: '$date' }, hour: { $hour: '$date' } } } ]"
deltaDumpQuery="[ { '$match': { '_id': ObjectId('${dih.delta._id}') } }, { '$project' :{ totalTime: 1, token: 1, statusCode: 1, requestId: 1, date: 1, month: { $month: '$date' }, day: { $dayOfMonth: '$date' }, hour: { $hour: '$date' } } } ]"
datasource="MyMongoDBDataSourceName"
transformer="ObjectIdToLongTransformer"
name="myEntityOfCollectionXXX">
<field column="_id" name="_id" mongoField="_id" />
<field column="totalTime" name="totalTime" mongoField="totalTime" />
<field column="token" name="token" mongoField="token" />
<field column="statusCode" name="statusCode" mongoField="statusCode" />
<field column="requestId" name="requestId" mongoField="requestId" />
<field column="date" name="date" mongoField="date" />
<field column="day" name="day" mongoField="day" />
<field column="month" name="month" mongoField="month" />
<field column="hour" name="hour" mongoField="hour" />
</entity>
</document>
</dataConfig>
- 5 - Start your Solr and enjoy =).
- database: Is the name of your MongoDB database.
- host: The MongoDB host you want to connect to (by default, localhost).
- port: The MongoDB port (by default is 27017).
- username: The username used to connect to MongoDB.
- password: The password for the username you provided.
- collection: The name of the collection you want to extract the data from.
- fullDumpQuery: MongoDB query for full import process.
- findDeltaQuery: MongoDB query for delta import process.
- deltaDumpQuery: MongoDB query to import a single record. It will be executed for each result of each item in the delta import process.
Allows you to use other types of Mongo fields, like 'document' and 'arrays'. All available options are listed below.
If you need to use a document field, this option is for you. You just need to add this 'tag' on your field declaration. Example:
<field column="address.number" name="address" mongoField="address.number" documentObject="true" />
To use an array field, this option is for you. You just need to add this 'tag' on your field declaration. Example:
<field column="phones.0.phoneType" name="phoneType" mongoField="phones.0.phoneType" arrayObject="true" />
If you need to transform a text enum value into an integer value, this option is for you. You just need to add this 'tag' and the 'tag' fromStringEnumToIntegerValueObjectData with the values on your field declaration. Example:
<field column="genderType" name="genderType" mongoField="genderType" fromStringEnumToIntegerValueObject="true" fromStringEnumToIntegerValueObjectData="Male=1,Female=2" />
If you need to ignore the Mongo value and use a fixed value, this option is for you. You just need to add this 'tag' and the 'tag' fixValueObjectData with the fixed value on your field declaration. Example:
<field column="companyToken" name="companyToken" mongoField="no-use" fixValueObject="true" fixValueObjectData="dgfhsjs7262818kjjh" />