diff options
| author | ab <abdullah_ahmed@brown.edu> | 2019-07-01 15:40:55 -0400 |
|---|---|---|
| committer | ab <abdullah_ahmed@brown.edu> | 2019-07-01 15:40:55 -0400 |
| commit | 0fc7edd5f9bd1234f8de6e5b1c7b8f23668d784c (patch) | |
| tree | 2ddff97c032657fb86de37c3be2cf2d201980088 /solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml | |
| parent | 4f8b7cecea3ceed6861c38bde9ce03c9c46e2d09 (diff) | |
| parent | 6bd79baf2d9301304194d87667bb5c66c17e5298 (diff) | |
Merge branch 'monikasearch2' of https://github.com/browngraphicslab/Dash-Web into text_box_ab
Diffstat (limited to 'solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml')
| -rw-r--r-- | solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
new file mode 100644
index 000000000..b7de812d0
--- /dev/null
+++ b/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
@@ -0,0 +1,35 @@
+<dataConfig>
+  <dataSource type="URLDataSource"/>
+  <document>
+
+    <entity name="stackoverflow"
+            url="https://stackoverflow.com/feeds/tag/solr"
+            processor="XPathEntityProcessor"
+            forEach="/feed|/feed/entry"
+            transformer="HTMLStripTransformer,RegexTransformer">
+
+      <!-- Feed-level timestamp: commonField="true" applies it to every entry document -->
+      <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/>
+
+      <!-- Drop everything up to the last "/" so only the trailing part of the id URL is kept -->
+      <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/>
+
+      <field column="title" xpath="/feed/entry/title"/>
+      <field column="author" xpath="/feed/entry/author/name"/>
+      <field column="category" xpath="/feed/entry/category/@term"/>
+      <field column="link" xpath="/feed/entry/link[@rel='alternate']/@href"/>
+
+      <!-- Turn the HTML summary into plain text: stripHTML removes the markup, then the
+           regex collapses each run of spaces/newlines into a single space.
+           There is also an UpdateRequestProcessor to trim the remaining spaces. -->
+      <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/>
+
+      <!-- Namespaces are ignored when matching XPath, so the bare element name is used -->
+      <field column="rank" xpath="/feed/entry/rank"/>
+
+      <field column="published_dt" xpath="/feed/entry/published"/>
+      <field column="updated_dt" xpath="/feed/entry/updated"/>
+    </entity>
+
+  </document>
+</dataConfig>
