diff options
Diffstat (limited to 'solr-8.1.1/example/example-DIH/solr/atom/conf')
7 files changed, 0 insertions, 306 deletions
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml deleted file mode 100644 index b7de812d0..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml +++ /dev/null @@ -1,35 +0,0 @@ -<dataConfig> -  <dataSource type="URLDataSource"/> -  <document> - -    <entity name="stackoverflow" -            url="https://stackoverflow.com/feeds/tag/solr" -            processor="XPathEntityProcessor" -            forEach="/feed|/feed/entry" -            transformer="HTMLStripTransformer,RegexTransformer"> - -      <!-- Pick this value up from the feed level and apply to all documents --> -      <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/> - -      <!-- Keep only the final numeric part of the URL --> -      <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/> - -      <field column="title"    xpath="/feed/entry/title"/> -      <field column="author"   xpath="/feed/entry/author/name"/> -      <field column="category" xpath="/feed/entry/category/@term"/> -      <field column="link"     xpath="/feed/entry/link[@rel='alternate']/@href"/> - -      <!-- Use transformers to convert HTML into plain text. -        There is also an UpdateRequestProcess to trim remaining spaces. -      --> -      <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/> - -      <!-- Ignore namespaces when matching XPath --> -      <field column="rank" xpath="/feed/entry/rank"/> - -      <field column="published_dt" xpath="/feed/entry/published"/> -      <field column="updated_dt" xpath="/feed/entry/updated"/> -    </entity> - -  </document> -</dataConfig> diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements.  See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema b/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema deleted file mode 100644 index 58751520d..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema +++ /dev/null @@ -1,106 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements.  See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License.  You may obtain a copy of the License at - -     http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<schema name="example-DIH-atom" version="1.6"> -  <uniqueKey>id</uniqueKey> - -  <field name="id" type="string" indexed="true" stored="true" required="true"/> -  <field name="title" type="text_en_splitting" indexed="true" stored="true"/> -  <field name="author" type="string" indexed="true" stored="true"/> -  <field name="category" type="string" indexed="true" stored="true" multiValued="true"/> -  <field name="link" type="string" indexed="true" stored="true"/> -  <field name="summary" type="text_en_splitting" indexed="true" stored="true"/> -  <field name="rank" type="pint" indexed="true" stored="true"/> - -  <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> - -  <!-- Catch-all field, aggregating all "useful to search as text" fields via the copyField instructions --> -  <field name="text" type="text_en_splitting" indexed="true" stored="false" multiValued="true"/> - -  <field name="urls" type="url_only" indexed="true" stored="false"/> - - -  <copyField source="id" dest="text"/> -  <copyField source="title" dest="text"/> -  <copyField source="author" dest="text"/> -  <copyField source="category" dest="text"/> -  <copyField source="summary" dest="text"/> - -  <!-- extract URLs from summary for faceting --> -  <copyField source="summary" dest="urls"/> - -  <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/> -  <fieldType name="pint" class="solr.IntPointField" docValues="true"/> -  <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> - - -  <!-- A text field with defaults appropriate for English, plus -   aggressive word-splitting and autophrase features enabled. -   This field is just like text_en, except it adds -   WordDelimiterFilter to enable splitting and matching of -   words on case-change, alpha numeric boundaries, and -   non-alphanumeric chars.  This means certain compound word -   cases will work, for example query "wi fi" will match -   document "WiFi" or "wi-fi". -  --> -  <fieldType name="text_en_splitting" class="solr.TextField" -             positionIncrementGap="100" autoGeneratePhraseQueries="true"> -    <analyzer type="index"> -      <tokenizer class="solr.WhitespaceTokenizerFactory"/> -      <!-- in this example, we will only use synonyms at query time -      <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> -      --> -      <!-- Case insensitive stop word removal. --> -      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> -      <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" -              catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> -      <filter class="solr.LowerCaseFilterFactory"/> -      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -      <filter class="solr.PorterStemFilterFactory"/> -      <filter class="solr.FlattenGraphFilterFactory"/> -    </analyzer> -    <analyzer type="query"> -      <tokenizer class="solr.WhitespaceTokenizerFactory"/> -      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -      <filter class="solr.StopFilterFactory" -              ignoreCase="true" -              words="lang/stopwords_en.txt" -      /> -      <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" -              catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> -      <filter class="solr.LowerCaseFilterFactory"/> -      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -      <filter class="solr.PorterStemFilterFactory"/> -    </analyzer> -  </fieldType> - -  <!-- Field type that extracts URLs from the text. -   As the stored representation is not changed, it is only useful for faceting. -   It is not terribly useful for searching URLs either, as there are too many special symbols. -  --> -  <fieldType name="url_only" class="solr.TextField" positionIncrementGap="100"> -    <analyzer type="index"> -      <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/> -      <filter class="solr.TypeTokenFilterFactory" types="url_types.txt" useWhitelist="true"/> -    </analyzer> -    <analyzer type="query"> -      <tokenizer class="solr.KeywordTokenizerFactory"/> -    </analyzer> -  </fieldType> - -</schema> diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt deleted file mode 100644 index 1303e42a0..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt +++ /dev/null @@ -1,17 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -lucene diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml deleted file mode 100644 index f78511354..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml +++ /dev/null @@ -1,64 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements.  See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License.  You may obtain a copy of the License at - -     http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- - This is a DEMO configuration, highlighting elements - specifically needed to get this example running - such as libraries and request handler specifics. - - It uses defaults or does not define most of production-level settings - such as various caches or auto-commit policies. - - See Solr Reference Guide and other examples for - more details on a well configured solrconfig.xml - https://lucene.apache.org/solr/guide/the-well-configured-solr-instance.html ---> -<config> - -  <!-- Controls what version of Lucene various components of Solr -    adhere to.  Generally, you want to use the latest version to -    get all bug fixes and improvements. It is highly recommended -    that you fully re-index after changing this setting as it can -    affect both how text is indexed and queried. -  --> -  <luceneMatchVersion>8.1.1</luceneMatchVersion> - -  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/> - -  <requestHandler name="/select" class="solr.SearchHandler"> -    <lst name="defaults"> -      <str name="echoParams">explicit</str> -      <str name="df">text</str> -       <!-- Change from JSON to XML format (the default prior to Solr 7.0) -          <str name="wt">xml</str>  -         --> -    </lst> -  </requestHandler> - -  <requestHandler name="/dataimport" class="solr.DataImportHandler"> -    <lst name="defaults"> -      <str name="config">atom-data-config.xml</str> -      <str name="processor">trim_text</str> -    </lst> -  </requestHandler> - -  <updateProcessor class="solr.processor.TrimFieldUpdateProcessorFactory" name="trim_text"> -    <str name="typeName">text_en_splitting</str> -  </updateProcessor> - -</config> diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt deleted file mode 100644 index eab4ee875..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt deleted file mode 100644 index 808f31384..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt +++ /dev/null @@ -1 +0,0 @@ -<URL>  | 
