diff options
| author | Stanley Yip <stanley_yip@brown.edu> | 2020-01-08 13:47:29 -0500 | 
|---|---|---|
| committer | Stanley Yip <stanley_yip@brown.edu> | 2020-01-08 13:47:29 -0500 | 
| commit | abfa42b6f2cf863deee19aac19328a23687464cb (patch) | |
| tree | b481f23ffa7bccbde7a31de34f50d765b6b73162 /solr-8.1.1/example/example-DIH/solr/mail | |
| parent | d8fc218f3481728f221ceacc60ac4bc553f8e295 (diff) | |
| parent | 19a71cb2788b9c1c8d8ced4af285bf91033ba626 (diff) | |
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into pen
Diffstat (limited to 'solr-8.1.1/example/example-DIH/solr/mail')
60 files changed, 0 insertions, 13626 deletions
diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/kmeans-attributes.xml b/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/kmeans-attributes.xml deleted file mode 100644 index d802465f6..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/kmeans-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ -<!--  -  Default configuration for the bisecting k-means clustering algorithm. -   -  This file can be loaded (and saved) by Carrot2 Workbench. -  http://project.carrot2.org/download.html ---> -<attribute-sets default="attributes"> -    <attribute-set id="attributes"> -      <value-set> -        <label>attributes</label> -          <attribute key="MultilingualClustering.defaultLanguage"> -            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/> -          </attribute> -          <attribute key="MultilingualClustering.languageAggregationStrategy"> -            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/> -          </attribute> -      </value-set> -  </attribute-set> -</attribute-sets> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/lingo-attributes.xml b/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/lingo-attributes.xml deleted file mode 100644 index 5febfc320..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/lingo-attributes.xml +++ /dev/null @@ -1,24 +0,0 @@ -<!--  -  Default configuration for the Lingo clustering algorithm. - -  This file can be loaded (and saved) by Carrot2 Workbench. -  http://project.carrot2.org/download.html ---> -<attribute-sets default="attributes"> -    <attribute-set id="attributes"> -      <value-set> -        <label>attributes</label> -          <!--  -          The language to assume for clustered documents. 
-          For a list of allowed values, see:  -          http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage -          --> -          <attribute key="MultilingualClustering.defaultLanguage"> -            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/> -          </attribute> -          <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase"> -            <value type="java.lang.Integer" value="20"/> -          </attribute> -      </value-set> -  </attribute-set> -</attribute-sets> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/stc-attributes.xml b/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/stc-attributes.xml deleted file mode 100644 index c1bf110c8..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/clustering/carrot2/stc-attributes.xml +++ /dev/null @@ -1,19 +0,0 @@ -<!--  -  Default configuration for the STC clustering algorithm. - -  This file can be loaded (and saved) by Carrot2 Workbench. 
-  http://project.carrot2.org/download.html ---> -<attribute-sets default="attributes"> -    <attribute-set id="attributes"> -      <value-set> -        <label>attributes</label> -          <attribute key="MultilingualClustering.defaultLanguage"> -            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/> -          </attribute> -          <attribute key="MultilingualClustering.languageAggregationStrategy"> -            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/> -          </attribute> -      </value-set> -  </attribute-set> -</attribute-sets> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/currency.xml b/solr-8.1.1/example/example-DIH/solr/mail/conf/currency.xml deleted file mode 100644 index 3a9c58afe..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/currency.xml +++ /dev/null @@ -1,67 +0,0 @@ -<?xml version="1.0" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements.  See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License.  You may obtain a copy of the License at - -     http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<!-- Example exchange rates file for CurrencyField type named "currency" in example schema --> - -<currencyConfig version="1.0"> -  <rates> -    <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 --> -    <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" /> -    <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" /> -    <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" /> -    <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" /> -    <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" /> -    <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" /> -    <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" /> -    <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" /> -    <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" /> -    <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" /> -    <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" /> -    <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" /> -    <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" /> -    <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. 
FUND SDR" /> -    <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" /> -    <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" /> -    <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" /> -    <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" /> -    <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" /> -    <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" /> -    <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" /> -    <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" /> -    <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" /> -    <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" /> -    <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" /> -    <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" /> -    <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" /> -    <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" /> -    <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" /> -    <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" /> -    <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" /> -    <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" /> -    <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" /> -    <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" /> -    <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. 
Dirham" /> -    <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" /> -    <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" /> -     -    <!-- Cross-rates for some common currencies --> -    <rate from="EUR" to="GBP" rate="0.869914" />   -    <rate from="EUR" to="NOK" rate="7.800095" />   -    <rate from="GBP" to="NOK" rate="8.966508" />   -  </rates> -</currencyConfig> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/elevate.xml b/solr-8.1.1/example/example-DIH/solr/mail/conf/elevate.xml deleted file mode 100644 index 2c09ebed6..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements.  See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License.  You may obtain a copy of the License at - -     http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- If this file is found in the config directory, it will only be -     loaded once at startup.  If it is found in Solr's data -     directory, it will be re-loaded every commit. 
- -   See http://wiki.apache.org/solr/QueryElevationComponent for more info - ---> -<elevate> - <!-- Query elevation examples -  <query text="foo bar"> -    <doc id="1" /> -    <doc id="2" /> -    <doc id="3" /> -  </query> - -for use with techproducts example -  -  <query text="ipod"> -    <doc id="MA147LL/A" />  put the actual ipod at the top  -    <doc id="IW-02" exclude="true" /> exclude this cable -  </query> ---> - -</elevate> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_ca.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_ca.txt deleted file mode 100644 index 307a85f91..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_fr.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_fr.txt deleted file mode 100644 index f1bba51b2..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_ga.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_ga.txt deleted file mode 100644 index 9ebe7fa34..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_it.txt 
b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_it.txt deleted file mode 100644 index cac040953..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/contractions_it.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l  -all  -dall  -dell  -nell  -sull  -coll  -pell  -gl  -agl  -dagl  -degl  -negl  -sugl  -un  -m  -t  -s  -v  -d diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/hyphenations_ga.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 4d2642cc5..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stemdict_nl.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stemdict_nl.txt deleted file mode 100644 index 441072971..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets	fiets -bromfiets	bromfiets -ei	eier -kind	kinder diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stoptags_ja.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b750845..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below.  
Note that comments are -# not allowed on the same line as a stoptag.  See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. -# -##### -#  noun: unclassified nouns -#名詞 -# -#  noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -#  noun-proper: Proper nouns where the sub-classification is undefined  -#名詞-固有名詞 -# -#  noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -#  noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -#  noun-proper-person-misc: names that cannot be divided into surname and  -#  given name; foreign names; names where the surname or given name is unknown. -#  e.g. お市の方 -#名詞-固有名詞-人名-一般 -# -#  noun-proper-person-surname: Mainly Japanese surnames. -#  e.g. 山田 -#名詞-固有名詞-人名-姓 -# -#  noun-proper-person-given_name: Mainly Japanese given names. -#  e.g. 太郎 -#名詞-固有名詞-人名-名 -# -#  noun-proper-organization: Names representing organizations. -#  e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -#  noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -#  noun-proper-place-misc: Place names excluding countries. -#  e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -#  noun-proper-place-country: Country names.  -#  e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -#  noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -#  noun-pronoun-misc: miscellaneous pronouns:  -#  e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -#  noun-pronoun-contraction: Spoken language contraction made by combining a  -#  pronoun and the particle 'wa'. -#  e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ  -#名詞-代名詞-縮約 -# -#  noun-adverbial: Temporal nouns such as names of days or months that behave  -#  like adverbs. Nouns that represent amount or ratios and can be used adverbially, -#  e.g. 
金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -#  noun-verbal: Nouns that take arguments with case and can appear followed by  -#  'suru' and related verbs (する, できる, なさる, くださる) -#  e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -#  noun-adjective-base: The base form of adjectives, words that appear before な ("na") -#  e.g. 健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -#  noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -#  e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -#  noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -#  noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that  -#  attach to the base form of inflectional words, words that cannot be classified  -#  into any of the other categories below. This category includes indefinite nouns. -#  e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,  -#       順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,  -#       拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -#       わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -#  noun-affix-adverbial: noun affixes that that can behave as adverbs. -#  e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,  -#       上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,  -#       最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,  -#       とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,  -#       儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -#  noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars  -#  with the stem よう(だ) ("you(da)"). -#  e.g.  よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -#   -#  noun-affix-adjective-base: noun affixes that can connect to the indeclinable -#  connection form な (aux "da"). -#  e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -#  noun-special: special nouns where the sub-classification is undefined. 
-#名詞-特殊 -# -#  noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is  -#  treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base  -#  form of inflectional words. -#  e.g. そう -#名詞-特殊-助動詞語幹 -# -#  noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -#  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect  -#  to ガル or タイ and can combine into compound nouns, words that cannot be classified into -#  any of the other categories below. In general, this category is more inclusive than  -#  接尾語 ("suffix") and is usually the last element in a compound noun. -#  e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -#       よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -#  noun-suffix-person: Suffixes that form nouns and attach to person names more often -#  than other nouns. -#  e.g. 君, 様, 著 -#名詞-接尾-人名 -# -#  noun-suffix-place: Suffixes that form nouns and attach to place names more often  -#  than other nouns. -#  e.g. 町, 市, 県 -#名詞-接尾-地域 -# -#  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that  -#  can appear before スル ("suru"). -#  e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -#  noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,  -#  is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the  -#  conjunctive form of inflectional words. -#  e.g. そう -#名詞-接尾-助動詞語幹 -# -#  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive  -#  form of inflectional words and appear before the copula だ ("da"). -#  e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -#  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -#  e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -#  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. 
This category  -#  is more inclusive than 助数詞 ("classifier") and includes common nouns that attach  -#  to numbers. -#  e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -#  noun-suffix-special: Special suffixes that mainly attach to inflecting words. -#  e.g. (楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -#  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words  -#  together. -#  e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -#  noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are  -#  semantically verb-like. -#  e.g. ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -#  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,  -#  dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")  -#  is いわく ("iwaku"). -#名詞-引用文字列 -# -#  noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -#  behave like an adjective. -#  e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -#  prefix: unclassified prefixes -#接頭詞 -# -#  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)  -#  excluding numerical expressions. -#  e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -#  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -#  in conjunctive form followed by なる/なさる/くださる. -#  e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -#  prefix-adjectival: Prefixes that attach to adjectives. -#  e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -#  prefix-numerical: Prefixes that attach to numerical expressions. -#  e.g. 
約, およそ, 毎時 -#接頭詞-数接続 -# -##### -#  verb: unclassified verbs -#動詞 -# -#  verb-main: -#動詞-自立 -# -#  verb-auxiliary: -#動詞-非自立 -# -#  verb-suffix: -#動詞-接尾 -# -##### -#  adjective: unclassified adjectives -#形容詞 -# -#  adjective-main: -#形容詞-自立 -# -#  adjective-auxiliary: -#形容詞-非自立 -# -#  adjective-suffix: -#形容詞-接尾 -# -##### -#  adverb: unclassified adverbs -#副詞 -# -#  adverb-misc: Words that can be segmented into one unit and where adnominal  -#  modification is not possible. -#  e.g. あいかわらず, 多分 -#副詞-一般 -# -#  adverb-particle_conjunction: Adverbs that can be followed by の, は, に,  -#  な, する, だ, etc. -#  e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -#  adnominal: Words that only have noun-modifying forms. -#  e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,  -#       どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,  -#       「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -#  conjunction: Conjunctions that can occur independently. -#  e.g. が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -#  particle: unclassified particles. -助詞 -# -#  particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -#  particle-case-misc: Case particles. -#  e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -#  particle-case-quote: the "to" that appears after nouns, a person’s speech,  -#  quotation marks, expressions of decisions from a meeting, reasons, judgements, -#  conjectures, etc. -#  e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -#  particle-case-compound: Compounds of particles and verbs that mainly behave  -#  like case particles. -#  e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -#       にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,  -#       にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,  -#       に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,  -#       に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -#       にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,  -#       にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -#       って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -#  particle-conjunctive: -#  e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,  -#       ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,  -#       (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -#  particle-dependency: -#  e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -#  particle-adverbial: -#  e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,  -#       (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -#       (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,  -#       (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -#       ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -#  particle-interjective: particles with interjective grammatical roles. -#  e.g. (松島) や -助詞-間投助詞 -# -#  particle-coordinate: -#  e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -#  particle-final: -#  e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,  -#       ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -#  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is  -#  adverbial, conjunctive, or sentence final. For example: -#       (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -#       (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -#           「(祈りが届いたせい) か (, 試験に合格した.)」 -#       (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -#  e.g. 
か -助詞-副助詞/並立助詞/終助詞 -# -#  particle-adnominalizer: The "no" that attaches to nouns and modifies  -#  non-inflectional words. -助詞-連体化 -# -#  particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs  -#  that are giongo, giseigo, or gitaigo. -#  e.g. に, と -助詞-副詞化 -# -#  particle-special: A particle that does not fit into one of the above classifications.  -#  This includes particles that are used in Tanka, Haiku, and other poetry. -#  e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -#  auxiliary-verb: -助動詞 -# -##### -#  interjection: Greetings and other exclamations. -#  e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,  -#       いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -#  symbol: unclassified Symbols. -記号 -# -#  symbol-misc: A general symbol not in one of the categories below. -#  e.g. [○◎@$〒→+] -記号-一般 -# -#  symbol-comma: Commas -#  e.g. [,、] -記号-読点 -# -#  symbol-period: Periods and full stops. -#  e.g. [..。] -記号-句点 -# -#  symbol-space: Full-width whitespace. -記号-空白 -# -#  symbol-open_bracket: -#  e.g. [({‘“『【] -記号-括弧開 -# -#  symbol-close_bracket: -#  e.g. [)}’”』」】] -記号-括弧閉 -# -#  symbol-alphabetic: -#記号-アルファベット -# -##### -#  other: unclassified other -#その他 -# -#  other-interjection: Words that are hard to classify as noun-suffixes or  -#  sentence-final particles. -#  e.g. (だ)ァ -その他-間投 -# -##### -#  filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -#  e.g. あの, うんと, えと -フィラー -# -##### -#  non-verbal: non-verbal sound. -非言語音 -# -##### -#  fragment: -#語断片 -# -##### -#  unknown: unknown part of speech. 
-#未知語 -# -##### End of file diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ar.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ar.txt deleted file mode 100644 index 046829db6..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,125 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some  -# redundant entries, for example containing forms with both أ and ا -من -ومن -منها -منه -في -وفي -فيها -فيه -و -ف -ثم -او -أو -ب -بها -به -ا -أ -اى -اي -أي -أى -لا -ولا -الا -ألا -إلا -لكن -ما -وما -كما -فما -عن -مع -اذا -إذا -ان -أن -إن -انها -أنها -إنها -انه -أنه -إنه -بان -بأن -فان -فأن -وان -وأن -وإن -التى -التي -الذى -الذي -الذين -الى -الي -إلى -إلي -على -عليها -عليه -اما -أما -إما -ايضا -أيضا -كل -وكل -لم -ولم -لن -ولن -هى -هي -هو -وهى -وهي -وهو -فهى -فهي -فهو -انت -أنت -لك -لها -له -هذه -هذا -تلك -ذلك -هناك -كانت -كان -يكون -تكون -وكانت -وكان -غير -بعض -قد -نحو -بين -بينما -منذ -ضمن -حيث -الان -الآن -خلال -بعد -قبل -حتى -عند -عندما -لدى -جميع diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_bg.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1ae4ba2ae..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,193 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беше -би -бил -била -били -било -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главно -го -д -да -дали -до -докато -докога -дори -досега -доста -е -едва -един -ето -за -зад -заедно -заради -засега -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -ли -м -ме -между -мен -ми -мнозина -мога -могат -може -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нея -ни -ние -никой -нито -но -някои -някой -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първо -с -са -само -се -сега -си -скоро -след -сме -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -тн -то -това -тогава -този -той -толкова -точно -трябва -тук -тъй -тя -тях -у -харесва -ч -че -често -чрез -ще -щом -я diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ca.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ca.txt deleted file mode 100644 index 3da65deaf..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -ací -ah -així -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allí -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquí -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes -d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em -en 
-encara -ens -entre -érem -eren -éreu -es -és -esta -està -estàvem -estaven -estàveu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi  -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc  -poca -pocs -poques -potser -propi -qual -quals -quan -quant  -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu  -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son  -són -sons  -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ckb.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ckb.txt deleted file mode 100644 index 87abf118f..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ckb.txt +++ /dev/null @@ -1,136 +0,0 @@ -# set of kurdish stopwords -# note these have been normalized with our scheme (e represented with U+06D5, etc) -# constructed from: -# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al) -# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston) -# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc - -# and -و -# which -کە -# of -ی -# made/did -کرد -# that/which -ئەوەی -# on/head -سەر -# two -دوو -# also -هەروەها -# from/that -لەو -# makes/does -دەکات -# some -چەند -# every -هەر - -# demonstratives -# that -ئەو -# this -ئەم - -# personal pronouns -# I -من -# we -ئێمە -# you -تۆ -# you -ئێوە -# he/she/it -ئەو -# they -ئەوان 
- -# prepositions -# to/with/by -بە -پێ -# without -بەبێ -# along with/while/during -بەدەم -# in the opinion of -بەلای -# according to -بەپێی -# before -بەرلە -# in the direction of -بەرەوی -# in front of/toward -بەرەوە -# before/in the face of -بەردەم -# without -بێ -# except for -بێجگە -# for -بۆ -# on/in -دە -تێ -# with -دەگەڵ -# after -دوای -# except for/aside from -جگە -# in/from -لە -لێ -# in front of/before/because of -لەبەر -# between/among -لەبەینی -# concerning/about -لەبابەت -# concerning -لەبارەی -# instead of -لەباتی -# beside -لەبن -# instead of -لەبرێتی -# behind -لەدەم -# with/together with -لەگەڵ -# by -لەلایەن -# within -لەناو -# between/among -لەنێو -# for the sake of -لەپێناوی -# with respect to -لەرەوی -# by means of/for -لەرێ -# for the sake of -لەرێگا -# on/on top of/according to -لەسەر -# under -لەژێر -# between/among -ناو -# between/among -نێوان -# after -پاش -# before -پێش -# like -وەک diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_cz.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_cz.txt deleted file mode 100644 index 53c6097da..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,172 +0,0 @@ -a -s -k -o -i -u -v -z -dnes -cz -tímto -budeš -budem -byli -jseš -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proč -máte -tato -kam -tohoto -kdo -kteří -mi -nám -tom -tomuto -mít -nic -proto -kterou -byla -toho -protože -asi -ho -naši -napište -re -což -tím -takže -svých -její -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -či -pod -téma -mezi -přes -ty -pak -vám -ani -když -však -neg -jsem -tento -článku -články -aby -jsme -před -pta -jejich -byl -ještě -až -bez -také -pouze -první -vaše -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -není -vás -jen -podle -zde -už -být -více -bude -již -než -který -by -které -co -nebo -ten -tak -má -při -od -po -jsou -jak -další -ale -si -se -ve -to -jako 
-za -zpět -ze -do -pro -je -na -atd -atp -jakmile -přičemž -já -on -ona -ono -oni -ony -my -vy -jí -ji -mě -mne -jemu -tomu -těm -těmu -němu -němuž -jehož -jíž -jelikož -jež -jakož -načež diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_da.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145b9..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og           | and -i            | in -jeg          | I -det          | that (dem. pronoun)/it (pers. pronoun) -at           | that (in front of a sentence)/to (with infinitive) -en           | a/an -den          | it (pers. pronoun)/that (dem. 
pronoun) -til          | to/at/for/until/against/by/of/into, more -er           | present tense of "to be" -som          | who, as -på           | on/upon/in/on/at/to/after/of/with/for, on -de           | they -med          | with/by/in, along -han          | he -af           | of/by/from/off/for/in/with/on, off -for          | at/for/to/from/by/of/ago, in front/before, because -ikke         | not -der          | who/which, there/those -var          | past tense of "to be" -mig          | me/myself -sig          | oneself/himself/herself/itself/themselves -men          | but -et           | a/an/one, one (number), someone/somebody/one -har          | present tense of "to have" -om           | round/about/for/in/a, about/around/down, if -vi           | we -min          | my -havde        | past tense of "to have" -ham          | him -hun          | she -nu           | now -over         | over/above/across/by/beyond/past/on/about, over/past -da           | then, when/as/since -fra          | from/off/since, off, since -du           | you -ud           | out -sin          | his/her/its/one's -dem          | them -os           | us/ourselves -op           | up -man          | you/one -hans         | his -hvor         | where -eller        | or -hvad         | what -skal         | must/shall etc. -selv         | myself/youself/herself/ourselves etc., even -her          | here -alle         | all/everyone/everybody etc. 
-vil          | will (verb) -blev         | past tense of "to stay/to remain/to get/to become" -kunne        | could -ind          | in -når          | when -være         | present tense of "to be" -dog          | however/yet/after all -noget        | something -ville        | would -jo           | you know/you see (adv), yes -deres        | their/theirs -efter        | after/behind/according to/for/by/from, later/afterwards -ned          | down -skulle       | should -denne        | this -end          | than -dette        | this -mit          | my/mine -også         | also -under        | under/beneath/below/during, below/underneath -have         | have -dig          | you -anden        | other -hende        | her -mine         | my -alt          | everything -meget        | much/very, plenty of -sit          | his, her, its, one's -sine         | his, her, its, one's -vor          | our -mod          | against -disse        | these -hvis         | if -din          | your/yours -nogle        | some -hos          | by/at -blive        | be/become -mange        | many -ad           | by/through -bliver       | present tense of "to be/to become" -hendes       | her/hers -været        | be -thi          | for (conj) -jer          | you -sådan        | such, like this/like that diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_de.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7ae..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. 
- | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. - - -aber           |  but - -alle           |  all -allem -allen -aller -alles - -als            |  than, as -also           |  so -am             |  an + dem -an             |  at - -ander          |  other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch           |  also -auf            |  on -aus            |  out of -bei            |  by -bin            |  am -bis            |  until -bist           |  art -da             |  there -damit          |  with it -dann           |  then - -der            |  the -den -des -dem -die -das - -daß            |  that - -derselbe       |  the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu           |  to that - -dein           |  thy -deine -deinem -deinen -deiner -deines - -denn           |  because - -derer          |  of those -dessen         |  of him - -dich           |  thee -dir            |  to thee -du             |  thou - -dies           |  this -diese -diesem -diesen -dieser -dieses - - -doch           |  (several meanings) -dort           |  (over) there - - -durch          |  through - -ein            |  a -eine -einem -einen -einer -eines - -einig          |  some -einige -einigem -einigen -einiger -einiges - -einmal         |  once - -er             |  he -ihn            |  him -ihm            |  to him - -es             |  it -etwas          |  something - -euer           |  your -eure -eurem -euren -eurer -eures - -für            |  for -gegen          |  towards -gewesen        |  p.p. 
of sein -hab            |  have -habe           |  have -haben          |  have -hat            |  has -hatte          |  had -hatten         |  had -hier           |  here -hin            |  there -hinter         |  behind - -ich            |  I -mich           |  me -mir            |  to me - - -ihr            |  you, to her -ihre -ihrem -ihren -ihrer -ihres -euch           |  to you - -im             |  in + dem -in             |  in -indem          |  while -ins            |  in + das -ist            |  is - -jede           |  each, every -jedem -jeden -jeder -jedes - -jene           |  that -jenem -jenen -jener -jenes - -jetzt          |  now -kann           |  can - -kein           |  no -keine -keinem -keinen -keiner -keines - -können         |  can -könnte         |  could -machen         |  do -man            |  one - -manche         |  some, many a -manchem -manchen -mancher -manches - -mein           |  my -meine -meinem -meinen -meiner -meines - -mit            |  with -muss           |  must -musste         |  had to -nach           |  to(wards) -nicht          |  not -nichts         |  nothing -noch           |  still, yet -nun            |  now -nur            |  only -ob             |  whether -oder           |  or -ohne           |  without -sehr           |  very - -sein           |  his -seine -seinem -seinen -seiner -seines - -selbst         |  self -sich           |  herself - -sie            |  they, she -ihnen          |  to them - -sind           |  are -so             |  so - -solche         |  such -solchem -solchen -solcher -solches - -soll           |  shall -sollte         |  should -sondern        |  but -sonst          |  else -über           |  over -um             |  about, around -und            |  and - -uns            |  us -unse -unsem -unsen -unser -unses - -unter          |  under -viel           |  much -vom            |  von + dem -von            |  from -vor            |  before -während        |  while -war            |  
was -waren          |  were -warst          |  wast -was            |  what -weg            |  away, off -weil           |  because -weiter         |  further - -welche         |  which -welchem -welchen -welcher -welches - -wenn           |  when -werde          |  will -werden         |  will -wie            |  how -wieder         |  again -will           |  want -wir            |  we -wird           |  will -wirst          |  willst -wo             |  where -wollen         |  want -wollte         |  wanted -würde          |  would -würden         |  would -zu             |  to -zum            |  zu + dem -zur            |  zu + der -zwar           |  indeed -zwischen       |  between - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_el.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_el.txt deleted file mode 100644 index 232681f5b..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_el.txt +++ /dev/null @@ -1,78 +0,0 @@ -# Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'σ' instead of 'ς'  -ο -η -το -οι -τα -του -τησ -των -τον -την -και  -κι -κ -ειμαι -εισαι -ειναι -ειμαστε -ειστε -στο -στον -στη -στην -μα -αλλα -απο -για -προσ -με -σε -ωσ -παρα -αντι -κατα -μετα -θα -να -δε -δεν -μη -μην -επι -ενω -εαν -αν -τοτε -που -πωσ -ποιοσ -ποια -ποιο -ποιοι -ποιεσ -ποιων -ποιουσ -αυτοσ -αυτη -αυτο -αυτοι -αυτων -αυτουσ -αυτεσ -αυτα -εκεινοσ -εκεινη -εκεινο -εκεινοι -εκεινεσ -εκεινα -εκεινων -εκεινουσ -οπωσ -ομωσ -ισωσ -οσο -οτι diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_en.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license 
agreements.  See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_es.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c8d..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. 
- - | Extra words have been added at the end. - -de             |  from, of -la             |  the, her -que            |  who, that -el             |  the -en             |  in -y              |  and -a              |  to -los            |  the, them -del            |  de + el -se             |  himself, from him etc -las            |  the, them -por            |  for, by, etc -un             |  a -para           |  for -con            |  with -no             |  no -una            |  a -su             |  his, her -al             |  a + el -  | es         from SER -lo             |  him -como           |  how -más            |  more -pero           |  pero -sus            |  su plural -le             |  to him, her -ya             |  already -o              |  or -  | fue        from SER -este           |  this -  | ha         from HABER -sí             |  himself etc -porque         |  because -esta           |  this -  | son        from SER -entre          |  between -  | está     from ESTAR -cuando         |  when -muy            |  very -sin            |  without -sobre          |  on -  | ser        from SER -  | tiene      from TENER -también        |  also -me             |  me -hasta          |  until -hay            |  there is/are -donde          |  where -  | han        from HABER -quien          |  whom, that -  | están      from ESTAR -  | estado     from ESTAR -desde          |  from -todo           |  all -nos            |  us -durante        |  during -  | estados    from ESTAR -todos          |  all -uno            |  a -les            |  to them -ni             |  nor -contra         |  against -otros          |  other -  | fueron     from SER -ese            |  that -eso            |  that -  | había      from HABER -ante           |  before -ellos          |  they -e              |  and (variant of y) -esto           |  this -mí             |  me -antes          |  before -algunos        |  some -qué            |  what? 
-unos           |  a -yo             |  I -otro           |  other -otras          |  other -otra           |  other -él             |  he -tanto          |  so much, many -esa            |  that -estos          |  these -mucho          |  much, many -quienes        |  who -nada           |  nothing -muchos         |  many -cual           |  who -  | sea        from SER -poco           |  few -ella           |  she -estar          |  to be -  | haber      from HABER -estas          |  these -  | estaba     from ESTAR -  | estamos    from ESTAR -algunas        |  some -algo           |  something -nosotros       |  we - -      | other forms - -mi             |  me -mis            |  mi plural -tú             |  thou -te             |  thee -ti             |  thee -tu             |  thy -tus            |  tu plural -ellas          |  they -nosotras       |  we -vosotros       |  you -vosotras       |  you -os             |  you -mío            |  mine -mía            | -míos           | -mías           | -tuyo           |  thine -tuya           | -tuyos          | -tuyas          | -suyo           |  his, hers, theirs -suya           | -suyos          | -suyas          | -nuestro        |  ours -nuestra        | -nuestros       | -nuestras       | -vuestro        |  yours -vuestra        | -vuestros       | -vuestras       | -esos           |  those -esas           |  those - -               | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - -               | forms of haber, to have (not including 
the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - -               | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería -serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido -  |  sed also means 'thirst' - -               | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_eu.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_eu.txt deleted file mode 100644 index 25f1db934..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,99 +0,0 @@ -# example set of basque stopwords -al -anitz -arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute -edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan 
-hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fa.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c6d..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fa.txt +++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'ي' instead of 'ی' -انان -نداشته -سراسر -خياه -ايشان -وي -تاكنون -بيشتري -دوم -پس -ناشي -وگو -يا -داشتند -سپس -هنگام -هرگز -پنج -نشان -امسال -ديگر -گروهي -شدند -چطور -ده -و -دو -نخستين -ولي -چرا -چه -وسط -ه -كدام -قابل -يك -رفت -هفت -همچنين -در -هزار -بله -بلي -شايد -اما -شناسي -گرفته -دهد -داشته -دانست -داشتن -خواهيم -ميليارد -وقتيكه -امد -خواهد -جز -اورده -شده -بلكه -خدمات -شدن -برخي -نبود -بسياري -جلوگيري -حق -كردند -نوعي -بعري -نكرده -نظير -نبايد -بوده -بودن -داد -اورد -هست -جايي -شود -دنبال -داده -بايد -سابق -هيچ -همان -انجا -كمتر -كجاست -گردد -كسي -تر -مردم -تان -دادن -بودند -سري -جدا -ندارند -مگر -يكديگر -دارد -دهند -بنابراين -هنگامي -سمت -جا -انچه -خود -دادند -زياد -دارند -اثر -بدون -بهترين -بيشتر -البته -به -براساس -بيرون -كرد -بعضي -گرفت -توي -اي -ميليون -او -جريان -تول -بر -مانند -برابر -باشيم -مدتي -گويند -اكنون -تا -تنها -جديد -چند -بي -نشده -كردن -كردم -گويد -كرده -كنيم -نمي -نزد -روي -قصد -فقط -بالاي -ديگران -اين -ديروز -توسط -سوم -ايم -دانند 
-سوي -استفاده -شما -كنار -داريم -ساخته -طور -امده -رفته -نخست -بيست -نزديك -طي -كنيد -از -انها -تمامي -داشت -يكي -طريق -اش -چيست -روب -نمايد -گفت -چندين -چيزي -تواند -ام -ايا -با -ان -ايد -ترين -اينكه -ديگري -راه -هايي -بروز -همچنان -پاعين -كس -حدود -مختلف -مقابل -چيز -گيرد -ندارد -ضد -همچون -سازي -شان -مورد -باره -مرسي -خويش -برخوردار -چون -خارج -شش -هنوز -تحت -ضمن -هستيم -گفته -فكر -بسيار -پيش -براي -روزهاي -انكه -نخواهد -بالا -كل -وقتي -كي -چنين -كه -گيري -نيست -است -كجا -كند -نيز -يابد -بندي -حتي -توانند -عقب -خواست -كنند -بين -تمام -همه -ما -باشند -مثل -شد -اري -باشد -اره -طبق -بعد -اگر -صورت -غير -جاي -بيش -ريزي -اند -زيرا -چگونه -بار -لطفا -مي -درباره -من -ديده -همين -گذاري -برداري -علت -گذاشته -هم -فوق -نه -ها -شوند -اباد -همواره -هر -اول -خواهند -چهار -نام -امروز -مان -هاي -قبل -كنم -سعي -تازه -را -هستند -زير -جلوي -عنوان -بود diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fi.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a05..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. 
- | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" -  -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole        | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en         | negation -et -ei -emme -ette -eivät - -|Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans -minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I -sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you -hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she -me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we -te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you -he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they - -tämä   tämän         tätä   tässä   tästä   tähän  tallä   tältä   tälle   tänä   täksi  | this -tuo    tuon          tuotä  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that -se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it -nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these -nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those -ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they - -kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl) -mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what -mitkä                                                                                    | (pl) - -joka 
  jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which -jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl) - -| conjunctions - -että   | that -ja     | and -jos    | if -koska  | because -kuin   | than -mutta  | but -niin   | so -sekä   | and -sillä  | for -tai    | or -vaan   | but -vai    | or -vaikka | although - - -| prepositions - -kanssa  | with -mukaan  | according to -noin    | about -poikki  | across -yli     | over, across - -| other - -kun    | when -niin   | so -nyt    | now -itse   | self - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fr.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae68..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au             |  a + le -aux            |  a + les -avec           |  with -ce             |  this -ces            |  these -dans           |  with -de             |  of -des            |  de + les -du             |  de + le -elle           |  she -en             |  `of them' etc -et             |  and -eux            |  them -il             |  he -je             |  I -la             |  the -le             |  the -leur           |  their -lui            |  him -ma             |  my (fem) -mais           |  but -me             |  me -même           |  same; as in moi-même (myself) etc -mes            |  me (pl) -moi            |  me -mon            |  my (masc) -ne             |  not -nos            |  our (pl) -notre          |  our -nous           |  we -on             |  one -ou             |  where -par            |  by -pas            |  not -pour           |  for -qu             |  que before vowel -que            |  that -qui            |  who -sa             |  his, her (fem) -se             |  oneself -ses            |  his (pl) -son            |  his, her (masc) -sur            |  on -ta             |  thy (fem) -te             |  thee -tes            |  thy (pl) -toi            |  thee -ton            |  thy (masc) -tu             |  thou -un             |  a -une            |  a -vos            |  your (pl) -votre          |  your -vous           |  you - -               |  single letter forms - -c              |  c' -d              |  d' -j              |  j' -l              |  l' -à              |  to, at -m              |  m' -n              |  n' -s              |  s' -t              |  t' -y              |  there - -               | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses 
-fût -fussions -fussiez -fussent - -               | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - -               | Later additions (from Jean-Christophe Deschamps) -ceci           |  this -cela           |  that -celà           |  that -cet            |  this -cette          |  this -ici            |  here -ils            |  they -les            |  the (pl) -leurs          |  their (pl) -quel           |  which -quels          |  which -quelle         |  which -quelles        |  which -sans           |  without -soi            |  oneself - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ga.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ga.txt deleted file mode 100644 index 9ff88d747..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,110 +0,0 @@ - -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtí -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -ní -níor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sí -tar -thar -thú -triúr -trí -trína -trínár -tríocha -tú -um -ár -é -éis -í -ó -ón -óna -ónár diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_gl.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_gl.txt deleted file mode 100644 index d8760b12c..000000000 --- 
a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_gl.txt +++ /dev/null @@ -1,161 +0,0 @@ -# galican stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns -nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hi.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb08..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer  -# for spelling variation (see section below), such that it can be used whether or  -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well.  
-अंदर -अत -अपना -अपनी -अपने -अभी -आदि -आप -इत्यादि -इन  -इनका -इन्हीं -इन्हें -इन्हों -इस -इसका -इसकी -इसके -इसमें -इसी -इसे -उन -उनका -उनकी -उनके -उनको -उन्हीं -उन्हें -उन्हों -उस -उसके -उसी -उसे -एक -एवं -एस -ऐसे -और -कई -कर -करता -करते -करना -करने -करें -कहते -कहा -का -काफ़ी -कि -कितना -किन्हें -किन्हों -किया -किर -किस -किसी -किसे -की -कुछ -कुल -के -को -कोई -कौन -कौनसा -गया -घर -जब -जहाँ -जा -जितना -जिन -जिन्हें -जिन्हों -जिस -जिसे -जीधर -जैसा -जैसे -जो -तक -तब -तरह -तिन -तिन्हें -तिन्हों -तिस -तिसे -तो -था -थी -थे -दबारा -दिया -दुसरा -दूसरे -दो -द्वारा -न -नहीं -ना -निहायत -नीचे -ने -पर -पर   -पहले -पूरा -पे -फिर -बनी -बही -बहुत -बाद -बाला -बिलकुल -भी -भीतर -मगर -मानो -मे -में -यदि -यह -यहाँ -यही -या -यिह  -ये -रखें -रहा -रहे -ऱ्वासा -लिए -लिये -लेकिन -व -वर्ग -वह -वह  -वहाँ -वहीं -वाले -वुह  -वे -वग़ैरह -संग -सकता -सकते -सबसे -सभी -साथ -साबुत -साभ -सारा -से -सो -ही -हुआ -हुई -हुए -है -हैं -हो -होता -होती -होते -होना -होने -# additional normalized forms of the above -अपनि -जेसे -होति -सभि -तिंहों -इंहों -दवारा -इसि -किंहें -थि -उंहों -ओर -जिंहें -वहिं -अभि -बनि -हि -उंहिं -उंहें -हें -वगेरह -एसे -रवासा -कोन -निचे -काफि -उसि -पुरा -भितर -हे -बहि -वहां -कोइ -यहां -जिंहों -तिंहें -किसि -कइ -यहि -इंहिं -जिधर -इंहें -अदि -इतयादि -हुइ -कोनसा -इसकि -दुसरे -जहां -अप -किंहों -उनकि -भि -वरग -हुअ -जेसा -नहिं diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hu.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da8a..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. 
- | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" -  -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. -ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hy.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hy.txt deleted file mode 100644 index 60c1c50fb..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,46 +0,0 @@ -# example set of Armenian stopwords. 
-այդ -այլ -այն -այս -դու -դուք -եմ -են -ենք -ես -եք -է -էի -էին -էինք -էիր -էիք -էր -ըստ -թ -ի -ին -իսկ -իր -կամ -համար -հետ -հետո -մենք -մեջ -մի -ն -նա -նաև -նրա -նրանք -որ -որը -որոնք -որպես -ու -ում -պիտի -վրա -և diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_id.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_id.txt deleted file mode 100644 index 4617f83a5..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,359 +0,0 @@ -# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya -hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun 
-dikarenakan -karena -karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin -mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_it.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc773..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 @@ - | 
From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad             |  a (to) before vowel -al             |  a + il -allo           |  a + lo -ai             |  a + i -agli           |  a + gli -all            |  a + l' -agl            |  a + gl' -alla           |  a + la -alle           |  a + le -con            |  with -col            |  con + il -coi            |  con + i (forms collo, cogli etc are now very rare) -da             |  from -dal            |  da + il -dallo          |  da + lo -dai            |  da + i -dagli          |  da + gli -dall           |  da + l' -dagl           |  da + gll' -dalla          |  da + la -dalle          |  da + le -di             |  of -del            |  di + il -dello          |  di + lo -dei            |  di + i -degli          |  di + gli -dell           |  di + l' -degl           |  di + gl' -della          |  di + la -delle          |  di + le -in             |  in -nel            |  in + el -nello          |  in + lo -nei            |  in + i -negli          |  in + gli -nell           |  in + l' -negl           |  in + gl' -nella          |  in + la -nelle          |  in + le -su             |  on -sul            |  su + il -sullo          |  su + lo -sui            |  su + i -sugli          |  su + gli -sull           |  su + l' -sugl           |  su + gl' -sulla          |  su + la -sulle          |  su + le -per            |  through, by -tra            |  among -contro         |  against -io             |  I -tu             |  thou -lui            |  he -lei   
         |  she -noi            |  we -voi            |  you -loro           |  they -mio            |  my -mia            | -miei           | -mie            | -tuo            | -tua            | -tuoi           |  thy -tue            | -suo            | -sua            | -suoi           |  his, her -sue            | -nostro         |  our -nostra         | -nostri         | -nostre         | -vostro         |  your -vostra         | -vostri         | -vostre         | -mi             |  me -ti             |  thee -ci             |  us, there -vi             |  you, there -lo             |  him, the -la             |  her, the -li             |  them -le             |  them, the -gli            |  to him, the -ne             |  from there etc -il             |  the -un             |  a -uno            |  a -una            |  a -ma             |  but -ed             |  and -se             |  if -perché         |  why, because -anche          |  also -come           |  how -dov            |  where (as dov') -dove           |  where -che            |  who, that -chi            |  who -cui            |  whom -non            |  not -più            |  more -quale          |  who, that -quanto         |  how much -quanti         | -quanta         | -quante         | -quello         |  that -quelli         | -quella         | -quelle         | -questo         |  this -questi         | -questa         | -queste         | -si             |  yes -tutto          |  all -tutti          |  all - -               |  single letter forms: - -a              |  at -c              |  as c' for ce or ci -e              |  and -i              |  the -l              |  as l' -o              |  or - -               | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi 
-avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - -               | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - -               | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - -               | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ja.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6b..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ja.txt +++ /dev/null @@ -1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out.  See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter.  
When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner.  Change your StopFilter -# configuration if you need case-sensitive stopping.  Lastly, note that stopping is done -# using the same character width as the entries in this file.  Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. -# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_lv.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_lv.txt deleted file mode 100644 index e21a23c06..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,172 +0,0 @@ -# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined:  -#   pronouns, adverbs, interjections were removed -#  -# prepositions -aiz -ap -ar -apakš -ārpus -augšpus -bez -caur -dēļ -gar -iekš -iz -kopš -labad -lejpus -līdz -no -otrpus -pa -par -pār -pēc -pie -pirms -pret -priekš -starp -šaipus -uz -viņpus -virs -virspus -zem -apakšpus -# Conjunctions -un -bet -jo -ja -ka -lai -tomēr -tikko -turpretī -arī -kaut -gan -tādēļ -tā -ne -tikvien -vien -kā -ir -te -vai -kamēr -# Particles -ar -diezin -droši -diemžēl -nebūt -ik -it -taču -nu -pat -tiklab -iekšpus -nedz -tik -nevis -turpretim -jeb -iekam -iekām 
-iekāms -kolīdz -līdzko -tiklīdz -jebšu -tālab -tāpēc -nekā -itin -jā -jau -jel -nē -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -būt   -biju  -biji -bija -bijām -bijāt -esmu -esi -esam -esat  -būšu      -būsi -būs -būsim -būsiet -tikt -tiku -tiki -tika -tikām -tikāt -tieku -tiec -tiek -tiekam -tiekat -tikšu -tiks -tiksim -tiksiet -tapt -tapi -tapāt -topat -tapšu -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva -kļuvām -kļuvāt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# verbs -varēt -varēju -varējām -varēšu -varēsim -var -varēji -varējāt -varēsi -varēsiet -varat -varēja -varēs diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_nl.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aeacf..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. 
- -de             |  the -en             |  and -van            |  of, from -ik             |  I, the ego -te             |  (1) chez, at etc, (2) to, (3) too -dat            |  that, which -die            |  that, those, who, which -in             |  in, inside -een            |  a, an, one -hij            |  he -het            |  the, it -niet           |  not, nothing, naught -zijn           |  (1) to be, being, (2) his, one's, its -is             |  is -was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op             |  on, upon, at, in, up, used up -aan            |  on, upon, to (as dative) -met            |  with, by -als            |  like, such as, when -voor           |  (1) before, in front of, (2) furrow -had            |  had, past tense all persons sing. of 'hebben' (have) -er             |  there -maar           |  but, only -om             |  round, about, for etc -hem            |  him -dan            |  then -zou            |  should/would, past tense all persons sing. of 'zullen' -of             |  or, whether, if -wat            |  what, something, anything -mijn           |  possessive and noun 'mine' -men            |  people, 'one' -dit            |  this -zo             |  so, thus, in this way -door           |  through by -over           |  over, across -ze             |  she, her, they, them -zich           |  oneself -bij            |  (1) a bee, (2) by, near, at -ook            |  also, too -tot            |  till, until -je             |  you -mij            |  me -uit            |  out of, from -der            |  Old Dutch form of 'van der' still found in surnames -daar           |  (1) there, (2) because -haar           |  (1) her, their, them, (2) hair -naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as -heb            |  present first person sing. of 'to have' -hoe            |  how, why -heeft          |  present third person sing. 
of 'to have' -hebben         |  'to have' and various parts thereof -deze           |  this -u              |  you -want           |  (1) for, (2) mitten, (3) rigging -nog            |  yet, still -zal            |  'shall', first and third person sing. of verb 'zullen' (will) -me             |  me -zij            |  she, they -nu             |  now -ge             |  'thou', still used in Belgium and south Netherlands -geen           |  none -omdat          |  because -iets           |  something, somewhat -worden         |  to become, grow, get -toch           |  yet, still -al             |  all, every, each -waren          |  (1) 'were' (2) to wander, (3) wares, (3) -veel           |  much, many -meer           |  (1) more, (2) lake -doen           |  to do, to make -toen           |  then, when -moet           |  noun 'spot/mote' and present form of 'to must' -ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder         |  without -kan            |  noun 'can' and present form of 'to be able' -hun            |  their, them -dus            |  so, consequently -alles          |  all, everything, anything -onder          |  under, beneath -ja             |  yes, of course -eens           |  once, one day -hier           |  here -wie            |  who -werd           |  imperfect third person sing. of 'become' -altijd         |  always -doch           |  yet, but etc -wordt          |  present third person sing. 
of 'become' -wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen         |  to be able -ons            |  us/our -zelf           |  self -tegen          |  against, towards, at -na             |  after, near -reeds          |  already -wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender -kon            |  could; past tense of 'to be able' -niets          |  nothing -uw             |  your -iemand         |  somebody -geweest        |  been; past participle of 'be' -andere         |  other diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_no.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28ba..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 - -og             | and -i              | in -jeg            | I -det            | it/this/that -at             | to (w. inf.) 
-en             | a/an -et             | a/an -den            | it/this/that -til            | to -er             | is/am/are -som            | who/that -på             | on -de             | they / you(formal) -med            | with -han            | he -av             | of -ikke           | not -ikkje          | not * -der            | there -så             | so -var            | was/were -meg            | me -seg            | you -men            | but -ett            | one -har            | have -om             | about -vi             | we -min            | my -mitt           | my -ha             | have -hadde          | had -hun            | she -nå             | now -over           | over -da             | when/as -ved            | by/know -fra            | from -du             | you -ut             | out -sin            | your -dem            | them -oss            | us -opp            | up -man            | you/one -kan            | can -hans           | his -hvor           | where -eller          | or -hva            | what -skal           | shall/must -selv           | self (reflective) -sjøl           | self (reflective) -her            | here -alle           | all -vil            | will -bli            | become -ble            | became -blei           | became * -blitt          | have become -kunne          | could -inn            | in -når            | when -være           | be -kom            | come -noen           | some -noe            | some -ville          | would -dere           | you -som            | who/which/that -deres          | their/theirs -kun            | only/just -ja             | yes -etter          | after -ned            | down -skulle         | should -denne          | this -for            | for/because -deg            | you -si             | hers/his -sine           | hers/his -sitt           | hers/his -mot            | against -å              | to -meget          | much -hvorfor        | why -dette          | this -disse         
 | these/those -uten           | without -hvordan        | how -ingen          | none -din            | your -ditt           | your -blir           | become -samme          | same -hvilken        | which -hvilke         | which (plural) -sånn           | such a -inni           | inside/within -mellom         | between -vår            | our -hver           | each -hvem           | who -vors           | us/ours -hvis           | whose -både           | both -bare           | only/just -enn            | than -fordi          | as/because -før            | before -mange          | many -også           | also -slik           | just -vært           | been -være           | to be -båe            | both * -begge          | both -siden          | since -dykk           | your * -dykkar         | yours * -dei            | they * -deira          | them * -deires         | theirs * -deim           | them * -di             | your (fem.) * -då             | as/when * -eg             | I * -ein            | a/an * -eit            | a/an * -eitt           | a/an * -elles          | or * -honom          | he * -hjå            | at * -ho             | she * -hoe            | she * -henne          | her -hennar         | her/hers -hennes         | hers -hoss           | how * -hossen         | how * -ikkje          | not * -ingi           | noone * -inkje          | noone * -korleis        | how * -korso          | how * -kva            | what/which * -kvar           | where * -kvarhelst      | where * -kven           | who/whom * -kvi            | why * -kvifor         | why * -me             | we * -medan          | while * -mi             | my * -mine           | my * -mykje          | much * -no             | now * -nokon          | some (masc./neut.) * -noka           | some (fem.) 
* -nokor          | some * -noko           | some * -nokre          | some * -si             | his/hers * -sia            | since * -sidan          | since * -so             | so * -somt           | some * -somme          | some * -um             | about* -upp            | up * -vere           | be * -vore           | was * -verte          | become * -vort           | become * -varte          | became * -vart           | became * - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_pt.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01af..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de             |  of, from -a              |  the; to, at; her -o              |  the; him -que            |  who, that -e              |  and -do             |  de + o -da             |  de + a -em             |  in -um             |  a -para           |  for -  | é          from SER -com            |  with -não            |  not, no -uma            |  a -os             |  the; them -no             |  em + o -se             |  himself etc -na             |  em + a -por            |  for -mais           |  more -as             |  the; them -dos            |  de + os -como           |  as, like -mas            |  but -  | foi        from SER -ao             |  a + o -ele            |  he -das            |  de + as -  | tem        from TER -à              |  a + a -seu            |  his -sua            |  her -ou             |  or -  | ser        from SER -quando         |  when -muito          |  much -  | há         from HAV -nos            |  em + os; us -já             |  already, now -  | está       from EST -eu             |  I -também         |  also -só             |  only, just -pelo           |  per + o -pela           |  per + a -até            |  up to -isso           |  that -ela            |  he -entre          |  between -  | era        from SER -depois         |  after -sem            |  without -mesmo          |  same -aos            |  a + os -  | ter        from TER -seus           |  his -quem           |  whom -nas            |  em + as -me             |  me -esse           |  that -eles           |  they -  | estão      from EST -você           |  you -  | tinha      from TER -  | foram      from SER -essa           |  that -num            |  em + um -nem            |  nor -suas           |  her -meu            |  my -às             |  a + as -minha          |  my -  | têm        from TER -numa           |  em + uma -pelos          |  per + os -elas           |  they -  | havia      from HAV -  | seja       from SER -qual           |  which - 
 | será       from SER -nós            |  we -  | tenho      from TER -lhe            |  to him, her -deles          |  of them -essas          |  those -esses          |  those -pelas          |  per + as -este           |  this -  | fosse      from SER -dele           |  of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. - -tu             |  thou -te             |  thee -vocês          |  you (plural) -vos            |  you -lhes           |  to them -meus           |  my -minhas -teu            |  thy -tua -teus -tuas -nosso          | our -nossa -nossos -nossas - -dela           |  of her -delas          |  of them - -esta           |  this -estes          |  these -estas          |  these -aquele         |  that -aquela         |  that -aqueles        |  those -aquelas        |  those -isto           |  this -aquilo         |  that - -               | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - -               | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houveríamos -houveriam - -               | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -seríamos -seriam - -               | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tínhamos -tinham -tive -teve -tivemos -tiveram -tivera 
-tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -teríamos -teriam diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ro.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ro.txt deleted file mode 100644 index 4fdee90a5..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,233 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în  -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi 
-totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ru.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ru.txt deleted file mode 100644 index 55271400c..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `ё' is translated to `е'. - -и              | and -в              | in/into -во             | alternative form -не             | not -что            | what/that -он             | he -на             | on/onto -я              | i -с              | from -со             | alternative form -как            | how -а              | milder form of `no' (but) -то             | conjunction and form of `that' -все            | all -она            | she -так            | so, thus -его            | him -но             | but -да             | yes/and -ты             | thou -к              | towards, by -у              | around, chez -же             | intensifier particle -вы             | you -за             | beyond, behind -бы             | conditional/subj. 
particle -по             | up to, along -только         | only -ее             | her -мне            | to me -было           | it was -вот            | here is/are, particle -от             | away from -меня           | me -еще            | still, yet, more -нет            | no, there isnt/arent -о              | about -из             | out of -ему            | to him -теперь         | now -когда          | when -даже           | even -ну             | so, well -вдруг          | suddenly -ли             | interrogative particle -если           | if -уже            | already, but homonym of `narrower' -или            | or -ни             | neither -быть           | to be -был            | he was -него           | prepositional form of его -до             | up to -вас            | you accusative -нибудь         | indef. suffix preceded by hyphen -опять          | again -уж             | already, but homonym of `adder' -вам            | to you -сказал         | he said -ведь           | particle `after all' -там            | there -потом          | then -себя           | oneself -ничего         | nothing -ей             | to her -может          | usually with `быть' as `maybe' -они            | they -тут            | here -где            | where -есть           | there is/are -надо           | got to, must -ней            | prepositional form of  ей -для            | for -мы             | we -тебя           | thee -их             | them, their -чем            | than -была           | she was -сам            | self -чтоб           | in order to -без            | without -будто          | as if -человек        | man, person, one -чего           | genitive form of `what' -раз            | once -тоже           | also -себе           | to oneself -под            | beneath -жизнь          | life -будет          | will be -ж              | short form of intensifer particle `же' -тогда          | then -кто            | who -этот           | this -говорил        | was saying 
-того           | genitive form of `that' -потому         | for that reason -этого          | genitive form of `this' -какой          | which -совсем         | altogether -ним            | prepositional form of `его', `они' -здесь          | here -этом           | prepositional form of `этот' -один           | one -почти          | almost -мой            | my -тем            | instrumental/dative plural of `тот', `то' -чтобы          | full form of `in order that' -нее            | her (acc.) -кажется        | it seems -сейчас         | now -были           | they were -куда           | where to -зачем          | why -сказать        | to say -всех           | all (acc., gen. preposn. plural) -никогда        | never -сегодня        | today -можно          | possible, one can -при            | by -наконец        | finally -два            | two -об             | alternative form of `о', about -другой         | another -хоть           | even -после          | after -над            | above -больше         | more -тот            | that one (masc.) -через          | across, in -эти            | these -нас            | us -про            | about -всего          | in all, only, of all -них            | prepositional form of `они' (they) -какая          | which, feminine -много          | lots -разве          | interrogative particle -сказала        | she said -три            | three -эту            | this, acc. fem. sing. -моя            | my, feminine -впрочем        | moreover, besides -хорошо         | good -свою           | ones own, acc. fem. sing. -этой           | oblique form of `эта', fem. `this' -перед          | in front of -иногда         | sometimes -лучше          | better -чуть           | a little -том            | preposn. form of `that one' -нельзя         | one must not -такой          | such a one -им             | to them -более          | more -всегда         | always -конечно        | of course -всю            | acc. fem. 
sing of `all' -между          | between - - -  | b: some paradigms -  | -  | personal pronouns -  | -  | я  меня  мне  мной  [мною] -  | ты  тебя  тебе  тобой  [тобою] -  | он  его  ему  им  [него, нему, ним] -  | она  ее  эи  ею  [нее, нэи, нею] -  | оно  его  ему  им  [него, нему, ним] -  | -  | мы  нас  нам  нами -  | вы  вас  вам  вами -  | они  их  им  ими  [них, ним, ними] -  | -  |   себя  себе  собой   [собою] -  | -  | demonstrative pronouns: этот (this), тот (that) -  | -  | этот  эта  это  эти -  | этого  эты  это  эти -  | этого  этой  этого  этих -  | этому  этой  этому  этим -  | этим  этой  этим  [этою]  этими -  | этом  этой  этом  этих -  | -  | тот  та  то  те -  | того  ту  то  те -  | того  той  того  тех -  | тому  той  тому  тем -  | тем  той  тем  [тою]  теми -  | том  той  том  тех -  | -  | determinative pronouns -  | -  | (a) весь (all) -  | -  | весь  вся  все  все -  | всего  всю  все  все -  | всего  всей  всего  всех -  | всему  всей  всему  всем -  | всем  всей  всем  [всею]  всеми -  | всем  всей  всем  всех -  | -  | (b) сам (himself etc) -  | -  | сам  сама  само  сами -  | самого саму  само  самих -  | самого самой самого  самих -  | самому самой самому  самим -  | самим  самой  самим  [самою]  самими -  | самом самой самом  самих -  | -  | stems of verbs `to be', `to have', `to do' and modal -  | -  | быть  бы  буд  быв  есть  суть -  | име -  | дел -  | мог   мож  мочь -  | уме -  | хоч  хот -  | долж -  | можн -  | нужн -  | нельзя - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_sv.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f67..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - |  - Encoding was converted to UTF-8. - |  - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - |  så = so, but also seed. These are indicated clearly below. - -och            | and -det            | it, this/that -att            | to (with infinitive) -i              | in, at -en             | a -jag            | I -hon            | she -som            | who, that -han            | he -på             | on -den            | it, this/that -med            | with -var            | where, each -sig            | him(self) etc -för            | for -så             | so (also: seed) -till           | to -är             | is -men            | but -ett            | a -om             | if; around, about -hade           | had -de             | they, these/those -av             | of -icke           | not, no -mig            | me -du             | you -henne          | her -då             | then, when -sin            | his -nu             | now -har            | have -inte           | inte någon = no one -hans           | his -honom          | him -skulle         | 'sake' -hennes         | her -där            | there -min            | my -man            | one (pronoun) -ej             | nor -vid            | at, by, on (also: vast) -kunde          | could -något          | some etc -från           | from, off -ut             | out -när            | when -efter          | after, behind -upp            | up -vi             | we -dem            | them -vara           | be -vad            | what -över           | over -än  
           | than -dig            | you -kan            | can -sina           | his -här            | here -ha             | have -mot            | towards -alla           | all -under          | under (also: wonder) -någon          | some etc -eller          | or (else) -allt           | all -mycket         | much -sedan          | since -ju             | why -denna          | this/that -själv          | myself, yourself etc -detta          | this/that -åt             | to -utan           | without -varit          | was -hur            | how -ingen          | no -mitt           | my -ni             | you -bli            | to be, become -blev           | from bli -oss            | us -din            | thy -dessa          | these/those -några          | some etc -deras          | their -blir           | from bli -mina           | my -samma          | (the) same -vilken         | who, that -er             | you, your -sådan          | such a -vår            | our -blivit         | from bli -dess           | its -inom           | within -mellan         | between -sådant         | such a -varför         | why -varje          | each -vilka          | who, that -ditt           | thy -vem            | who -vilket         | who, that -sitta          | his -sådana         | such a -vart           | each -dina           | thy -vars           | whose -vårt           | our -våra           | our -ert            | your -era            | your -vilkas         | whose - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_th.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_th.txt deleted file mode 100644 index 07f0fabe6..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย 
-แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_tr.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_tr.txt deleted file mode 100644 index 84d9408d4..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,212 +0,0 @@ -# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -#   (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben -benden -beni -benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin 
-o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/userdict_ja.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/userdict_ja.txt deleted file mode 100644 index 6f0368e4d..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags.  Notice that entries do -# not have weights since they are always used when found.  This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same <text> is undefined. -# -# Whitespace only lines are ignored.  Comments are not allowed on entry lines. 
-# - -# Custom segmentation for kanji compounds -日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 -関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 - -# Custom segmentation for compound katakana -トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 -ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 - -# Custom reading for former sumo wrestler -朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/mail-data-config.xml b/solr-8.1.1/example/example-DIH/solr/mail/conf/mail-data-config.xml deleted file mode 100644 index 736aea7cc..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/mail-data-config.xml +++ /dev/null @@ -1,12 +0,0 @@ -<dataConfig> -  <document> -      <!-- -        Note - In order to index attachments, set processAttachement="true" and drop -        Tika and its dependencies to example-DIH/solr/mail/lib directory -       --> -      <entity processor="MailEntityProcessor" user="email@gmail.com" -            password="password" host="imap.gmail.com" protocol="gimaps" -            fetchMailsSince="2014-06-30 00:00:00" batchSize="20" folders="inbox" processAttachement="false" -            name="mail_entity"/> -  </document> -</dataConfig> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/managed-schema b/solr-8.1.1/example/example-DIH/solr/mail/conf/managed-schema deleted file mode 100644 index 1a371d446..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/managed-schema +++ /dev/null @@ -1,1062 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements.  See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License.  
You may obtain a copy of the License at - -     http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!--   - This is the Solr schema file. This file should be named "schema.xml" and - should be in the conf directory under the solr home - (i.e. ./solr/conf/schema.xml by default)  - or located where the classloader for the Solr webapp can find it. - - This example schema is the recommended starting point for users. - It should be kept correct and concise, usable out-of-the-box. - - For more information, on how to customize this file, please see - http://wiki.apache.org/solr/SchemaXml - - PERFORMANCE NOTE: this schema includes many optional features and should not - be used for benchmarking.  To improve performance one could -  - set stored="false" for all fields possible (esp large fields) when you -    only need to search on the field but don't need to return the original -    value. -  - set indexed="false" if you don't need to search on the field, but only -    return the field as a result of searching on other indexed fields. -  - remove all unneeded copyField statements -  - for best index size and searching performance, set "index" to false -    for all general text fields, use copyField to copy them to the -    catchall "text" field, and use that for searching. -  - For maximum indexing performance, use the ConcurrentUpdateSolrServer -    java client. -  - Remember to run the JVM in server mode, and use a higher logging level -    that avoids logging every request ---> - -<schema name="example-DIH-mail" version="1.6"> -  <!-- attribute "name" is the name of this schema and is only used for display purposes. 
-       version="x.y" is Solr's version number for the schema syntax and  -       semantics.  It should not normally be changed by applications. - -       1.0: multiValued attribute did not exist, all fields are multiValued  -            by nature -       1.1: multiValued attribute introduced, false by default  -       1.2: omitTermFreqAndPositions attribute introduced, true by default  -            except for text fields. -       1.3: removed optional field compress feature -       1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser -            behavior when a single string produces multiple tokens.  Defaults  -            to off for version >= 1.4 -       1.5: omitNorms defaults to true for primitive field types  -            (int, float, boolean, string...) -       1.6: useDocValuesAsStored defaults to true.             -     --> - - -    <!-- Valid attributes for fields: -     name: mandatory - the name for the field -     type: mandatory - the name of a field type from the  -       fieldTypes section -     indexed: true if this field should be indexed (searchable or sortable) -     stored: true if this field should be retrievable -     docValues: true if this field should have doc values. Doc values are -       useful (required, if you are using *Point fields) for faceting,  -       grouping, sorting and function queries. Doc values will make the index  -       faster to load, more NRT-friendly and more memory-efficient.  
-       They however come with some limitations: they are currently only  -       supported by StrField, UUIDField, all *PointFields, and depending -       on the field type, they might require the field to be single-valued, -       be required or have a default value (check the documentation -       of the field type you're interested in for more information) -     multiValued: true if this field may contain multiple values per document -     omitNorms: (expert) set to true to omit the norms associated with -       this field (this disables length normalization and index-time -       boosting for the field, and saves some memory).  Only full-text -       fields or fields that need an index-time boost need norms. -       Norms are omitted for primitive (non-analyzed) types by default. -     termVectors: [false] set to true to store the term vector for a -       given field. -       When using MoreLikeThis, fields used for similarity should be -       stored for best performance. -     termPositions: Store position information with the term vector.   -       This will increase storage costs. -     termOffsets: Store offset information with the term vector. This  -       will increase storage costs. -     required: The field is required.  It will throw an error if the -       value does not exist -     default: a value that should be used if no value is specified -       when adding a document. -    --> - -   <!-- field names should consist of alphanumeric or underscore characters only and -      not start with a digit.  This is not currently strictly enforced, -      but other field names will not have first class support from all components -      and back compatibility is not guaranteed.  Names with both leading and -      trailing underscores (e.g. _version_) are reserved. -   --> - -   <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml -      or Solr won't start. 
_version_ and update log are required for SolrCloud -   -->  -   <field name="_version_" type="plong" indexed="true" stored="true"/> -    -   <field name="content" type="text_general" indexed="true" stored="true" multiValued="true"/> - -   <!-- catchall field, containing all other searchable text fields (implemented -        via copyField further on in this schema  --> -   <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> - -   <field name="messageId" type="string" indexed="true" stored="true" required="true" multiValued="false"/> -   <field name="subject" type="text_general" indexed="true" stored="true"/> -   <field name="from" type="string" indexed="true" stored="true" omitNorms="true"/> -   <field name="sentDate" type="pdate" indexed="true" stored="true"/> -   <field name="xMailer" type="string" indexed="true" stored="true" omitNorms="true"/> - -   <field name="allTo" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> -   <field name="flags" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> -   <field name="attachment" type="text_general" indexed="true" stored="true" multiValued="true"/> -   <field name="attachmentNames" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> - -   <!-- Dynamic field definitions allow using convention over configuration -       for fields via the specification of patterns to match field names. -       EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i) -       RESTRICTION: the glob-like pattern in the name attribute must have -       a "*" only at the start or the end.  
--> -    -   <dynamicField name="*_i"  type="pint"    indexed="true"  stored="true"/> -   <dynamicField name="*_is" type="pint"    indexed="true"  stored="true"  multiValued="true"/> -   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true" /> -   <dynamicField name="*_s_ns"  type="string"  indexed="true"  stored="false" /> -   <dynamicField name="*_ss" type="string"  indexed="true"  stored="true" multiValued="true"/> -   <dynamicField name="*_l"  type="plong"   indexed="true"  stored="true"/> -   <dynamicField name="*_l_ns"  type="plong"   indexed="true"  stored="false"/> -   <dynamicField name="*_ls" type="plong"   indexed="true"  stored="true"  multiValued="true"/> -   <dynamicField name="*_t"  type="text_general"    indexed="true"  stored="true"/> -   <dynamicField name="*_txt" type="text_general"   indexed="true"  stored="true" multiValued="true"/> -   <dynamicField name="*_en"  type="text_en"    indexed="true"  stored="true" multiValued="true"/> -   <dynamicField name="*_b"  type="boolean" indexed="true" stored="true"/> -   <dynamicField name="*_bs" type="boolean" indexed="true" stored="true"  multiValued="true"/> -   <dynamicField name="*_f"  type="pfloat"  indexed="true"  stored="true"/> -   <dynamicField name="*_fs" type="pfloat"  indexed="true"  stored="true"  multiValued="true"/> -   <dynamicField name="*_d"  type="pdouble" indexed="true"  stored="true"/> -   <dynamicField name="*_ds" type="pdouble" indexed="true"  stored="true"  multiValued="true"/> - -   <!-- Type used to index the lat and lon components for the "location" FieldType --> -   <dynamicField name="*_coordinate"  type="pdouble" indexed="true"  stored="false" /> - -   <dynamicField name="*_dt"  type="pdate"    indexed="true"  stored="true"/> -   <dynamicField name="*_dts" type="pdate"    indexed="true"  stored="true" multiValued="true"/> -   <dynamicField name="*_p"  type="location" indexed="true" stored="true"/> - -   <dynamicField name="*_c"   type="currency" 
indexed="true"  stored="true"/> - -   <dynamicField name="ignored_*" type="ignored" multiValued="true"/> -   <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> - -   <dynamicField name="random_*" type="random" /> - -   <!-- uncomment the following to ignore any fields that don't already match an existing  -        field name or dynamic field, rather than reporting them as an error.  -        alternately, change the type="ignored" to some other type e.g. "text" if you want  -        unknown fields indexed and/or stored by default -->  -   <!--dynamicField name="*" type="ignored" multiValued="true" /--> -    - - - - <!-- Field to use to determine and enforce document uniqueness.  -      Unless this field is marked with required="false", it will be a required field -   --> - <uniqueKey>messageId</uniqueKey> - -  <!-- copyField commands copy one field to another at the time a document -        is added to the index.  It's used either to index the same field differently, -        or to add multiple fields to the same field for easier/faster searching.  --> - -    <copyField source="content" dest="text"/> -    <copyField source="attachmentNames" dest="text"/> -    <copyField source="attachment" dest="text"/> -    <copyField source="subject" dest="text"/> -    <copyField source="allTo" dest="text"/> - -   <!-- Above, multiple source fields are copied to the [text] field.  -    Another way to map multiple source fields to the same  -    destination field is to use the dynamic field syntax.  -    copyField also supports a maxChars to copy setting.  --> -      -   <!-- <copyField source="*_t" dest="text" maxChars="3000"/> --> - -   <!-- copy name to alphaNameSort, a field designed for sorting by name --> -   <!-- <copyField source="name" dest="alphaNameSort"/> --> -  -   -    <!-- field type definitions. The "name" attribute is -       just a label to be used by field definitions.  
The "class" -       attribute and any other attributes determine the real -       behavior of the fieldType. -         Class names starting with "solr" refer to java classes in a -       standard package such as org.apache.solr.analysis -    --> - -    <!-- The StrField type is not analyzed, but indexed/stored verbatim. --> -    <fieldType name="string" class="solr.StrField" sortMissingLast="true" /> - -    <!-- boolean type: "true" or "false" --> -    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> - -    <!-- sortMissingLast and sortMissingFirst attributes are optional attributes are -         currently supported on types that are sorted internally as strings -         and on numeric types. -	     This includes "string", "boolean", "pint", "pfloat", "plong", "pdate", "pdouble". -       - If sortMissingLast="true", then a sort on this field will cause documents -         without the field to come after documents with the field, -         regardless of the requested sort order (asc or desc). -       - If sortMissingFirst="true", then a sort on this field will cause documents -         without the field to come before documents with the field, -         regardless of the requested sort order. -       - If sortMissingLast="false" and sortMissingFirst="false" (the default), -         then default lucene sorting will be used which places docs without the -         field first in an ascending sort and last in a descending sort. -    --> - -    <!-- -      Numeric field types that index values using KD-trees. -      Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc. 
-    --> -    <fieldType name="pint" class="solr.IntPointField" docValues="true"/> -    <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> -    <fieldType name="plong" class="solr.LongPointField" docValues="true"/> -    <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> -     -    <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> -    <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> -    <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> -    <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> - -    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and -         is a more restricted form of the canonical representation of dateTime -         http://www.w3.org/TR/xmlschema-2/#dateTime     -         The trailing "Z" designates UTC time and is mandatory. -         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -         All other components are mandatory. - -         Expressions can also be used to denote calculations that should be -         performed relative to "NOW" to determine the value, ie... - -               NOW/HOUR -                  ... Round to the start of the current hour -               NOW-1DAY -                  ... Exactly 1 day prior to now -               NOW/DAY+6MONTHS+3DAYS -                  ... 6 months and 3 days in the future from the start of -                      the current day -                       -         Consult the DatePointField javadocs for more information. -      --> -    <!-- KD-tree versions of date fields --> -    <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> -    <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> -     -    <!--Binary data type. 
The data should be sent/retrieved in as Base64 encoded Strings --> -    <fieldType name="binary" class="solr.BinaryField"/> - -    <!-- The "RandomSortField" is not used to store or search any -         data.  You can declare fields of this type it in your schema -         to generate pseudo-random orderings of your docs for sorting  -         or function purposes.  The ordering is generated based on the field -         name and the version of the index. As long as the index version -         remains unchanged, and the same field name is reused, -         the ordering of the docs will be consistent.   -         If you want different psuedo-random orderings of documents, -         for the same version of the index, use a dynamicField and -         change the field name in the request. -     --> -    <fieldType name="random" class="solr.RandomSortField" indexed="true" /> - -    <!-- solr.TextField allows the specification of custom text analyzers -         specified as a tokenizer and a list of token filters. Different -         analyzers may be specified for indexing and querying. - -         The optional positionIncrementGap puts space between multiple fields of -         this type on the same document, with the purpose of preventing false phrase -         matching across fields. - -         For more info on customizing your analyzer chain, please see -         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters -     --> - -    <!-- One can also specify an existing Analyzer class that has a -         default constructor via the class attribute on the analyzer element. 
-         Example: -    <fieldType name="text_greek" class="solr.TextField"> -      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> -    </fieldType> -    --> - -    <!-- A text field that only splits on whitespace for exact matching of words --> -    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> -      <analyzer> -        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -      </analyzer> -    </fieldType> - -    <!-- A general text field that has reasonable, generic -         cross-language defaults: it tokenizes with StandardTokenizer, -   removes stop words from case-insensitive "stopwords.txt" -   (empty by default), and down cases.  At query time only, it -   also applies synonyms. --> -    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> -      <analyzer type="index"> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -        <!-- in this example, we will only use synonyms at query time -        <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> -        <filter class="solr.FlattenGraphFilterFactory"/> -        --> -        <filter class="solr.LowerCaseFilterFactory"/> -      </analyzer> -      <analyzer type="query"> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -        <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -        <filter class="solr.LowerCaseFilterFactory"/> -      </analyzer> -    </fieldType> - -    <!-- A text field with defaults appropriate for English: it -         tokenizes with StandardTokenizer, removes English stop words -         (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and -         finally applies 
Porter's stemming.  The query time analyzer -         also applies synonyms from synonyms.txt. --> -    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> -      <analyzer type="index"> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- in this example, we will only use synonyms at query time -        <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> -        <filter class="solr.FlattenGraphFilterFactory"/> -        --> -        <!-- Case insensitive stop word removal. -        --> -        <filter class="solr.StopFilterFactory" -                ignoreCase="true" -                words="lang/stopwords_en.txt" -                /> -        <filter class="solr.LowerCaseFilterFactory"/> -  <filter class="solr.EnglishPossessiveFilterFactory"/> -        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -  <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: -        <filter class="solr.EnglishMinimalStemFilterFactory"/> -  --> -        <filter class="solr.PorterStemFilterFactory"/> -      </analyzer> -      <analyzer type="query"> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -        <filter class="solr.StopFilterFactory" -                ignoreCase="true" -                words="lang/stopwords_en.txt" -                /> -        <filter class="solr.LowerCaseFilterFactory"/> -  <filter class="solr.EnglishPossessiveFilterFactory"/> -        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -  <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: -        <filter class="solr.EnglishMinimalStemFilterFactory"/> -  --> -        <filter class="solr.PorterStemFilterFactory"/> -      </analyzer> -  
  </fieldType> - -    <!-- A text field with defaults appropriate for English, plus -   aggressive word-splitting and autophrase features enabled. -   This field is just like text_en, except it adds -   WordDelimiterGraphFilter to enable splitting and matching of -   words on case-change, alpha numeric boundaries, and -   non-alphanumeric chars.  This means certain compound word -   cases will work, for example query "wi fi" will match -   document "WiFi" or "wi-fi". -        --> -    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> -      <analyzer type="index"> -        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -        <!-- in this example, we will only use synonyms at query time -        <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> -        --> -        <!-- Case insensitive stop word removal. -        --> -        <filter class="solr.StopFilterFactory" -                ignoreCase="true" -                words="lang/stopwords_en.txt" -                /> -        <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -        <filter class="solr.PorterStemFilterFactory"/> -        <filter class="solr.FlattenGraphFilterFactory" /> -      </analyzer> -      <analyzer type="query"> -        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -        <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -        <filter class="solr.StopFilterFactory" -                ignoreCase="true" -                words="lang/stopwords_en.txt" -                /> -        <filter class="solr.WordDelimiterGraphFilterFactory" 
generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -        <filter class="solr.PorterStemFilterFactory"/> -      </analyzer> -    </fieldType> - -    <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names, -         but may be good for SKUs.  Can insert dashes in the wrong place and still match. --> -    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> -      <analyzer type="index"> -        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -        <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> -        <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -        <filter class="solr.EnglishMinimalStemFilterFactory"/> -        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes -             possible with WordDelimiterGraphFilter in conjunction with stemming. 
--> -        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> -        <filter class="solr.FlattenGraphFilterFactory" /> -      </analyzer> -      <analyzer type="query"> -        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -        <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> -        <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -        <filter class="solr.EnglishMinimalStemFilterFactory"/> -        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes -             possible with WordDelimiterGraphFilter in conjunction with stemming. --> -        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> -      </analyzer> -    </fieldType> - -    <!-- Just like text_general except it reverses the characters of -   each token, to enable more efficient leading wildcard queries. 
--> -    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> -      <analyzer type="index"> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" -           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> -      </analyzer> -      <analyzer type="query"> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -        <filter class="solr.LowerCaseFilterFactory"/> -      </analyzer> -    </fieldType> - -    <!-- charFilter + WhitespaceTokenizer  --> -    <!-- -    <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" > -      <analyzer> -        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/> -        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -      </analyzer> -    </fieldType> -    --> - -    <!-- This is an example of using the KeywordTokenizer along -         With various TokenFilterFactories to produce a sortable field -         that does not include some properties of the source text -      --> -    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true"> -      <analyzer> -        <!-- KeywordTokenizer does no actual tokenizing, so the entire -             input string is preserved as a single token -          --> -        <tokenizer class="solr.KeywordTokenizerFactory"/> -        <!-- The LowerCase TokenFilter does what you expect, which can be -             when you want your sorting to be case insensitive -          --> -        <filter 
class="solr.LowerCaseFilterFactory" /> -        <!-- The TrimFilter removes any leading or trailing whitespace --> -        <filter class="solr.TrimFilterFactory" /> -        <!-- The PatternReplaceFilter gives you the flexibility to use -             Java Regular expression to replace any sequence of characters -             matching a pattern with an arbitrary replacement string,  -             which may include back references to portions of the original -             string matched by the pattern. -              -             See the Java Regular Expression documentation for more -             information on pattern and replacement string syntax. -              -             http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html -          --> -        <filter class="solr.PatternReplaceFilterFactory" -                pattern="([^a-z])" replacement="" replace="all" -        /> -      </analyzer> -    </fieldType> -     -    <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" > -      <analyzer> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> -      </analyzer> -    </fieldType> - -    <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" > -      <analyzer> -        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -        <!-- -        The DelimitedPayloadTokenFilter can put payloads on tokens... for example, -        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f -        Attributes of the DelimitedPayloadTokenFilterFactory :  -         "delimiter" - a one character delimiter. 
Default is | (pipe) -   "encoder" - how to encode the following value into a payload -      float -> org.apache.lucene.analysis.payloads.FloatEncoder, -      integer -> o.a.l.a.p.IntegerEncoder -      identity -> o.a.l.a.p.IdentityEncoder -            Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor. -         --> -        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> -      </analyzer> -    </fieldType> - -    <!-- lowercases the entire field value, keeping it as a single token.  --> -    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> -      <analyzer> -        <tokenizer class="solr.KeywordTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory" /> -      </analyzer> -    </fieldType> - -    <!--  -      Example of using PathHierarchyTokenizerFactory at index time, so -      queries for paths match documents at that path, or in descendent paths -    --> -    <fieldType name="descendent_path" class="solr.TextField"> -      <analyzer type="index"> -  <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> -      </analyzer> -      <analyzer type="query"> -  <tokenizer class="solr.KeywordTokenizerFactory" /> -      </analyzer> -    </fieldType> -    <!--  -      Example of using PathHierarchyTokenizerFactory at query time, so -      queries for paths match documents at that path, or in ancestor paths -    --> -    <fieldType name="ancestor_path" class="solr.TextField"> -      <analyzer type="index"> -  <tokenizer class="solr.KeywordTokenizerFactory" /> -      </analyzer> -      <analyzer type="query"> -  <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> -      </analyzer> -    </fieldType> - -    <!-- since fields of this type are by default not stored or indexed, -         any data added to them will be ignored outright.  
-->  -    <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" /> - -    <!-- This point type indexes the coordinates as separate fields (subFields) -      If subFieldType is defined, it references a type, and a dynamic field -      definition is created matching *___<typename>.  Alternately, if  -      subFieldSuffix is defined, that is used to create the subFields. -      Example: if subFieldType="double", then the coordinates would be -        indexed in fields myloc_0___double,myloc_1___double. -      Example: if subFieldSuffix="_d" then the coordinates would be indexed -        in fields myloc_0_d,myloc_1_d -      The subFields are an implementation detail of the fieldType, and end -      users normally should not need to know about them. -     --> -    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> - -    <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. --> -    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> - -    <!-- An alternative geospatial field type new to Solr 4.  It supports multiValued and polygon shapes. -      For more information about this and other Spatial fields new to Solr 4, see: -      http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 -    --> -    <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" -        geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> - -   <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType -        Parameters: -          amountLongSuffix: Required. Refers to a dynamic field for the raw amount sub-field.  -                              The dynamic field must have a field type that extends LongValueFieldType. -                              Note: If you expect to use Atomic Updates, this dynamic field may not be stored. -          codeStrSuffix:    Required. 
Refers to a dynamic field for the currency code sub-field. -                              The dynamic field must have a field type that extends StrField. -                              Note: If you expect to use Atomic Updates, this dynamic field may not be stored. -          defaultCurrency:  Specifies the default currency if none specified. Defaults to "USD" -          providerClass:    Lets you plug in other exchange provider backend: -                            solr.FileExchangeRateProvider is the default and takes one parameter: -                              currencyConfig: name of an xml file holding exchange rates -                            solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org: -                              ratesFileLocation: URL or path to rates JSON file (default latest.json on the web) -                              refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60) -   --> -    <fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns" -               defaultCurrency="USD" currencyConfig="currency.xml" /> - - -   <!-- some examples for different languages (generally ordered by ISO code) --> - -    <!-- Arabic --> -    <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- for any non-arabic --> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" /> -        <!-- normalizes ﻯ to ﻱ, etc --> -        <filter class="solr.ArabicNormalizationFilterFactory"/> -        <filter class="solr.ArabicStemFilterFactory"/> -      </analyzer> -    </fieldType> - -    <!-- Bulgarian --> -    <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> 
 -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />  -        <filter class="solr.BulgarianStemFilterFactory"/>        -      </analyzer> -    </fieldType> -     -    <!-- Catalan --> -    <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- removes l', etc --> -        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>        -      </analyzer> -    </fieldType> -     -    <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) --> -    <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"> -      <analyzer> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- normalize width before bigram, as e.g. 
half-width dakuten combine  --> -        <filter class="solr.CJKWidthFilterFactory"/> -        <!-- for any non-CJK --> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.CJKBigramFilterFactory"/> -      </analyzer> -    </fieldType> - -    <!-- Kurdish --> -    <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100"> -      <analyzer> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.SoraniNormalizationFilterFactory"/> -        <!-- for any latin text --> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/> -        <filter class="solr.SoraniStemFilterFactory"/> -      </analyzer> -    </fieldType> - -    <!-- Czech --> -    <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" /> -        <filter class="solr.CzechStemFilterFactory"/>        -      </analyzer> -    </fieldType> -     -    <!-- Danish --> -    <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>        -      </analyzer> -    </fieldType> -     -    <!-- German --> -    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter 
class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" /> -        <filter class="solr.GermanNormalizationFilterFactory"/> -        <filter class="solr.GermanLightStemFilterFactory"/> -        <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> --> -        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Greek --> -    <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- greek specific lowercase for sigma --> -        <filter class="solr.GreekLowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" /> -        <filter class="solr.GreekStemFilterFactory"/> -      </analyzer> -    </fieldType> -     -    <!-- Spanish --> -    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" /> -        <filter class="solr.SpanishLightStemFilterFactory"/> -        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Basque --> -    <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Basque"/> -      </analyzer> -    </fieldType> -     -    <!-- Persian --> 
-    <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> -      <analyzer> -        <!-- for ZWNJ --> -        <charFilter class="solr.PersianCharFilterFactory"/> -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.ArabicNormalizationFilterFactory"/> -        <filter class="solr.PersianNormalizationFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" /> -      </analyzer> -    </fieldType> -     -    <!-- Finnish --> -    <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/> -        <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> --> -      </analyzer> -    </fieldType> -     -    <!-- French --> -    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- removes l', etc --> -        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" /> -        <filter class="solr.FrenchLightStemFilterFactory"/> -        <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> --> -        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Irish --> -    <fieldType name="text_ga" 
class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- removes d', etc --> -        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/> -        <!-- removes n-, etc. position increments is intentionally false! --> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/> -        <filter class="solr.IrishLowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/> -        <filter class="solr.SnowballPorterFilterFactory" language="Irish"/> -      </analyzer> -    </fieldType> -     -    <!-- Galician --> -    <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" /> -        <filter class="solr.GalicianStemFilterFactory"/> -        <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Hindi --> -    <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <!-- normalizes unicode representation --> -        <filter class="solr.IndicNormalizationFilterFactory"/> -        <!-- normalizes variation in spelling --> -        <filter class="solr.HindiNormalizationFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" /> -        <filter class="solr.HindiStemFilterFactory"/> -      </analyzer> -    </fieldType> -     -    <!-- Hungarian --> -    <fieldType name="text_hu" 
class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/> -        <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->    -      </analyzer> -    </fieldType> -     -    <!-- Armenian --> -    <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/> -      </analyzer> -    </fieldType> -     -    <!-- Indonesian --> -    <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" /> -        <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false --> -        <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> -      </analyzer> -    </fieldType> -     -    <!-- Italian --> -    <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <!-- removes l', etc --> -        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" 
ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" /> -        <filter class="solr.ItalianLightStemFilterFactory"/> -        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming) - -         NOTE: If you want to optimize search for precision, use default operator AND in your request -         handler config (q.op). Use OR if you would like to optimize for recall (default). -    --> -    <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false"> -      <analyzer> -      <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer) - -           Kuromoji has a search mode (default) that does segmentation useful for search.  A heuristic -           is used to segment compounds into their parts and the compound itself is kept as a synonym. - -           Valid values for attribute mode are: -              normal: regular segmentation -              search: segmentation useful for search with synonyms compounds (default) -            extended: same as search mode, but unigrams unknown words (experimental) - -           For some applications it might be good to use search mode for indexing and normal mode for -           queries to reduce recall and prevent parts of compounds from being matched and highlighted. -           Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query. - -           Kuromoji also has a convenient user dictionary feature that allows overriding the statistical -           model with your own entries for segmentation, part-of-speech tags and readings without a need -           to specify weights.  Notice that user dictionaries have not been subject to extensive testing. 
- -           User dictionary attributes are: -                     userDictionary: user dictionary filename -             userDictionaryEncoding: user dictionary encoding (default is UTF-8) - -           See lang/userdict_ja.txt for a sample user dictionary file. - -           Punctuation characters are discarded by default.  Use discardPunctuation="false" to keep them. - -           See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support. -        --> -        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/> -        <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>--> -        <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) --> -        <filter class="solr.JapaneseBaseFormFilterFactory"/> -        <!-- Removes tokens with certain part-of-speech tags --> -        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" /> -        <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) --> -        <filter class="solr.CJKWidthFilterFactory"/> -        <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking --> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" /> -        <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) --> -        <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/> -        <!-- Lower-cases romaji characters --> -        <filter class="solr.LowerCaseFilterFactory"/> -      </analyzer> -    </fieldType> -     -    <!-- Korean morphological analysis --> -    <dynamicField name="*_txt_ko" type="text_ko"  indexed="true"  stored="true"/> -    <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100"> -      <analyzer> -        <!-- Nori Korean 
morphological analyzer/tokenizer (KoreanTokenizer) -          The Korean (nori) analyzer integrates Lucene nori analysis module into Solr. -          It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts. - -          This dictionary was built with MeCab, it defines a format for the features adapted -          for the Korean language. -           -          Nori also has a convenient user dictionary feature that allows overriding the statistical -          model with your own entries for segmentation, part-of-speech tags and readings without a need -          to specify weights. Notice that user dictionaries have not been subject to extensive testing. - -          The tokenizer supports multiple schema attributes: -            * userDictionary: User dictionary path. -            * userDictionaryEncoding: User dictionary encoding. -            * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'. -            * outputUnknownUnigrams: If true outputs unigrams for unknown words. -        --> -        <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/> -        <!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags', -          listing the tags to remove. By default it removes:  -          E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV -          This is basically an equivalent to stemming. 
-        --> -        <filter class="solr.KoreanPartOfSpeechStopFilterFactory" /> -        <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: --> -        <filter class="solr.KoreanReadingFormFilterFactory" /> -        <filter class="solr.LowerCaseFilterFactory" /> -      </analyzer> -    </fieldType> - -    <!-- Latvian --> -    <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" /> -        <filter class="solr.LatvianStemFilterFactory"/> -      </analyzer> -    </fieldType> -     -    <!-- Dutch --> -    <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" /> -        <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/> -        <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/> -      </analyzer> -    </fieldType> -     -    <!-- Norwegian --> -    <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/> -        <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> --> -        <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" 
variant="nb"/> --> -        <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both --> -      </analyzer> -    </fieldType> -     -    <!-- Portuguese --> -    <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> -        <filter class="solr.PortugueseLightStemFilterFactory"/> -        <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> -        <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> -        <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Romanian --> -    <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/> -      </analyzer> -    </fieldType> -     -    <!-- Russian --> -    <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Russian"/> -        <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Swedish --> -    <fieldType name="text_sv" 
class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/> -        <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> --> -      </analyzer> -    </fieldType> -     -    <!-- Thai --> -    <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.ThaiTokenizerFactory"/> -        <filter class="solr.LowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" /> -      </analyzer> -    </fieldType> -     -    <!-- Turkish --> -    <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100"> -      <analyzer>  -        <tokenizer class="solr.StandardTokenizerFactory"/> -        <filter class="solr.ApostropheFilterFactory"/> -        <filter class="solr.TurkishLowerCaseFilterFactory"/> -        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" /> -        <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/> -      </analyzer> -    </fieldType> -   -  <!-- Similarity is the scoring routine for each document vs. a query. -       A custom Similarity or SimilarityFactory may be specified here, but  -       the default is fine for most applications.   
-       For more info: http://wiki.apache.org/solr/SchemaXml#Similarity -    --> -  <!-- -     <similarity class="com.example.solr.CustomSimilarityFactory"> -       <str name="paramkey">param value</str> -     </similarity> -    --> - -</schema> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt deleted file mode 100644 index 9a84b6eac..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt +++ /dev/null @@ -1,3813 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This map converts alphabetic, numeric, and symbolic Unicode characters -# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode -# block) into their ASCII equivalents, if one exists. 
-# -# Characters from the following Unicode blocks are converted; however, only -# those characters with reasonable ASCII alternatives are converted: -# -# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf -# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf -# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf -# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf -# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf -# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf -# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf -# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf -# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf -# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf -# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf -# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf -# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf -# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf -# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf -# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf -#   -# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode -# -# The set of character conversions supported by this map is a superset of -# those supported by the map represented by mapping-ISOLatin1Accent.txt. -# -# See the bottom of this file for the Perl script used to generate the contents -# of this file (without this header) from ASCIIFoldingFilter.java. - - -# Syntax: -#   "source" => "target" -#     "source".length() > 0 (source cannot be empty.) -#     "target".length() >= 0 (target can be empty.) 
- - -# À  [LATIN CAPITAL LETTER A WITH GRAVE] -"\u00C0" => "A" - -# Á  [LATIN CAPITAL LETTER A WITH ACUTE] -"\u00C1" => "A" - -#   [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] -"\u00C2" => "A" - -# à  [LATIN CAPITAL LETTER A WITH TILDE] -"\u00C3" => "A" - -# Ä  [LATIN CAPITAL LETTER A WITH DIAERESIS] -"\u00C4" => "A" - -# Å  [LATIN CAPITAL LETTER A WITH RING ABOVE] -"\u00C5" => "A" - -# Ā  [LATIN CAPITAL LETTER A WITH MACRON] -"\u0100" => "A" - -# Ă  [LATIN CAPITAL LETTER A WITH BREVE] -"\u0102" => "A" - -# Ą  [LATIN CAPITAL LETTER A WITH OGONEK] -"\u0104" => "A" - -# Ə  http://en.wikipedia.org/wiki/Schwa  [LATIN CAPITAL LETTER SCHWA] -"\u018F" => "A" - -# Ǎ  [LATIN CAPITAL LETTER A WITH CARON] -"\u01CD" => "A" - -# Ǟ  [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] -"\u01DE" => "A" - -# Ǡ  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E0" => "A" - -# Ǻ  [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FA" => "A" - -# Ȁ  [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] -"\u0200" => "A" - -# Ȃ  [LATIN CAPITAL LETTER A WITH INVERTED BREVE] -"\u0202" => "A" - -# Ȧ  [LATIN CAPITAL LETTER A WITH DOT ABOVE] -"\u0226" => "A" - -# Ⱥ  [LATIN CAPITAL LETTER A WITH STROKE] -"\u023A" => "A" - -# ᴀ  [LATIN LETTER SMALL CAPITAL A] -"\u1D00" => "A" - -# Ḁ  [LATIN CAPITAL LETTER A WITH RING BELOW] -"\u1E00" => "A" - -# Ạ  [LATIN CAPITAL LETTER A WITH DOT BELOW] -"\u1EA0" => "A" - -# Ả  [LATIN CAPITAL LETTER A WITH HOOK ABOVE] -"\u1EA2" => "A" - -# Ấ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA4" => "A" - -# Ầ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] -"\u1EA6" => "A" - -# Ẩ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA8" => "A" - -# Ẫ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAA" => "A" - -# Ậ  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAC" => "A" - -# Ắ  [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] -"\u1EAE" => "A" - -# Ằ  [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] -"\u1EB0" => "A" - -# 
Ẳ  [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] -"\u1EB2" => "A" - -# Ẵ  [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] -"\u1EB4" => "A" - -# Ặ  [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB6" => "A" - -# Ⓐ  [CIRCLED LATIN CAPITAL LETTER A] -"\u24B6" => "A" - -# A  [FULLWIDTH LATIN CAPITAL LETTER A] -"\uFF21" => "A" - -# à  [LATIN SMALL LETTER A WITH GRAVE] -"\u00E0" => "a" - -# á  [LATIN SMALL LETTER A WITH ACUTE] -"\u00E1" => "a" - -# â  [LATIN SMALL LETTER A WITH CIRCUMFLEX] -"\u00E2" => "a" - -# ã  [LATIN SMALL LETTER A WITH TILDE] -"\u00E3" => "a" - -# ä  [LATIN SMALL LETTER A WITH DIAERESIS] -"\u00E4" => "a" - -# å  [LATIN SMALL LETTER A WITH RING ABOVE] -"\u00E5" => "a" - -# ā  [LATIN SMALL LETTER A WITH MACRON] -"\u0101" => "a" - -# ă  [LATIN SMALL LETTER A WITH BREVE] -"\u0103" => "a" - -# ą  [LATIN SMALL LETTER A WITH OGONEK] -"\u0105" => "a" - -# ǎ  [LATIN SMALL LETTER A WITH CARON] -"\u01CE" => "a" - -# ǟ  [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] -"\u01DF" => "a" - -# ǡ  [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E1" => "a" - -# ǻ  [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FB" => "a" - -# ȁ  [LATIN SMALL LETTER A WITH DOUBLE GRAVE] -"\u0201" => "a" - -# ȃ  [LATIN SMALL LETTER A WITH INVERTED BREVE] -"\u0203" => "a" - -# ȧ  [LATIN SMALL LETTER A WITH DOT ABOVE] -"\u0227" => "a" - -# ɐ  [LATIN SMALL LETTER TURNED A] -"\u0250" => "a" - -# ə  [LATIN SMALL LETTER SCHWA] -"\u0259" => "a" - -# ɚ  [LATIN SMALL LETTER SCHWA WITH HOOK] -"\u025A" => "a" - -# ᶏ  [LATIN SMALL LETTER A WITH RETROFLEX HOOK] -"\u1D8F" => "a" - -# ᶕ  [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] -"\u1D95" => "a" - -# ạ  [LATIN SMALL LETTER A WITH RING BELOW] -"\u1E01" => "a" - -# ả  [LATIN SMALL LETTER A WITH RIGHT HALF RING] -"\u1E9A" => "a" - -# ạ  [LATIN SMALL LETTER A WITH DOT BELOW] -"\u1EA1" => "a" - -# ả  [LATIN SMALL LETTER A WITH HOOK ABOVE] -"\u1EA3" => "a" - -# ấ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA5" => 
"a" - -# ầ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] -"\u1EA7" => "a" - -# ẩ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA9" => "a" - -# ẫ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAB" => "a" - -# ậ  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAD" => "a" - -# ắ  [LATIN SMALL LETTER A WITH BREVE AND ACUTE] -"\u1EAF" => "a" - -# ằ  [LATIN SMALL LETTER A WITH BREVE AND GRAVE] -"\u1EB1" => "a" - -# ẳ  [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] -"\u1EB3" => "a" - -# ẵ  [LATIN SMALL LETTER A WITH BREVE AND TILDE] -"\u1EB5" => "a" - -# ặ  [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB7" => "a" - -# ₐ  [LATIN SUBSCRIPT SMALL LETTER A] -"\u2090" => "a" - -# ₔ  [LATIN SUBSCRIPT SMALL LETTER SCHWA] -"\u2094" => "a" - -# ⓐ  [CIRCLED LATIN SMALL LETTER A] -"\u24D0" => "a" - -# ⱥ  [LATIN SMALL LETTER A WITH STROKE] -"\u2C65" => "a" - -# Ɐ  [LATIN CAPITAL LETTER TURNED A] -"\u2C6F" => "a" - -# a  [FULLWIDTH LATIN SMALL LETTER A] -"\uFF41" => "a" - -# Ꜳ  [LATIN CAPITAL LETTER AA] -"\uA732" => "AA" - -# Æ  [LATIN CAPITAL LETTER AE] -"\u00C6" => "AE" - -# Ǣ  [LATIN CAPITAL LETTER AE WITH MACRON] -"\u01E2" => "AE" - -# Ǽ  [LATIN CAPITAL LETTER AE WITH ACUTE] -"\u01FC" => "AE" - -# ᴁ  [LATIN LETTER SMALL CAPITAL AE] -"\u1D01" => "AE" - -# Ꜵ  [LATIN CAPITAL LETTER AO] -"\uA734" => "AO" - -# Ꜷ  [LATIN CAPITAL LETTER AU] -"\uA736" => "AU" - -# Ꜹ  [LATIN CAPITAL LETTER AV] -"\uA738" => "AV" - -# Ꜻ  [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] -"\uA73A" => "AV" - -# Ꜽ  [LATIN CAPITAL LETTER AY] -"\uA73C" => "AY" - -# ⒜  [PARENTHESIZED LATIN SMALL LETTER A] -"\u249C" => "(a)" - -# ꜳ  [LATIN SMALL LETTER AA] -"\uA733" => "aa" - -# æ  [LATIN SMALL LETTER AE] -"\u00E6" => "ae" - -# ǣ  [LATIN SMALL LETTER AE WITH MACRON] -"\u01E3" => "ae" - -# ǽ  [LATIN SMALL LETTER AE WITH ACUTE] -"\u01FD" => "ae" - -# ᴂ  [LATIN SMALL LETTER TURNED AE] -"\u1D02" => "ae" - -# ꜵ  [LATIN SMALL LETTER AO] -"\uA735" => "ao" - -# ꜷ  [LATIN SMALL 
LETTER AU] -"\uA737" => "au" - -# ꜹ  [LATIN SMALL LETTER AV] -"\uA739" => "av" - -# ꜻ  [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] -"\uA73B" => "av" - -# ꜽ  [LATIN SMALL LETTER AY] -"\uA73D" => "ay" - -# Ɓ  [LATIN CAPITAL LETTER B WITH HOOK] -"\u0181" => "B" - -# Ƃ  [LATIN CAPITAL LETTER B WITH TOPBAR] -"\u0182" => "B" - -# Ƀ  [LATIN CAPITAL LETTER B WITH STROKE] -"\u0243" => "B" - -# ʙ  [LATIN LETTER SMALL CAPITAL B] -"\u0299" => "B" - -# ᴃ  [LATIN LETTER SMALL CAPITAL BARRED B] -"\u1D03" => "B" - -# Ḃ  [LATIN CAPITAL LETTER B WITH DOT ABOVE] -"\u1E02" => "B" - -# Ḅ  [LATIN CAPITAL LETTER B WITH DOT BELOW] -"\u1E04" => "B" - -# Ḇ  [LATIN CAPITAL LETTER B WITH LINE BELOW] -"\u1E06" => "B" - -# Ⓑ  [CIRCLED LATIN CAPITAL LETTER B] -"\u24B7" => "B" - -# B  [FULLWIDTH LATIN CAPITAL LETTER B] -"\uFF22" => "B" - -# ƀ  [LATIN SMALL LETTER B WITH STROKE] -"\u0180" => "b" - -# ƃ  [LATIN SMALL LETTER B WITH TOPBAR] -"\u0183" => "b" - -# ɓ  [LATIN SMALL LETTER B WITH HOOK] -"\u0253" => "b" - -# ᵬ  [LATIN SMALL LETTER B WITH MIDDLE TILDE] -"\u1D6C" => "b" - -# ᶀ  [LATIN SMALL LETTER B WITH PALATAL HOOK] -"\u1D80" => "b" - -# ḃ  [LATIN SMALL LETTER B WITH DOT ABOVE] -"\u1E03" => "b" - -# ḅ  [LATIN SMALL LETTER B WITH DOT BELOW] -"\u1E05" => "b" - -# ḇ  [LATIN SMALL LETTER B WITH LINE BELOW] -"\u1E07" => "b" - -# ⓑ  [CIRCLED LATIN SMALL LETTER B] -"\u24D1" => "b" - -# b  [FULLWIDTH LATIN SMALL LETTER B] -"\uFF42" => "b" - -# ⒝  [PARENTHESIZED LATIN SMALL LETTER B] -"\u249D" => "(b)" - -# Ç  [LATIN CAPITAL LETTER C WITH CEDILLA] -"\u00C7" => "C" - -# Ć  [LATIN CAPITAL LETTER C WITH ACUTE] -"\u0106" => "C" - -# Ĉ  [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] -"\u0108" => "C" - -# Ċ  [LATIN CAPITAL LETTER C WITH DOT ABOVE] -"\u010A" => "C" - -# Č  [LATIN CAPITAL LETTER C WITH CARON] -"\u010C" => "C" - -# Ƈ  [LATIN CAPITAL LETTER C WITH HOOK] -"\u0187" => "C" - -# Ȼ  [LATIN CAPITAL LETTER C WITH STROKE] -"\u023B" => "C" - -# ʗ  [LATIN LETTER STRETCHED C] -"\u0297" => "C" - -# ᴄ  [LATIN 
LETTER SMALL CAPITAL C] -"\u1D04" => "C" - -# Ḉ  [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] -"\u1E08" => "C" - -# Ⓒ  [CIRCLED LATIN CAPITAL LETTER C] -"\u24B8" => "C" - -# C  [FULLWIDTH LATIN CAPITAL LETTER C] -"\uFF23" => "C" - -# ç  [LATIN SMALL LETTER C WITH CEDILLA] -"\u00E7" => "c" - -# ć  [LATIN SMALL LETTER C WITH ACUTE] -"\u0107" => "c" - -# ĉ  [LATIN SMALL LETTER C WITH CIRCUMFLEX] -"\u0109" => "c" - -# ċ  [LATIN SMALL LETTER C WITH DOT ABOVE] -"\u010B" => "c" - -# č  [LATIN SMALL LETTER C WITH CARON] -"\u010D" => "c" - -# ƈ  [LATIN SMALL LETTER C WITH HOOK] -"\u0188" => "c" - -# ȼ  [LATIN SMALL LETTER C WITH STROKE] -"\u023C" => "c" - -# ɕ  [LATIN SMALL LETTER C WITH CURL] -"\u0255" => "c" - -# ḉ  [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] -"\u1E09" => "c" - -# ↄ  [LATIN SMALL LETTER REVERSED C] -"\u2184" => "c" - -# ⓒ  [CIRCLED LATIN SMALL LETTER C] -"\u24D2" => "c" - -# Ꜿ  [LATIN CAPITAL LETTER REVERSED C WITH DOT] -"\uA73E" => "c" - -# ꜿ  [LATIN SMALL LETTER REVERSED C WITH DOT] -"\uA73F" => "c" - -# c  [FULLWIDTH LATIN SMALL LETTER C] -"\uFF43" => "c" - -# ⒞  [PARENTHESIZED LATIN SMALL LETTER C] -"\u249E" => "(c)" - -# Ð  [LATIN CAPITAL LETTER ETH] -"\u00D0" => "D" - -# Ď  [LATIN CAPITAL LETTER D WITH CARON] -"\u010E" => "D" - -# Đ  [LATIN CAPITAL LETTER D WITH STROKE] -"\u0110" => "D" - -# Ɖ  [LATIN CAPITAL LETTER AFRICAN D] -"\u0189" => "D" - -# Ɗ  [LATIN CAPITAL LETTER D WITH HOOK] -"\u018A" => "D" - -# Ƌ  [LATIN CAPITAL LETTER D WITH TOPBAR] -"\u018B" => "D" - -# ᴅ  [LATIN LETTER SMALL CAPITAL D] -"\u1D05" => "D" - -# ᴆ  [LATIN LETTER SMALL CAPITAL ETH] -"\u1D06" => "D" - -# Ḋ  [LATIN CAPITAL LETTER D WITH DOT ABOVE] -"\u1E0A" => "D" - -# Ḍ  [LATIN CAPITAL LETTER D WITH DOT BELOW] -"\u1E0C" => "D" - -# Ḏ  [LATIN CAPITAL LETTER D WITH LINE BELOW] -"\u1E0E" => "D" - -# Ḑ  [LATIN CAPITAL LETTER D WITH CEDILLA] -"\u1E10" => "D" - -# Ḓ  [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E12" => "D" - -# Ⓓ  [CIRCLED LATIN CAPITAL LETTER D] 
-"\u24B9" => "D" - -# Ꝺ  [LATIN CAPITAL LETTER INSULAR D] -"\uA779" => "D" - -# D  [FULLWIDTH LATIN CAPITAL LETTER D] -"\uFF24" => "D" - -# ð  [LATIN SMALL LETTER ETH] -"\u00F0" => "d" - -# ď  [LATIN SMALL LETTER D WITH CARON] -"\u010F" => "d" - -# đ  [LATIN SMALL LETTER D WITH STROKE] -"\u0111" => "d" - -# ƌ  [LATIN SMALL LETTER D WITH TOPBAR] -"\u018C" => "d" - -# ȡ  [LATIN SMALL LETTER D WITH CURL] -"\u0221" => "d" - -# ɖ  [LATIN SMALL LETTER D WITH TAIL] -"\u0256" => "d" - -# ɗ  [LATIN SMALL LETTER D WITH HOOK] -"\u0257" => "d" - -# ᵭ  [LATIN SMALL LETTER D WITH MIDDLE TILDE] -"\u1D6D" => "d" - -# ᶁ  [LATIN SMALL LETTER D WITH PALATAL HOOK] -"\u1D81" => "d" - -# ᶑ  [LATIN SMALL LETTER D WITH HOOK AND TAIL] -"\u1D91" => "d" - -# ḋ  [LATIN SMALL LETTER D WITH DOT ABOVE] -"\u1E0B" => "d" - -# ḍ  [LATIN SMALL LETTER D WITH DOT BELOW] -"\u1E0D" => "d" - -# ḏ  [LATIN SMALL LETTER D WITH LINE BELOW] -"\u1E0F" => "d" - -# ḑ  [LATIN SMALL LETTER D WITH CEDILLA] -"\u1E11" => "d" - -# ḓ  [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E13" => "d" - -# ⓓ  [CIRCLED LATIN SMALL LETTER D] -"\u24D3" => "d" - -# ꝺ  [LATIN SMALL LETTER INSULAR D] -"\uA77A" => "d" - -# d  [FULLWIDTH LATIN SMALL LETTER D] -"\uFF44" => "d" - -# DŽ  [LATIN CAPITAL LETTER DZ WITH CARON] -"\u01C4" => "DZ" - -# DZ  [LATIN CAPITAL LETTER DZ] -"\u01F1" => "DZ" - -# Dž  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] -"\u01C5" => "Dz" - -# Dz  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] -"\u01F2" => "Dz" - -# ⒟  [PARENTHESIZED LATIN SMALL LETTER D] -"\u249F" => "(d)" - -# ȸ  [LATIN SMALL LETTER DB DIGRAPH] -"\u0238" => "db" - -# dž  [LATIN SMALL LETTER DZ WITH CARON] -"\u01C6" => "dz" - -# dz  [LATIN SMALL LETTER DZ] -"\u01F3" => "dz" - -# ʣ  [LATIN SMALL LETTER DZ DIGRAPH] -"\u02A3" => "dz" - -# ʥ  [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] -"\u02A5" => "dz" - -# È  [LATIN CAPITAL LETTER E WITH GRAVE] -"\u00C8" => "E" - -# É  [LATIN CAPITAL LETTER E WITH ACUTE] -"\u00C9" => "E" - -# Ê  [LATIN 
CAPITAL LETTER E WITH CIRCUMFLEX] -"\u00CA" => "E" - -# Ë  [LATIN CAPITAL LETTER E WITH DIAERESIS] -"\u00CB" => "E" - -# Ē  [LATIN CAPITAL LETTER E WITH MACRON] -"\u0112" => "E" - -# Ĕ  [LATIN CAPITAL LETTER E WITH BREVE] -"\u0114" => "E" - -# Ė  [LATIN CAPITAL LETTER E WITH DOT ABOVE] -"\u0116" => "E" - -# Ę  [LATIN CAPITAL LETTER E WITH OGONEK] -"\u0118" => "E" - -# Ě  [LATIN CAPITAL LETTER E WITH CARON] -"\u011A" => "E" - -# Ǝ  [LATIN CAPITAL LETTER REVERSED E] -"\u018E" => "E" - -# Ɛ  [LATIN CAPITAL LETTER OPEN E] -"\u0190" => "E" - -# Ȅ  [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] -"\u0204" => "E" - -# Ȇ  [LATIN CAPITAL LETTER E WITH INVERTED BREVE] -"\u0206" => "E" - -# Ȩ  [LATIN CAPITAL LETTER E WITH CEDILLA] -"\u0228" => "E" - -# Ɇ  [LATIN CAPITAL LETTER E WITH STROKE] -"\u0246" => "E" - -# ᴇ  [LATIN LETTER SMALL CAPITAL E] -"\u1D07" => "E" - -# Ḕ  [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] -"\u1E14" => "E" - -# Ḗ  [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] -"\u1E16" => "E" - -# Ḙ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E18" => "E" - -# Ḛ  [LATIN CAPITAL LETTER E WITH TILDE BELOW] -"\u1E1A" => "E" - -# Ḝ  [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] -"\u1E1C" => "E" - -# Ẹ  [LATIN CAPITAL LETTER E WITH DOT BELOW] -"\u1EB8" => "E" - -# Ẻ  [LATIN CAPITAL LETTER E WITH HOOK ABOVE] -"\u1EBA" => "E" - -# Ẽ  [LATIN CAPITAL LETTER E WITH TILDE] -"\u1EBC" => "E" - -# Ế  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBE" => "E" - -# Ề  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC0" => "E" - -# Ể  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC2" => "E" - -# Ễ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC4" => "E" - -# Ệ  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC6" => "E" - -# Ⓔ  [CIRCLED LATIN CAPITAL LETTER E] -"\u24BA" => "E" - -# ⱻ  [LATIN LETTER SMALL CAPITAL TURNED E] -"\u2C7B" => "E" - -# E  [FULLWIDTH LATIN CAPITAL LETTER E] -"\uFF25" => "E" - -# è  [LATIN SMALL 
LETTER E WITH GRAVE] -"\u00E8" => "e" - -# é  [LATIN SMALL LETTER E WITH ACUTE] -"\u00E9" => "e" - -# ê  [LATIN SMALL LETTER E WITH CIRCUMFLEX] -"\u00EA" => "e" - -# ë  [LATIN SMALL LETTER E WITH DIAERESIS] -"\u00EB" => "e" - -# ē  [LATIN SMALL LETTER E WITH MACRON] -"\u0113" => "e" - -# ĕ  [LATIN SMALL LETTER E WITH BREVE] -"\u0115" => "e" - -# ė  [LATIN SMALL LETTER E WITH DOT ABOVE] -"\u0117" => "e" - -# ę  [LATIN SMALL LETTER E WITH OGONEK] -"\u0119" => "e" - -# ě  [LATIN SMALL LETTER E WITH CARON] -"\u011B" => "e" - -# ǝ  [LATIN SMALL LETTER TURNED E] -"\u01DD" => "e" - -# ȅ  [LATIN SMALL LETTER E WITH DOUBLE GRAVE] -"\u0205" => "e" - -# ȇ  [LATIN SMALL LETTER E WITH INVERTED BREVE] -"\u0207" => "e" - -# ȩ  [LATIN SMALL LETTER E WITH CEDILLA] -"\u0229" => "e" - -# ɇ  [LATIN SMALL LETTER E WITH STROKE] -"\u0247" => "e" - -# ɘ  [LATIN SMALL LETTER REVERSED E] -"\u0258" => "e" - -# ɛ  [LATIN SMALL LETTER OPEN E] -"\u025B" => "e" - -# ɜ  [LATIN SMALL LETTER REVERSED OPEN E] -"\u025C" => "e" - -# ɝ  [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] -"\u025D" => "e" - -# ɞ  [LATIN SMALL LETTER CLOSED REVERSED OPEN E] -"\u025E" => "e" - -# ʚ  [LATIN SMALL LETTER CLOSED OPEN E] -"\u029A" => "e" - -# ᴈ  [LATIN SMALL LETTER TURNED OPEN E] -"\u1D08" => "e" - -# ᶒ  [LATIN SMALL LETTER E WITH RETROFLEX HOOK] -"\u1D92" => "e" - -# ᶓ  [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] -"\u1D93" => "e" - -# ᶔ  [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] -"\u1D94" => "e" - -# ḕ  [LATIN SMALL LETTER E WITH MACRON AND GRAVE] -"\u1E15" => "e" - -# ḗ  [LATIN SMALL LETTER E WITH MACRON AND ACUTE] -"\u1E17" => "e" - -# ḙ  [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E19" => "e" - -# ḛ  [LATIN SMALL LETTER E WITH TILDE BELOW] -"\u1E1B" => "e" - -# ḝ  [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] -"\u1E1D" => "e" - -# ẹ  [LATIN SMALL LETTER E WITH DOT BELOW] -"\u1EB9" => "e" - -# ẻ  [LATIN SMALL LETTER E WITH HOOK ABOVE] -"\u1EBB" => "e" - -# ẽ  [LATIN SMALL LETTER E WITH 
TILDE] -"\u1EBD" => "e" - -# ế  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBF" => "e" - -# ề  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC1" => "e" - -# ể  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC3" => "e" - -# ễ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC5" => "e" - -# ệ  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC7" => "e" - -# ₑ  [LATIN SUBSCRIPT SMALL LETTER E] -"\u2091" => "e" - -# ⓔ  [CIRCLED LATIN SMALL LETTER E] -"\u24D4" => "e" - -# ⱸ  [LATIN SMALL LETTER E WITH NOTCH] -"\u2C78" => "e" - -# e  [FULLWIDTH LATIN SMALL LETTER E] -"\uFF45" => "e" - -# ⒠  [PARENTHESIZED LATIN SMALL LETTER E] -"\u24A0" => "(e)" - -# Ƒ  [LATIN CAPITAL LETTER F WITH HOOK] -"\u0191" => "F" - -# Ḟ  [LATIN CAPITAL LETTER F WITH DOT ABOVE] -"\u1E1E" => "F" - -# Ⓕ  [CIRCLED LATIN CAPITAL LETTER F] -"\u24BB" => "F" - -# ꜰ  [LATIN LETTER SMALL CAPITAL F] -"\uA730" => "F" - -# Ꝼ  [LATIN CAPITAL LETTER INSULAR F] -"\uA77B" => "F" - -# ꟻ  [LATIN EPIGRAPHIC LETTER REVERSED F] -"\uA7FB" => "F" - -# F  [FULLWIDTH LATIN CAPITAL LETTER F] -"\uFF26" => "F" - -# ƒ  [LATIN SMALL LETTER F WITH HOOK] -"\u0192" => "f" - -# ᵮ  [LATIN SMALL LETTER F WITH MIDDLE TILDE] -"\u1D6E" => "f" - -# ᶂ  [LATIN SMALL LETTER F WITH PALATAL HOOK] -"\u1D82" => "f" - -# ḟ  [LATIN SMALL LETTER F WITH DOT ABOVE] -"\u1E1F" => "f" - -# ẛ  [LATIN SMALL LETTER LONG S WITH DOT ABOVE] -"\u1E9B" => "f" - -# ⓕ  [CIRCLED LATIN SMALL LETTER F] -"\u24D5" => "f" - -# ꝼ  [LATIN SMALL LETTER INSULAR F] -"\uA77C" => "f" - -# f  [FULLWIDTH LATIN SMALL LETTER F] -"\uFF46" => "f" - -# ⒡  [PARENTHESIZED LATIN SMALL LETTER F] -"\u24A1" => "(f)" - -# ff  [LATIN SMALL LIGATURE FF] -"\uFB00" => "ff" - -# ffi  [LATIN SMALL LIGATURE FFI] -"\uFB03" => "ffi" - -# ffl  [LATIN SMALL LIGATURE FFL] -"\uFB04" => "ffl" - -# fi  [LATIN SMALL LIGATURE FI] -"\uFB01" => "fi" - -# fl  [LATIN SMALL LIGATURE FL] -"\uFB02" => "fl" - -# Ĝ  [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] 
-"\u011C" => "G" - -# Ğ  [LATIN CAPITAL LETTER G WITH BREVE] -"\u011E" => "G" - -# Ġ  [LATIN CAPITAL LETTER G WITH DOT ABOVE] -"\u0120" => "G" - -# Ģ  [LATIN CAPITAL LETTER G WITH CEDILLA] -"\u0122" => "G" - -# Ɠ  [LATIN CAPITAL LETTER G WITH HOOK] -"\u0193" => "G" - -# Ǥ  [LATIN CAPITAL LETTER G WITH STROKE] -"\u01E4" => "G" - -# ǥ  [LATIN SMALL LETTER G WITH STROKE] -"\u01E5" => "G" - -# Ǧ  [LATIN CAPITAL LETTER G WITH CARON] -"\u01E6" => "G" - -# ǧ  [LATIN SMALL LETTER G WITH CARON] -"\u01E7" => "G" - -# Ǵ  [LATIN CAPITAL LETTER G WITH ACUTE] -"\u01F4" => "G" - -# ɢ  [LATIN LETTER SMALL CAPITAL G] -"\u0262" => "G" - -# ʛ  [LATIN LETTER SMALL CAPITAL G WITH HOOK] -"\u029B" => "G" - -# Ḡ  [LATIN CAPITAL LETTER G WITH MACRON] -"\u1E20" => "G" - -# Ⓖ  [CIRCLED LATIN CAPITAL LETTER G] -"\u24BC" => "G" - -# Ᵹ  [LATIN CAPITAL LETTER INSULAR G] -"\uA77D" => "G" - -# Ꝿ  [LATIN CAPITAL LETTER TURNED INSULAR G] -"\uA77E" => "G" - -# G  [FULLWIDTH LATIN CAPITAL LETTER G] -"\uFF27" => "G" - -# ĝ  [LATIN SMALL LETTER G WITH CIRCUMFLEX] -"\u011D" => "g" - -# ğ  [LATIN SMALL LETTER G WITH BREVE] -"\u011F" => "g" - -# ġ  [LATIN SMALL LETTER G WITH DOT ABOVE] -"\u0121" => "g" - -# ģ  [LATIN SMALL LETTER G WITH CEDILLA] -"\u0123" => "g" - -# ǵ  [LATIN SMALL LETTER G WITH ACUTE] -"\u01F5" => "g" - -# ɠ  [LATIN SMALL LETTER G WITH HOOK] -"\u0260" => "g" - -# ɡ  [LATIN SMALL LETTER SCRIPT G] -"\u0261" => "g" - -# ᵷ  [LATIN SMALL LETTER TURNED G] -"\u1D77" => "g" - -# ᵹ  [LATIN SMALL LETTER INSULAR G] -"\u1D79" => "g" - -# ᶃ  [LATIN SMALL LETTER G WITH PALATAL HOOK] -"\u1D83" => "g" - -# ḡ  [LATIN SMALL LETTER G WITH MACRON] -"\u1E21" => "g" - -# ⓖ  [CIRCLED LATIN SMALL LETTER G] -"\u24D6" => "g" - -# ꝿ  [LATIN SMALL LETTER TURNED INSULAR G] -"\uA77F" => "g" - -# g  [FULLWIDTH LATIN SMALL LETTER G] -"\uFF47" => "g" - -# ⒢  [PARENTHESIZED LATIN SMALL LETTER G] -"\u24A2" => "(g)" - -# Ĥ  [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] -"\u0124" => "H" - -# Ħ  [LATIN CAPITAL LETTER H WITH 
STROKE] -"\u0126" => "H" - -# Ȟ  [LATIN CAPITAL LETTER H WITH CARON] -"\u021E" => "H" - -# ʜ  [LATIN LETTER SMALL CAPITAL H] -"\u029C" => "H" - -# Ḣ  [LATIN CAPITAL LETTER H WITH DOT ABOVE] -"\u1E22" => "H" - -# Ḥ  [LATIN CAPITAL LETTER H WITH DOT BELOW] -"\u1E24" => "H" - -# Ḧ  [LATIN CAPITAL LETTER H WITH DIAERESIS] -"\u1E26" => "H" - -# Ḩ  [LATIN CAPITAL LETTER H WITH CEDILLA] -"\u1E28" => "H" - -# Ḫ  [LATIN CAPITAL LETTER H WITH BREVE BELOW] -"\u1E2A" => "H" - -# Ⓗ  [CIRCLED LATIN CAPITAL LETTER H] -"\u24BD" => "H" - -# Ⱨ  [LATIN CAPITAL LETTER H WITH DESCENDER] -"\u2C67" => "H" - -# Ⱶ  [LATIN CAPITAL LETTER HALF H] -"\u2C75" => "H" - -# H  [FULLWIDTH LATIN CAPITAL LETTER H] -"\uFF28" => "H" - -# ĥ  [LATIN SMALL LETTER H WITH CIRCUMFLEX] -"\u0125" => "h" - -# ħ  [LATIN SMALL LETTER H WITH STROKE] -"\u0127" => "h" - -# ȟ  [LATIN SMALL LETTER H WITH CARON] -"\u021F" => "h" - -# ɥ  [LATIN SMALL LETTER TURNED H] -"\u0265" => "h" - -# ɦ  [LATIN SMALL LETTER H WITH HOOK] -"\u0266" => "h" - -# ʮ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK] -"\u02AE" => "h" - -# ʯ  [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] -"\u02AF" => "h" - -# ḣ  [LATIN SMALL LETTER H WITH DOT ABOVE] -"\u1E23" => "h" - -# ḥ  [LATIN SMALL LETTER H WITH DOT BELOW] -"\u1E25" => "h" - -# ḧ  [LATIN SMALL LETTER H WITH DIAERESIS] -"\u1E27" => "h" - -# ḩ  [LATIN SMALL LETTER H WITH CEDILLA] -"\u1E29" => "h" - -# ḫ  [LATIN SMALL LETTER H WITH BREVE BELOW] -"\u1E2B" => "h" - -# ẖ  [LATIN SMALL LETTER H WITH LINE BELOW] -"\u1E96" => "h" - -# ⓗ  [CIRCLED LATIN SMALL LETTER H] -"\u24D7" => "h" - -# ⱨ  [LATIN SMALL LETTER H WITH DESCENDER] -"\u2C68" => "h" - -# ⱶ  [LATIN SMALL LETTER HALF H] -"\u2C76" => "h" - -# h  [FULLWIDTH LATIN SMALL LETTER H] -"\uFF48" => "h" - -# Ƕ  http://en.wikipedia.org/wiki/Hwair  [LATIN CAPITAL LETTER HWAIR] -"\u01F6" => "HV" - -# ⒣  [PARENTHESIZED LATIN SMALL LETTER H] -"\u24A3" => "(h)" - -# ƕ  [LATIN SMALL LETTER HV] -"\u0195" => "hv" - -# Ì  [LATIN CAPITAL LETTER I WITH 
GRAVE] -"\u00CC" => "I" - -# Í  [LATIN CAPITAL LETTER I WITH ACUTE] -"\u00CD" => "I" - -# Î  [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] -"\u00CE" => "I" - -# Ï  [LATIN CAPITAL LETTER I WITH DIAERESIS] -"\u00CF" => "I" - -# Ĩ  [LATIN CAPITAL LETTER I WITH TILDE] -"\u0128" => "I" - -# Ī  [LATIN CAPITAL LETTER I WITH MACRON] -"\u012A" => "I" - -# Ĭ  [LATIN CAPITAL LETTER I WITH BREVE] -"\u012C" => "I" - -# Į  [LATIN CAPITAL LETTER I WITH OGONEK] -"\u012E" => "I" - -# İ  [LATIN CAPITAL LETTER I WITH DOT ABOVE] -"\u0130" => "I" - -# Ɩ  [LATIN CAPITAL LETTER IOTA] -"\u0196" => "I" - -# Ɨ  [LATIN CAPITAL LETTER I WITH STROKE] -"\u0197" => "I" - -# Ǐ  [LATIN CAPITAL LETTER I WITH CARON] -"\u01CF" => "I" - -# Ȉ  [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] -"\u0208" => "I" - -# Ȋ  [LATIN CAPITAL LETTER I WITH INVERTED BREVE] -"\u020A" => "I" - -# ɪ  [LATIN LETTER SMALL CAPITAL I] -"\u026A" => "I" - -# ᵻ  [LATIN SMALL CAPITAL LETTER I WITH STROKE] -"\u1D7B" => "I" - -# Ḭ  [LATIN CAPITAL LETTER I WITH TILDE BELOW] -"\u1E2C" => "I" - -# Ḯ  [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2E" => "I" - -# Ỉ  [LATIN CAPITAL LETTER I WITH HOOK ABOVE] -"\u1EC8" => "I" - -# Ị  [LATIN CAPITAL LETTER I WITH DOT BELOW] -"\u1ECA" => "I" - -# Ⓘ  [CIRCLED LATIN CAPITAL LETTER I] -"\u24BE" => "I" - -# ꟾ  [LATIN EPIGRAPHIC LETTER I LONGA] -"\uA7FE" => "I" - -# I  [FULLWIDTH LATIN CAPITAL LETTER I] -"\uFF29" => "I" - -# ì  [LATIN SMALL LETTER I WITH GRAVE] -"\u00EC" => "i" - -# í  [LATIN SMALL LETTER I WITH ACUTE] -"\u00ED" => "i" - -# î  [LATIN SMALL LETTER I WITH CIRCUMFLEX] -"\u00EE" => "i" - -# ï  [LATIN SMALL LETTER I WITH DIAERESIS] -"\u00EF" => "i" - -# ĩ  [LATIN SMALL LETTER I WITH TILDE] -"\u0129" => "i" - -# ī  [LATIN SMALL LETTER I WITH MACRON] -"\u012B" => "i" - -# ĭ  [LATIN SMALL LETTER I WITH BREVE] -"\u012D" => "i" - -# į  [LATIN SMALL LETTER I WITH OGONEK] -"\u012F" => "i" - -# ı  [LATIN SMALL LETTER DOTLESS I] -"\u0131" => "i" - -# ǐ  [LATIN SMALL LETTER I WITH CARON] 
-"\u01D0" => "i" - -# ȉ  [LATIN SMALL LETTER I WITH DOUBLE GRAVE] -"\u0209" => "i" - -# ȋ  [LATIN SMALL LETTER I WITH INVERTED BREVE] -"\u020B" => "i" - -# ɨ  [LATIN SMALL LETTER I WITH STROKE] -"\u0268" => "i" - -# ᴉ  [LATIN SMALL LETTER TURNED I] -"\u1D09" => "i" - -# ᵢ  [LATIN SUBSCRIPT SMALL LETTER I] -"\u1D62" => "i" - -# ᵼ  [LATIN SMALL LETTER IOTA WITH STROKE] -"\u1D7C" => "i" - -# ᶖ  [LATIN SMALL LETTER I WITH RETROFLEX HOOK] -"\u1D96" => "i" - -# ḭ  [LATIN SMALL LETTER I WITH TILDE BELOW] -"\u1E2D" => "i" - -# ḯ  [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2F" => "i" - -# ỉ  [LATIN SMALL LETTER I WITH HOOK ABOVE] -"\u1EC9" => "i" - -# ị  [LATIN SMALL LETTER I WITH DOT BELOW] -"\u1ECB" => "i" - -# ⁱ  [SUPERSCRIPT LATIN SMALL LETTER I] -"\u2071" => "i" - -# ⓘ  [CIRCLED LATIN SMALL LETTER I] -"\u24D8" => "i" - -# i  [FULLWIDTH LATIN SMALL LETTER I] -"\uFF49" => "i" - -# IJ  [LATIN CAPITAL LIGATURE IJ] -"\u0132" => "IJ" - -# ⒤  [PARENTHESIZED LATIN SMALL LETTER I] -"\u24A4" => "(i)" - -# ij  [LATIN SMALL LIGATURE IJ] -"\u0133" => "ij" - -# Ĵ  [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] -"\u0134" => "J" - -# Ɉ  [LATIN CAPITAL LETTER J WITH STROKE] -"\u0248" => "J" - -# ᴊ  [LATIN LETTER SMALL CAPITAL J] -"\u1D0A" => "J" - -# Ⓙ  [CIRCLED LATIN CAPITAL LETTER J] -"\u24BF" => "J" - -# J  [FULLWIDTH LATIN CAPITAL LETTER J] -"\uFF2A" => "J" - -# ĵ  [LATIN SMALL LETTER J WITH CIRCUMFLEX] -"\u0135" => "j" - -# ǰ  [LATIN SMALL LETTER J WITH CARON] -"\u01F0" => "j" - -# ȷ  [LATIN SMALL LETTER DOTLESS J] -"\u0237" => "j" - -# ɉ  [LATIN SMALL LETTER J WITH STROKE] -"\u0249" => "j" - -# ɟ  [LATIN SMALL LETTER DOTLESS J WITH STROKE] -"\u025F" => "j" - -# ʄ  [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] -"\u0284" => "j" - -# ʝ  [LATIN SMALL LETTER J WITH CROSSED-TAIL] -"\u029D" => "j" - -# ⓙ  [CIRCLED LATIN SMALL LETTER J] -"\u24D9" => "j" - -# ⱼ  [LATIN SUBSCRIPT SMALL LETTER J] -"\u2C7C" => "j" - -# j  [FULLWIDTH LATIN SMALL LETTER J] -"\uFF4A" => "j" - -# ⒥  
[PARENTHESIZED LATIN SMALL LETTER J] -"\u24A5" => "(j)" - -# Ķ  [LATIN CAPITAL LETTER K WITH CEDILLA] -"\u0136" => "K" - -# Ƙ  [LATIN CAPITAL LETTER K WITH HOOK] -"\u0198" => "K" - -# Ǩ  [LATIN CAPITAL LETTER K WITH CARON] -"\u01E8" => "K" - -# ᴋ  [LATIN LETTER SMALL CAPITAL K] -"\u1D0B" => "K" - -# Ḱ  [LATIN CAPITAL LETTER K WITH ACUTE] -"\u1E30" => "K" - -# Ḳ  [LATIN CAPITAL LETTER K WITH DOT BELOW] -"\u1E32" => "K" - -# Ḵ  [LATIN CAPITAL LETTER K WITH LINE BELOW] -"\u1E34" => "K" - -# Ⓚ  [CIRCLED LATIN CAPITAL LETTER K] -"\u24C0" => "K" - -# Ⱪ  [LATIN CAPITAL LETTER K WITH DESCENDER] -"\u2C69" => "K" - -# Ꝁ  [LATIN CAPITAL LETTER K WITH STROKE] -"\uA740" => "K" - -# Ꝃ  [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] -"\uA742" => "K" - -# Ꝅ  [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA744" => "K" - -# K  [FULLWIDTH LATIN CAPITAL LETTER K] -"\uFF2B" => "K" - -# ķ  [LATIN SMALL LETTER K WITH CEDILLA] -"\u0137" => "k" - -# ƙ  [LATIN SMALL LETTER K WITH HOOK] -"\u0199" => "k" - -# ǩ  [LATIN SMALL LETTER K WITH CARON] -"\u01E9" => "k" - -# ʞ  [LATIN SMALL LETTER TURNED K] -"\u029E" => "k" - -# ᶄ  [LATIN SMALL LETTER K WITH PALATAL HOOK] -"\u1D84" => "k" - -# ḱ  [LATIN SMALL LETTER K WITH ACUTE] -"\u1E31" => "k" - -# ḳ  [LATIN SMALL LETTER K WITH DOT BELOW] -"\u1E33" => "k" - -# ḵ  [LATIN SMALL LETTER K WITH LINE BELOW] -"\u1E35" => "k" - -# ⓚ  [CIRCLED LATIN SMALL LETTER K] -"\u24DA" => "k" - -# ⱪ  [LATIN SMALL LETTER K WITH DESCENDER] -"\u2C6A" => "k" - -# ꝁ  [LATIN SMALL LETTER K WITH STROKE] -"\uA741" => "k" - -# ꝃ  [LATIN SMALL LETTER K WITH DIAGONAL STROKE] -"\uA743" => "k" - -# ꝅ  [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA745" => "k" - -# k  [FULLWIDTH LATIN SMALL LETTER K] -"\uFF4B" => "k" - -# ⒦  [PARENTHESIZED LATIN SMALL LETTER K] -"\u24A6" => "(k)" - -# Ĺ  [LATIN CAPITAL LETTER L WITH ACUTE] -"\u0139" => "L" - -# Ļ  [LATIN CAPITAL LETTER L WITH CEDILLA] -"\u013B" => "L" - -# Ľ  [LATIN CAPITAL LETTER L WITH CARON] -"\u013D" 
=> "L" - -# Ŀ  [LATIN CAPITAL LETTER L WITH MIDDLE DOT] -"\u013F" => "L" - -# Ł  [LATIN CAPITAL LETTER L WITH STROKE] -"\u0141" => "L" - -# Ƚ  [LATIN CAPITAL LETTER L WITH BAR] -"\u023D" => "L" - -# ʟ  [LATIN LETTER SMALL CAPITAL L] -"\u029F" => "L" - -# ᴌ  [LATIN LETTER SMALL CAPITAL L WITH STROKE] -"\u1D0C" => "L" - -# Ḷ  [LATIN CAPITAL LETTER L WITH DOT BELOW] -"\u1E36" => "L" - -# Ḹ  [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] -"\u1E38" => "L" - -# Ḻ  [LATIN CAPITAL LETTER L WITH LINE BELOW] -"\u1E3A" => "L" - -# Ḽ  [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3C" => "L" - -# Ⓛ  [CIRCLED LATIN CAPITAL LETTER L] -"\u24C1" => "L" - -# Ⱡ  [LATIN CAPITAL LETTER L WITH DOUBLE BAR] -"\u2C60" => "L" - -# Ɫ  [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] -"\u2C62" => "L" - -# Ꝇ  [LATIN CAPITAL LETTER BROKEN L] -"\uA746" => "L" - -# Ꝉ  [LATIN CAPITAL LETTER L WITH HIGH STROKE] -"\uA748" => "L" - -# Ꞁ  [LATIN CAPITAL LETTER TURNED L] -"\uA780" => "L" - -# L  [FULLWIDTH LATIN CAPITAL LETTER L] -"\uFF2C" => "L" - -# ĺ  [LATIN SMALL LETTER L WITH ACUTE] -"\u013A" => "l" - -# ļ  [LATIN SMALL LETTER L WITH CEDILLA] -"\u013C" => "l" - -# ľ  [LATIN SMALL LETTER L WITH CARON] -"\u013E" => "l" - -# ŀ  [LATIN SMALL LETTER L WITH MIDDLE DOT] -"\u0140" => "l" - -# ł  [LATIN SMALL LETTER L WITH STROKE] -"\u0142" => "l" - -# ƚ  [LATIN SMALL LETTER L WITH BAR] -"\u019A" => "l" - -# ȴ  [LATIN SMALL LETTER L WITH CURL] -"\u0234" => "l" - -# ɫ  [LATIN SMALL LETTER L WITH MIDDLE TILDE] -"\u026B" => "l" - -# ɬ  [LATIN SMALL LETTER L WITH BELT] -"\u026C" => "l" - -# ɭ  [LATIN SMALL LETTER L WITH RETROFLEX HOOK] -"\u026D" => "l" - -# ᶅ  [LATIN SMALL LETTER L WITH PALATAL HOOK] -"\u1D85" => "l" - -# ḷ  [LATIN SMALL LETTER L WITH DOT BELOW] -"\u1E37" => "l" - -# ḹ  [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] -"\u1E39" => "l" - -# ḻ  [LATIN SMALL LETTER L WITH LINE BELOW] -"\u1E3B" => "l" - -# ḽ  [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3D" => "l" - -# ⓛ  [CIRCLED LATIN 
SMALL LETTER L] -"\u24DB" => "l" - -# ⱡ  [LATIN SMALL LETTER L WITH DOUBLE BAR] -"\u2C61" => "l" - -# ꝇ  [LATIN SMALL LETTER BROKEN L] -"\uA747" => "l" - -# ꝉ  [LATIN SMALL LETTER L WITH HIGH STROKE] -"\uA749" => "l" - -# ꞁ  [LATIN SMALL LETTER TURNED L] -"\uA781" => "l" - -# l  [FULLWIDTH LATIN SMALL LETTER L] -"\uFF4C" => "l" - -# LJ  [LATIN CAPITAL LETTER LJ] -"\u01C7" => "LJ" - -# Ỻ  [LATIN CAPITAL LETTER MIDDLE-WELSH LL] -"\u1EFA" => "LL" - -# Lj  [LATIN CAPITAL LETTER L WITH SMALL LETTER J] -"\u01C8" => "Lj" - -# ⒧  [PARENTHESIZED LATIN SMALL LETTER L] -"\u24A7" => "(l)" - -# lj  [LATIN SMALL LETTER LJ] -"\u01C9" => "lj" - -# ỻ  [LATIN SMALL LETTER MIDDLE-WELSH LL] -"\u1EFB" => "ll" - -# ʪ  [LATIN SMALL LETTER LS DIGRAPH] -"\u02AA" => "ls" - -# ʫ  [LATIN SMALL LETTER LZ DIGRAPH] -"\u02AB" => "lz" - -# Ɯ  [LATIN CAPITAL LETTER TURNED M] -"\u019C" => "M" - -# ᴍ  [LATIN LETTER SMALL CAPITAL M] -"\u1D0D" => "M" - -# Ḿ  [LATIN CAPITAL LETTER M WITH ACUTE] -"\u1E3E" => "M" - -# Ṁ  [LATIN CAPITAL LETTER M WITH DOT ABOVE] -"\u1E40" => "M" - -# Ṃ  [LATIN CAPITAL LETTER M WITH DOT BELOW] -"\u1E42" => "M" - -# Ⓜ  [CIRCLED LATIN CAPITAL LETTER M] -"\u24C2" => "M" - -# Ɱ  [LATIN CAPITAL LETTER M WITH HOOK] -"\u2C6E" => "M" - -# ꟽ  [LATIN EPIGRAPHIC LETTER INVERTED M] -"\uA7FD" => "M" - -# ꟿ  [LATIN EPIGRAPHIC LETTER ARCHAIC M] -"\uA7FF" => "M" - -# M  [FULLWIDTH LATIN CAPITAL LETTER M] -"\uFF2D" => "M" - -# ɯ  [LATIN SMALL LETTER TURNED M] -"\u026F" => "m" - -# ɰ  [LATIN SMALL LETTER TURNED M WITH LONG LEG] -"\u0270" => "m" - -# ɱ  [LATIN SMALL LETTER M WITH HOOK] -"\u0271" => "m" - -# ᵯ  [LATIN SMALL LETTER M WITH MIDDLE TILDE] -"\u1D6F" => "m" - -# ᶆ  [LATIN SMALL LETTER M WITH PALATAL HOOK] -"\u1D86" => "m" - -# ḿ  [LATIN SMALL LETTER M WITH ACUTE] -"\u1E3F" => "m" - -# ṁ  [LATIN SMALL LETTER M WITH DOT ABOVE] -"\u1E41" => "m" - -# ṃ  [LATIN SMALL LETTER M WITH DOT BELOW] -"\u1E43" => "m" - -# ⓜ  [CIRCLED LATIN SMALL LETTER M] -"\u24DC" => "m" - -# m  [FULLWIDTH LATIN 
SMALL LETTER M] -"\uFF4D" => "m" - -# ⒨  [PARENTHESIZED LATIN SMALL LETTER M] -"\u24A8" => "(m)" - -# Ñ  [LATIN CAPITAL LETTER N WITH TILDE] -"\u00D1" => "N" - -# Ń  [LATIN CAPITAL LETTER N WITH ACUTE] -"\u0143" => "N" - -# Ņ  [LATIN CAPITAL LETTER N WITH CEDILLA] -"\u0145" => "N" - -# Ň  [LATIN CAPITAL LETTER N WITH CARON] -"\u0147" => "N" - -# Ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN CAPITAL LETTER ENG] -"\u014A" => "N" - -# Ɲ  [LATIN CAPITAL LETTER N WITH LEFT HOOK] -"\u019D" => "N" - -# Ǹ  [LATIN CAPITAL LETTER N WITH GRAVE] -"\u01F8" => "N" - -# Ƞ  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] -"\u0220" => "N" - -# ɴ  [LATIN LETTER SMALL CAPITAL N] -"\u0274" => "N" - -# ᴎ  [LATIN LETTER SMALL CAPITAL REVERSED N] -"\u1D0E" => "N" - -# Ṅ  [LATIN CAPITAL LETTER N WITH DOT ABOVE] -"\u1E44" => "N" - -# Ṇ  [LATIN CAPITAL LETTER N WITH DOT BELOW] -"\u1E46" => "N" - -# Ṉ  [LATIN CAPITAL LETTER N WITH LINE BELOW] -"\u1E48" => "N" - -# Ṋ  [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4A" => "N" - -# Ⓝ  [CIRCLED LATIN CAPITAL LETTER N] -"\u24C3" => "N" - -# N  [FULLWIDTH LATIN CAPITAL LETTER N] -"\uFF2E" => "N" - -# ñ  [LATIN SMALL LETTER N WITH TILDE] -"\u00F1" => "n" - -# ń  [LATIN SMALL LETTER N WITH ACUTE] -"\u0144" => "n" - -# ņ  [LATIN SMALL LETTER N WITH CEDILLA] -"\u0146" => "n" - -# ň  [LATIN SMALL LETTER N WITH CARON] -"\u0148" => "n" - -# ʼn  [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] -"\u0149" => "n" - -# ŋ  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN SMALL LETTER ENG] -"\u014B" => "n" - -# ƞ  [LATIN SMALL LETTER N WITH LONG RIGHT LEG] -"\u019E" => "n" - -# ǹ  [LATIN SMALL LETTER N WITH GRAVE] -"\u01F9" => "n" - -# ȵ  [LATIN SMALL LETTER N WITH CURL] -"\u0235" => "n" - -# ɲ  [LATIN SMALL LETTER N WITH LEFT HOOK] -"\u0272" => "n" - -# ɳ  [LATIN SMALL LETTER N WITH RETROFLEX HOOK] -"\u0273" => "n" - -# ᵰ  [LATIN SMALL LETTER N WITH MIDDLE TILDE] -"\u1D70" => "n" - -# ᶇ  [LATIN SMALL LETTER N WITH PALATAL HOOK] -"\u1D87" => "n" - -# ṅ  
[LATIN SMALL LETTER N WITH DOT ABOVE] -"\u1E45" => "n" - -# ṇ  [LATIN SMALL LETTER N WITH DOT BELOW] -"\u1E47" => "n" - -# ṉ  [LATIN SMALL LETTER N WITH LINE BELOW] -"\u1E49" => "n" - -# ṋ  [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4B" => "n" - -# ⁿ  [SUPERSCRIPT LATIN SMALL LETTER N] -"\u207F" => "n" - -# ⓝ  [CIRCLED LATIN SMALL LETTER N] -"\u24DD" => "n" - -# n  [FULLWIDTH LATIN SMALL LETTER N] -"\uFF4E" => "n" - -# NJ  [LATIN CAPITAL LETTER NJ] -"\u01CA" => "NJ" - -# Nj  [LATIN CAPITAL LETTER N WITH SMALL LETTER J] -"\u01CB" => "Nj" - -# ⒩  [PARENTHESIZED LATIN SMALL LETTER N] -"\u24A9" => "(n)" - -# nj  [LATIN SMALL LETTER NJ] -"\u01CC" => "nj" - -# Ò  [LATIN CAPITAL LETTER O WITH GRAVE] -"\u00D2" => "O" - -# Ó  [LATIN CAPITAL LETTER O WITH ACUTE] -"\u00D3" => "O" - -# Ô  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] -"\u00D4" => "O" - -# Õ  [LATIN CAPITAL LETTER O WITH TILDE] -"\u00D5" => "O" - -# Ö  [LATIN CAPITAL LETTER O WITH DIAERESIS] -"\u00D6" => "O" - -# Ø  [LATIN CAPITAL LETTER O WITH STROKE] -"\u00D8" => "O" - -# Ō  [LATIN CAPITAL LETTER O WITH MACRON] -"\u014C" => "O" - -# Ŏ  [LATIN CAPITAL LETTER O WITH BREVE] -"\u014E" => "O" - -# Ő  [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] -"\u0150" => "O" - -# Ɔ  [LATIN CAPITAL LETTER OPEN O] -"\u0186" => "O" - -# Ɵ  [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] -"\u019F" => "O" - -# Ơ  [LATIN CAPITAL LETTER O WITH HORN] -"\u01A0" => "O" - -# Ǒ  [LATIN CAPITAL LETTER O WITH CARON] -"\u01D1" => "O" - -# Ǫ  [LATIN CAPITAL LETTER O WITH OGONEK] -"\u01EA" => "O" - -# Ǭ  [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] -"\u01EC" => "O" - -# Ǿ  [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] -"\u01FE" => "O" - -# Ȍ  [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] -"\u020C" => "O" - -# Ȏ  [LATIN CAPITAL LETTER O WITH INVERTED BREVE] -"\u020E" => "O" - -# Ȫ  [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] -"\u022A" => "O" - -# Ȭ  [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] -"\u022C" => "O" - -# Ȯ  [LATIN CAPITAL LETTER 
O WITH DOT ABOVE] -"\u022E" => "O" - -# Ȱ  [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] -"\u0230" => "O" - -# ᴏ  [LATIN LETTER SMALL CAPITAL O] -"\u1D0F" => "O" - -# ᴐ  [LATIN LETTER SMALL CAPITAL OPEN O] -"\u1D10" => "O" - -# Ṍ  [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] -"\u1E4C" => "O" - -# Ṏ  [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4E" => "O" - -# Ṑ  [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] -"\u1E50" => "O" - -# Ṓ  [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] -"\u1E52" => "O" - -# Ọ  [LATIN CAPITAL LETTER O WITH DOT BELOW] -"\u1ECC" => "O" - -# Ỏ  [LATIN CAPITAL LETTER O WITH HOOK ABOVE] -"\u1ECE" => "O" - -# Ố  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED0" => "O" - -# Ồ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED2" => "O" - -# Ổ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED4" => "O" - -# Ỗ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED6" => "O" - -# Ộ  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED8" => "O" - -# Ớ  [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] -"\u1EDA" => "O" - -# Ờ  [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] -"\u1EDC" => "O" - -# Ở  [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDE" => "O" - -# Ỡ  [LATIN CAPITAL LETTER O WITH HORN AND TILDE] -"\u1EE0" => "O" - -# Ợ  [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] -"\u1EE2" => "O" - -# Ⓞ  [CIRCLED LATIN CAPITAL LETTER O] -"\u24C4" => "O" - -# Ꝋ  [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] -"\uA74A" => "O" - -# Ꝍ  [LATIN CAPITAL LETTER O WITH LOOP] -"\uA74C" => "O" - -# O  [FULLWIDTH LATIN CAPITAL LETTER O] -"\uFF2F" => "O" - -# ò  [LATIN SMALL LETTER O WITH GRAVE] -"\u00F2" => "o" - -# ó  [LATIN SMALL LETTER O WITH ACUTE] -"\u00F3" => "o" - -# ô  [LATIN SMALL LETTER O WITH CIRCUMFLEX] -"\u00F4" => "o" - -# õ  [LATIN SMALL LETTER O WITH TILDE] -"\u00F5" => "o" - -# ö  [LATIN SMALL LETTER O WITH DIAERESIS] -"\u00F6" => "o" - -# ø  [LATIN SMALL LETTER O WITH STROKE] 
-"\u00F8" => "o" - -# ō  [LATIN SMALL LETTER O WITH MACRON] -"\u014D" => "o" - -# ŏ  [LATIN SMALL LETTER O WITH BREVE] -"\u014F" => "o" - -# ő  [LATIN SMALL LETTER O WITH DOUBLE ACUTE] -"\u0151" => "o" - -# ơ  [LATIN SMALL LETTER O WITH HORN] -"\u01A1" => "o" - -# ǒ  [LATIN SMALL LETTER O WITH CARON] -"\u01D2" => "o" - -# ǫ  [LATIN SMALL LETTER O WITH OGONEK] -"\u01EB" => "o" - -# ǭ  [LATIN SMALL LETTER O WITH OGONEK AND MACRON] -"\u01ED" => "o" - -# ǿ  [LATIN SMALL LETTER O WITH STROKE AND ACUTE] -"\u01FF" => "o" - -# ȍ  [LATIN SMALL LETTER O WITH DOUBLE GRAVE] -"\u020D" => "o" - -# ȏ  [LATIN SMALL LETTER O WITH INVERTED BREVE] -"\u020F" => "o" - -# ȫ  [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] -"\u022B" => "o" - -# ȭ  [LATIN SMALL LETTER O WITH TILDE AND MACRON] -"\u022D" => "o" - -# ȯ  [LATIN SMALL LETTER O WITH DOT ABOVE] -"\u022F" => "o" - -# ȱ  [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] -"\u0231" => "o" - -# ɔ  [LATIN SMALL LETTER OPEN O] -"\u0254" => "o" - -# ɵ  [LATIN SMALL LETTER BARRED O] -"\u0275" => "o" - -# ᴖ  [LATIN SMALL LETTER TOP HALF O] -"\u1D16" => "o" - -# ᴗ  [LATIN SMALL LETTER BOTTOM HALF O] -"\u1D17" => "o" - -# ᶗ  [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] -"\u1D97" => "o" - -# ṍ  [LATIN SMALL LETTER O WITH TILDE AND ACUTE] -"\u1E4D" => "o" - -# ṏ  [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4F" => "o" - -# ṑ  [LATIN SMALL LETTER O WITH MACRON AND GRAVE] -"\u1E51" => "o" - -# ṓ  [LATIN SMALL LETTER O WITH MACRON AND ACUTE] -"\u1E53" => "o" - -# ọ  [LATIN SMALL LETTER O WITH DOT BELOW] -"\u1ECD" => "o" - -# ỏ  [LATIN SMALL LETTER O WITH HOOK ABOVE] -"\u1ECF" => "o" - -# ố  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED1" => "o" - -# ồ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED3" => "o" - -# ổ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED5" => "o" - -# ỗ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED7" => "o" - -# ộ  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT 
BELOW] -"\u1ED9" => "o" - -# ớ  [LATIN SMALL LETTER O WITH HORN AND ACUTE] -"\u1EDB" => "o" - -# ờ  [LATIN SMALL LETTER O WITH HORN AND GRAVE] -"\u1EDD" => "o" - -# ở  [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDF" => "o" - -# ỡ  [LATIN SMALL LETTER O WITH HORN AND TILDE] -"\u1EE1" => "o" - -# ợ  [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] -"\u1EE3" => "o" - -# ₒ  [LATIN SUBSCRIPT SMALL LETTER O] -"\u2092" => "o" - -# ⓞ  [CIRCLED LATIN SMALL LETTER O] -"\u24DE" => "o" - -# ⱺ  [LATIN SMALL LETTER O WITH LOW RING INSIDE] -"\u2C7A" => "o" - -# ꝋ  [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] -"\uA74B" => "o" - -# ꝍ  [LATIN SMALL LETTER O WITH LOOP] -"\uA74D" => "o" - -# o  [FULLWIDTH LATIN SMALL LETTER O] -"\uFF4F" => "o" - -# Œ  [LATIN CAPITAL LIGATURE OE] -"\u0152" => "OE" - -# ɶ  [LATIN LETTER SMALL CAPITAL OE] -"\u0276" => "OE" - -# Ꝏ  [LATIN CAPITAL LETTER OO] -"\uA74E" => "OO" - -# Ȣ  http://en.wikipedia.org/wiki/OU  [LATIN CAPITAL LETTER OU] -"\u0222" => "OU" - -# ᴕ  [LATIN LETTER SMALL CAPITAL OU] -"\u1D15" => "OU" - -# ⒪  [PARENTHESIZED LATIN SMALL LETTER O] -"\u24AA" => "(o)" - -# œ  [LATIN SMALL LIGATURE OE] -"\u0153" => "oe" - -# ᴔ  [LATIN SMALL LETTER TURNED OE] -"\u1D14" => "oe" - -# ꝏ  [LATIN SMALL LETTER OO] -"\uA74F" => "oo" - -# ȣ  http://en.wikipedia.org/wiki/OU  [LATIN SMALL LETTER OU] -"\u0223" => "ou" - -# Ƥ  [LATIN CAPITAL LETTER P WITH HOOK] -"\u01A4" => "P" - -# ᴘ  [LATIN LETTER SMALL CAPITAL P] -"\u1D18" => "P" - -# Ṕ  [LATIN CAPITAL LETTER P WITH ACUTE] -"\u1E54" => "P" - -# Ṗ  [LATIN CAPITAL LETTER P WITH DOT ABOVE] -"\u1E56" => "P" - -# Ⓟ  [CIRCLED LATIN CAPITAL LETTER P] -"\u24C5" => "P" - -# Ᵽ  [LATIN CAPITAL LETTER P WITH STROKE] -"\u2C63" => "P" - -# Ꝑ  [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA750" => "P" - -# Ꝓ  [LATIN CAPITAL LETTER P WITH FLOURISH] -"\uA752" => "P" - -# Ꝕ  [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] -"\uA754" => "P" - -# P  [FULLWIDTH LATIN CAPITAL LETTER P] -"\uFF30" => "P" - -# ƥ 
 [LATIN SMALL LETTER P WITH HOOK] -"\u01A5" => "p" - -# ᵱ  [LATIN SMALL LETTER P WITH MIDDLE TILDE] -"\u1D71" => "p" - -# ᵽ  [LATIN SMALL LETTER P WITH STROKE] -"\u1D7D" => "p" - -# ᶈ  [LATIN SMALL LETTER P WITH PALATAL HOOK] -"\u1D88" => "p" - -# ṕ  [LATIN SMALL LETTER P WITH ACUTE] -"\u1E55" => "p" - -# ṗ  [LATIN SMALL LETTER P WITH DOT ABOVE] -"\u1E57" => "p" - -# ⓟ  [CIRCLED LATIN SMALL LETTER P] -"\u24DF" => "p" - -# ꝑ  [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA751" => "p" - -# ꝓ  [LATIN SMALL LETTER P WITH FLOURISH] -"\uA753" => "p" - -# ꝕ  [LATIN SMALL LETTER P WITH SQUIRREL TAIL] -"\uA755" => "p" - -# ꟼ  [LATIN EPIGRAPHIC LETTER REVERSED P] -"\uA7FC" => "p" - -# p  [FULLWIDTH LATIN SMALL LETTER P] -"\uFF50" => "p" - -# ⒫  [PARENTHESIZED LATIN SMALL LETTER P] -"\u24AB" => "(p)" - -# Ɋ  [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] -"\u024A" => "Q" - -# Ⓠ  [CIRCLED LATIN CAPITAL LETTER Q] -"\u24C6" => "Q" - -# Ꝗ  [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA756" => "Q" - -# Ꝙ  [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] -"\uA758" => "Q" - -# Q  [FULLWIDTH LATIN CAPITAL LETTER Q] -"\uFF31" => "Q" - -# ĸ  http://en.wikipedia.org/wiki/Kra_(letter)  [LATIN SMALL LETTER KRA] -"\u0138" => "q" - -# ɋ  [LATIN SMALL LETTER Q WITH HOOK TAIL] -"\u024B" => "q" - -# ʠ  [LATIN SMALL LETTER Q WITH HOOK] -"\u02A0" => "q" - -# ⓠ  [CIRCLED LATIN SMALL LETTER Q] -"\u24E0" => "q" - -# ꝗ  [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA757" => "q" - -# ꝙ  [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] -"\uA759" => "q" - -# q  [FULLWIDTH LATIN SMALL LETTER Q] -"\uFF51" => "q" - -# ⒬  [PARENTHESIZED LATIN SMALL LETTER Q] -"\u24AC" => "(q)" - -# ȹ  [LATIN SMALL LETTER QP DIGRAPH] -"\u0239" => "qp" - -# Ŕ  [LATIN CAPITAL LETTER R WITH ACUTE] -"\u0154" => "R" - -# Ŗ  [LATIN CAPITAL LETTER R WITH CEDILLA] -"\u0156" => "R" - -# Ř  [LATIN CAPITAL LETTER R WITH CARON] -"\u0158" => "R" - -# Ȓ  [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] -"\u0210" => 
"R" - -# Ȓ  [LATIN CAPITAL LETTER R WITH INVERTED BREVE] -"\u0212" => "R" - -# Ɍ  [LATIN CAPITAL LETTER R WITH STROKE] -"\u024C" => "R" - -# ʀ  [LATIN LETTER SMALL CAPITAL R] -"\u0280" => "R" - -# ʁ  [LATIN LETTER SMALL CAPITAL INVERTED R] -"\u0281" => "R" - -# ᴙ  [LATIN LETTER SMALL CAPITAL REVERSED R] -"\u1D19" => "R" - -# ᴚ  [LATIN LETTER SMALL CAPITAL TURNED R] -"\u1D1A" => "R" - -# Ṙ  [LATIN CAPITAL LETTER R WITH DOT ABOVE] -"\u1E58" => "R" - -# Ṛ  [LATIN CAPITAL LETTER R WITH DOT BELOW] -"\u1E5A" => "R" - -# Ṝ  [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5C" => "R" - -# Ṟ  [LATIN CAPITAL LETTER R WITH LINE BELOW] -"\u1E5E" => "R" - -# Ⓡ  [CIRCLED LATIN CAPITAL LETTER R] -"\u24C7" => "R" - -# Ɽ  [LATIN CAPITAL LETTER R WITH TAIL] -"\u2C64" => "R" - -# Ꝛ  [LATIN CAPITAL LETTER R ROTUNDA] -"\uA75A" => "R" - -# Ꞃ  [LATIN CAPITAL LETTER INSULAR R] -"\uA782" => "R" - -# R  [FULLWIDTH LATIN CAPITAL LETTER R] -"\uFF32" => "R" - -# ŕ  [LATIN SMALL LETTER R WITH ACUTE] -"\u0155" => "r" - -# ŗ  [LATIN SMALL LETTER R WITH CEDILLA] -"\u0157" => "r" - -# ř  [LATIN SMALL LETTER R WITH CARON] -"\u0159" => "r" - -# ȑ  [LATIN SMALL LETTER R WITH DOUBLE GRAVE] -"\u0211" => "r" - -# ȓ  [LATIN SMALL LETTER R WITH INVERTED BREVE] -"\u0213" => "r" - -# ɍ  [LATIN SMALL LETTER R WITH STROKE] -"\u024D" => "r" - -# ɼ  [LATIN SMALL LETTER R WITH LONG LEG] -"\u027C" => "r" - -# ɽ  [LATIN SMALL LETTER R WITH TAIL] -"\u027D" => "r" - -# ɾ  [LATIN SMALL LETTER R WITH FISHHOOK] -"\u027E" => "r" - -# ɿ  [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] -"\u027F" => "r" - -# ᵣ  [LATIN SUBSCRIPT SMALL LETTER R] -"\u1D63" => "r" - -# ᵲ  [LATIN SMALL LETTER R WITH MIDDLE TILDE] -"\u1D72" => "r" - -# ᵳ  [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] -"\u1D73" => "r" - -# ᶉ  [LATIN SMALL LETTER R WITH PALATAL HOOK] -"\u1D89" => "r" - -# ṙ  [LATIN SMALL LETTER R WITH DOT ABOVE] -"\u1E59" => "r" - -# ṛ  [LATIN SMALL LETTER R WITH DOT BELOW] -"\u1E5B" => "r" - -# ṝ  [LATIN SMALL LETTER 
R WITH DOT BELOW AND MACRON] -"\u1E5D" => "r" - -# ṟ  [LATIN SMALL LETTER R WITH LINE BELOW] -"\u1E5F" => "r" - -# ⓡ  [CIRCLED LATIN SMALL LETTER R] -"\u24E1" => "r" - -# ꝛ  [LATIN SMALL LETTER R ROTUNDA] -"\uA75B" => "r" - -# ꞃ  [LATIN SMALL LETTER INSULAR R] -"\uA783" => "r" - -# r  [FULLWIDTH LATIN SMALL LETTER R] -"\uFF52" => "r" - -# ⒭  [PARENTHESIZED LATIN SMALL LETTER R] -"\u24AD" => "(r)" - -# Ś  [LATIN CAPITAL LETTER S WITH ACUTE] -"\u015A" => "S" - -# Ŝ  [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] -"\u015C" => "S" - -# Ş  [LATIN CAPITAL LETTER S WITH CEDILLA] -"\u015E" => "S" - -# Š  [LATIN CAPITAL LETTER S WITH CARON] -"\u0160" => "S" - -# Ș  [LATIN CAPITAL LETTER S WITH COMMA BELOW] -"\u0218" => "S" - -# Ṡ  [LATIN CAPITAL LETTER S WITH DOT ABOVE] -"\u1E60" => "S" - -# Ṣ  [LATIN CAPITAL LETTER S WITH DOT BELOW] -"\u1E62" => "S" - -# Ṥ  [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E64" => "S" - -# Ṧ  [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] -"\u1E66" => "S" - -# Ṩ  [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E68" => "S" - -# Ⓢ  [CIRCLED LATIN CAPITAL LETTER S] -"\u24C8" => "S" - -# ꜱ  [LATIN LETTER SMALL CAPITAL S] -"\uA731" => "S" - -# ꞅ  [LATIN SMALL LETTER INSULAR S] -"\uA785" => "S" - -# S  [FULLWIDTH LATIN CAPITAL LETTER S] -"\uFF33" => "S" - -# ś  [LATIN SMALL LETTER S WITH ACUTE] -"\u015B" => "s" - -# ŝ  [LATIN SMALL LETTER S WITH CIRCUMFLEX] -"\u015D" => "s" - -# ş  [LATIN SMALL LETTER S WITH CEDILLA] -"\u015F" => "s" - -# š  [LATIN SMALL LETTER S WITH CARON] -"\u0161" => "s" - -# ſ  http://en.wikipedia.org/wiki/Long_S  [LATIN SMALL LETTER LONG S] -"\u017F" => "s" - -# ș  [LATIN SMALL LETTER S WITH COMMA BELOW] -"\u0219" => "s" - -# ȿ  [LATIN SMALL LETTER S WITH SWASH TAIL] -"\u023F" => "s" - -# ʂ  [LATIN SMALL LETTER S WITH HOOK] -"\u0282" => "s" - -# ᵴ  [LATIN SMALL LETTER S WITH MIDDLE TILDE] -"\u1D74" => "s" - -# ᶊ  [LATIN SMALL LETTER S WITH PALATAL HOOK] -"\u1D8A" => "s" - -# ṡ  [LATIN SMALL LETTER S WITH DOT 
ABOVE] -"\u1E61" => "s" - -# ṣ  [LATIN SMALL LETTER S WITH DOT BELOW] -"\u1E63" => "s" - -# ṥ  [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E65" => "s" - -# ṧ  [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] -"\u1E67" => "s" - -# ṩ  [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E69" => "s" - -# ẜ  [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] -"\u1E9C" => "s" - -# ẝ  [LATIN SMALL LETTER LONG S WITH HIGH STROKE] -"\u1E9D" => "s" - -# ⓢ  [CIRCLED LATIN SMALL LETTER S] -"\u24E2" => "s" - -# Ꞅ  [LATIN CAPITAL LETTER INSULAR S] -"\uA784" => "s" - -# s  [FULLWIDTH LATIN SMALL LETTER S] -"\uFF53" => "s" - -# ẞ  [LATIN CAPITAL LETTER SHARP S] -"\u1E9E" => "SS" - -# ⒮  [PARENTHESIZED LATIN SMALL LETTER S] -"\u24AE" => "(s)" - -# ß  [LATIN SMALL LETTER SHARP S] -"\u00DF" => "ss" - -# st  [LATIN SMALL LIGATURE ST] -"\uFB06" => "st" - -# Ţ  [LATIN CAPITAL LETTER T WITH CEDILLA] -"\u0162" => "T" - -# Ť  [LATIN CAPITAL LETTER T WITH CARON] -"\u0164" => "T" - -# Ŧ  [LATIN CAPITAL LETTER T WITH STROKE] -"\u0166" => "T" - -# Ƭ  [LATIN CAPITAL LETTER T WITH HOOK] -"\u01AC" => "T" - -# Ʈ  [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] -"\u01AE" => "T" - -# Ț  [LATIN CAPITAL LETTER T WITH COMMA BELOW] -"\u021A" => "T" - -# Ⱦ  [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] -"\u023E" => "T" - -# ᴛ  [LATIN LETTER SMALL CAPITAL T] -"\u1D1B" => "T" - -# Ṫ  [LATIN CAPITAL LETTER T WITH DOT ABOVE] -"\u1E6A" => "T" - -# Ṭ  [LATIN CAPITAL LETTER T WITH DOT BELOW] -"\u1E6C" => "T" - -# Ṯ  [LATIN CAPITAL LETTER T WITH LINE BELOW] -"\u1E6E" => "T" - -# Ṱ  [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E70" => "T" - -# Ⓣ  [CIRCLED LATIN CAPITAL LETTER T] -"\u24C9" => "T" - -# Ꞇ  [LATIN CAPITAL LETTER INSULAR T] -"\uA786" => "T" - -# T  [FULLWIDTH LATIN CAPITAL LETTER T] -"\uFF34" => "T" - -# ţ  [LATIN SMALL LETTER T WITH CEDILLA] -"\u0163" => "t" - -# ť  [LATIN SMALL LETTER T WITH CARON] -"\u0165" => "t" - -# ŧ  [LATIN SMALL LETTER T WITH STROKE] -"\u0167" => "t" - -# ƫ  
[LATIN SMALL LETTER T WITH PALATAL HOOK] -"\u01AB" => "t" - -# ƭ  [LATIN SMALL LETTER T WITH HOOK] -"\u01AD" => "t" - -# ț  [LATIN SMALL LETTER T WITH COMMA BELOW] -"\u021B" => "t" - -# ȶ  [LATIN SMALL LETTER T WITH CURL] -"\u0236" => "t" - -# ʇ  [LATIN SMALL LETTER TURNED T] -"\u0287" => "t" - -# ʈ  [LATIN SMALL LETTER T WITH RETROFLEX HOOK] -"\u0288" => "t" - -# ᵵ  [LATIN SMALL LETTER T WITH MIDDLE TILDE] -"\u1D75" => "t" - -# ṫ  [LATIN SMALL LETTER T WITH DOT ABOVE] -"\u1E6B" => "t" - -# ṭ  [LATIN SMALL LETTER T WITH DOT BELOW] -"\u1E6D" => "t" - -# ṯ  [LATIN SMALL LETTER T WITH LINE BELOW] -"\u1E6F" => "t" - -# ṱ  [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E71" => "t" - -# ẗ  [LATIN SMALL LETTER T WITH DIAERESIS] -"\u1E97" => "t" - -# ⓣ  [CIRCLED LATIN SMALL LETTER T] -"\u24E3" => "t" - -# ⱦ  [LATIN SMALL LETTER T WITH DIAGONAL STROKE] -"\u2C66" => "t" - -# t  [FULLWIDTH LATIN SMALL LETTER T] -"\uFF54" => "t" - -# Þ  [LATIN CAPITAL LETTER THORN] -"\u00DE" => "TH" - -# Ꝧ  [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA766" => "TH" - -# Ꜩ  [LATIN CAPITAL LETTER TZ] -"\uA728" => "TZ" - -# ⒯  [PARENTHESIZED LATIN SMALL LETTER T] -"\u24AF" => "(t)" - -# ʨ  [LATIN SMALL LETTER TC DIGRAPH WITH CURL] -"\u02A8" => "tc" - -# þ  [LATIN SMALL LETTER THORN] -"\u00FE" => "th" - -# ᵺ  [LATIN SMALL LETTER TH WITH STRIKETHROUGH] -"\u1D7A" => "th" - -# ꝧ  [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA767" => "th" - -# ʦ  [LATIN SMALL LETTER TS DIGRAPH] -"\u02A6" => "ts" - -# ꜩ  [LATIN SMALL LETTER TZ] -"\uA729" => "tz" - -# Ù  [LATIN CAPITAL LETTER U WITH GRAVE] -"\u00D9" => "U" - -# Ú  [LATIN CAPITAL LETTER U WITH ACUTE] -"\u00DA" => "U" - -# Û  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] -"\u00DB" => "U" - -# Ü  [LATIN CAPITAL LETTER U WITH DIAERESIS] -"\u00DC" => "U" - -# Ũ  [LATIN CAPITAL LETTER U WITH TILDE] -"\u0168" => "U" - -# Ū  [LATIN CAPITAL LETTER U WITH MACRON] -"\u016A" => "U" - -# Ŭ  [LATIN CAPITAL LETTER U WITH BREVE] 
-"\u016C" => "U" - -# Ů  [LATIN CAPITAL LETTER U WITH RING ABOVE] -"\u016E" => "U" - -# Ű  [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] -"\u0170" => "U" - -# Ų  [LATIN CAPITAL LETTER U WITH OGONEK] -"\u0172" => "U" - -# Ư  [LATIN CAPITAL LETTER U WITH HORN] -"\u01AF" => "U" - -# Ǔ  [LATIN CAPITAL LETTER U WITH CARON] -"\u01D3" => "U" - -# Ǖ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] -"\u01D5" => "U" - -# Ǘ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D7" => "U" - -# Ǚ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] -"\u01D9" => "U" - -# Ǜ  [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DB" => "U" - -# Ȕ  [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] -"\u0214" => "U" - -# Ȗ  [LATIN CAPITAL LETTER U WITH INVERTED BREVE] -"\u0216" => "U" - -# Ʉ  [LATIN CAPITAL LETTER U BAR] -"\u0244" => "U" - -# ᴜ  [LATIN LETTER SMALL CAPITAL U] -"\u1D1C" => "U" - -# ᵾ  [LATIN SMALL CAPITAL LETTER U WITH STROKE] -"\u1D7E" => "U" - -# Ṳ  [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] -"\u1E72" => "U" - -# Ṵ  [LATIN CAPITAL LETTER U WITH TILDE BELOW] -"\u1E74" => "U" - -# Ṷ  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E76" => "U" - -# Ṹ  [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] -"\u1E78" => "U" - -# Ṻ  [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7A" => "U" - -# Ụ  [LATIN CAPITAL LETTER U WITH DOT BELOW] -"\u1EE4" => "U" - -# Ủ  [LATIN CAPITAL LETTER U WITH HOOK ABOVE] -"\u1EE6" => "U" - -# Ứ  [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] -"\u1EE8" => "U" - -# Ừ  [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] -"\u1EEA" => "U" - -# Ử  [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EEC" => "U" - -# Ữ  [LATIN CAPITAL LETTER U WITH HORN AND TILDE] -"\u1EEE" => "U" - -# Ự  [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] -"\u1EF0" => "U" - -# Ⓤ  [CIRCLED LATIN CAPITAL LETTER U] -"\u24CA" => "U" - -# U  [FULLWIDTH LATIN CAPITAL LETTER U] -"\uFF35" => "U" - -# ù  [LATIN SMALL LETTER U WITH GRAVE] -"\u00F9" => "u" - -# ú  [LATIN SMALL 
LETTER U WITH ACUTE] -"\u00FA" => "u" - -# û  [LATIN SMALL LETTER U WITH CIRCUMFLEX] -"\u00FB" => "u" - -# ü  [LATIN SMALL LETTER U WITH DIAERESIS] -"\u00FC" => "u" - -# ũ  [LATIN SMALL LETTER U WITH TILDE] -"\u0169" => "u" - -# ū  [LATIN SMALL LETTER U WITH MACRON] -"\u016B" => "u" - -# ŭ  [LATIN SMALL LETTER U WITH BREVE] -"\u016D" => "u" - -# ů  [LATIN SMALL LETTER U WITH RING ABOVE] -"\u016F" => "u" - -# ű  [LATIN SMALL LETTER U WITH DOUBLE ACUTE] -"\u0171" => "u" - -# ų  [LATIN SMALL LETTER U WITH OGONEK] -"\u0173" => "u" - -# ư  [LATIN SMALL LETTER U WITH HORN] -"\u01B0" => "u" - -# ǔ  [LATIN SMALL LETTER U WITH CARON] -"\u01D4" => "u" - -# ǖ  [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] -"\u01D6" => "u" - -# ǘ  [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D8" => "u" - -# ǚ  [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] -"\u01DA" => "u" - -# ǜ  [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DC" => "u" - -# ȕ  [LATIN SMALL LETTER U WITH DOUBLE GRAVE] -"\u0215" => "u" - -# ȗ  [LATIN SMALL LETTER U WITH INVERTED BREVE] -"\u0217" => "u" - -# ʉ  [LATIN SMALL LETTER U BAR] -"\u0289" => "u" - -# ᵤ  [LATIN SUBSCRIPT SMALL LETTER U] -"\u1D64" => "u" - -# ᶙ  [LATIN SMALL LETTER U WITH RETROFLEX HOOK] -"\u1D99" => "u" - -# ṳ  [LATIN SMALL LETTER U WITH DIAERESIS BELOW] -"\u1E73" => "u" - -# ṵ  [LATIN SMALL LETTER U WITH TILDE BELOW] -"\u1E75" => "u" - -# ṷ  [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E77" => "u" - -# ṹ  [LATIN SMALL LETTER U WITH TILDE AND ACUTE] -"\u1E79" => "u" - -# ṻ  [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7B" => "u" - -# ụ  [LATIN SMALL LETTER U WITH DOT BELOW] -"\u1EE5" => "u" - -# ủ  [LATIN SMALL LETTER U WITH HOOK ABOVE] -"\u1EE7" => "u" - -# ứ  [LATIN SMALL LETTER U WITH HORN AND ACUTE] -"\u1EE9" => "u" - -# ừ  [LATIN SMALL LETTER U WITH HORN AND GRAVE] -"\u1EEB" => "u" - -# ử  [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EED" => "u" - -# ữ  [LATIN SMALL LETTER U WITH HORN AND TILDE] -"\u1EEF" 
=> "u" - -# ự  [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] -"\u1EF1" => "u" - -# ⓤ  [CIRCLED LATIN SMALL LETTER U] -"\u24E4" => "u" - -# u  [FULLWIDTH LATIN SMALL LETTER U] -"\uFF55" => "u" - -# ⒰  [PARENTHESIZED LATIN SMALL LETTER U] -"\u24B0" => "(u)" - -# ᵫ  [LATIN SMALL LETTER UE] -"\u1D6B" => "ue" - -# Ʋ  [LATIN CAPITAL LETTER V WITH HOOK] -"\u01B2" => "V" - -# Ʌ  [LATIN CAPITAL LETTER TURNED V] -"\u0245" => "V" - -# ᴠ  [LATIN LETTER SMALL CAPITAL V] -"\u1D20" => "V" - -# Ṽ  [LATIN CAPITAL LETTER V WITH TILDE] -"\u1E7C" => "V" - -# Ṿ  [LATIN CAPITAL LETTER V WITH DOT BELOW] -"\u1E7E" => "V" - -# Ỽ  [LATIN CAPITAL LETTER MIDDLE-WELSH V] -"\u1EFC" => "V" - -# Ⓥ  [CIRCLED LATIN CAPITAL LETTER V] -"\u24CB" => "V" - -# Ꝟ  [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] -"\uA75E" => "V" - -# Ꝩ  [LATIN CAPITAL LETTER VEND] -"\uA768" => "V" - -# V  [FULLWIDTH LATIN CAPITAL LETTER V] -"\uFF36" => "V" - -# ʋ  [LATIN SMALL LETTER V WITH HOOK] -"\u028B" => "v" - -# ʌ  [LATIN SMALL LETTER TURNED V] -"\u028C" => "v" - -# ᵥ  [LATIN SUBSCRIPT SMALL LETTER V] -"\u1D65" => "v" - -# ᶌ  [LATIN SMALL LETTER V WITH PALATAL HOOK] -"\u1D8C" => "v" - -# ṽ  [LATIN SMALL LETTER V WITH TILDE] -"\u1E7D" => "v" - -# ṿ  [LATIN SMALL LETTER V WITH DOT BELOW] -"\u1E7F" => "v" - -# ⓥ  [CIRCLED LATIN SMALL LETTER V] -"\u24E5" => "v" - -# ⱱ  [LATIN SMALL LETTER V WITH RIGHT HOOK] -"\u2C71" => "v" - -# ⱴ  [LATIN SMALL LETTER V WITH CURL] -"\u2C74" => "v" - -# ꝟ  [LATIN SMALL LETTER V WITH DIAGONAL STROKE] -"\uA75F" => "v" - -# v  [FULLWIDTH LATIN SMALL LETTER V] -"\uFF56" => "v" - -# Ꝡ  [LATIN CAPITAL LETTER VY] -"\uA760" => "VY" - -# ⒱  [PARENTHESIZED LATIN SMALL LETTER V] -"\u24B1" => "(v)" - -# ꝡ  [LATIN SMALL LETTER VY] -"\uA761" => "vy" - -# Ŵ  [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] -"\u0174" => "W" - -# Ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN CAPITAL LETTER WYNN] -"\u01F7" => "W" - -# ᴡ  [LATIN LETTER SMALL CAPITAL W] -"\u1D21" => "W" - -# Ẁ  [LATIN CAPITAL LETTER W WITH GRAVE] 
-"\u1E80" => "W" - -# Ẃ  [LATIN CAPITAL LETTER W WITH ACUTE] -"\u1E82" => "W" - -# Ẅ  [LATIN CAPITAL LETTER W WITH DIAERESIS] -"\u1E84" => "W" - -# Ẇ  [LATIN CAPITAL LETTER W WITH DOT ABOVE] -"\u1E86" => "W" - -# Ẉ  [LATIN CAPITAL LETTER W WITH DOT BELOW] -"\u1E88" => "W" - -# Ⓦ  [CIRCLED LATIN CAPITAL LETTER W] -"\u24CC" => "W" - -# Ⱳ  [LATIN CAPITAL LETTER W WITH HOOK] -"\u2C72" => "W" - -# W  [FULLWIDTH LATIN CAPITAL LETTER W] -"\uFF37" => "W" - -# ŵ  [LATIN SMALL LETTER W WITH CIRCUMFLEX] -"\u0175" => "w" - -# ƿ  http://en.wikipedia.org/wiki/Wynn  [LATIN LETTER WYNN] -"\u01BF" => "w" - -# ʍ  [LATIN SMALL LETTER TURNED W] -"\u028D" => "w" - -# ẁ  [LATIN SMALL LETTER W WITH GRAVE] -"\u1E81" => "w" - -# ẃ  [LATIN SMALL LETTER W WITH ACUTE] -"\u1E83" => "w" - -# ẅ  [LATIN SMALL LETTER W WITH DIAERESIS] -"\u1E85" => "w" - -# ẇ  [LATIN SMALL LETTER W WITH DOT ABOVE] -"\u1E87" => "w" - -# ẉ  [LATIN SMALL LETTER W WITH DOT BELOW] -"\u1E89" => "w" - -# ẘ  [LATIN SMALL LETTER W WITH RING ABOVE] -"\u1E98" => "w" - -# ⓦ  [CIRCLED LATIN SMALL LETTER W] -"\u24E6" => "w" - -# ⱳ  [LATIN SMALL LETTER W WITH HOOK] -"\u2C73" => "w" - -# w  [FULLWIDTH LATIN SMALL LETTER W] -"\uFF57" => "w" - -# ⒲  [PARENTHESIZED LATIN SMALL LETTER W] -"\u24B2" => "(w)" - -# Ẋ  [LATIN CAPITAL LETTER X WITH DOT ABOVE] -"\u1E8A" => "X" - -# Ẍ  [LATIN CAPITAL LETTER X WITH DIAERESIS] -"\u1E8C" => "X" - -# Ⓧ  [CIRCLED LATIN CAPITAL LETTER X] -"\u24CD" => "X" - -# X  [FULLWIDTH LATIN CAPITAL LETTER X] -"\uFF38" => "X" - -# ᶍ  [LATIN SMALL LETTER X WITH PALATAL HOOK] -"\u1D8D" => "x" - -# ẋ  [LATIN SMALL LETTER X WITH DOT ABOVE] -"\u1E8B" => "x" - -# ẍ  [LATIN SMALL LETTER X WITH DIAERESIS] -"\u1E8D" => "x" - -# ₓ  [LATIN SUBSCRIPT SMALL LETTER X] -"\u2093" => "x" - -# ⓧ  [CIRCLED LATIN SMALL LETTER X] -"\u24E7" => "x" - -# x  [FULLWIDTH LATIN SMALL LETTER X] -"\uFF58" => "x" - -# ⒳  [PARENTHESIZED LATIN SMALL LETTER X] -"\u24B3" => "(x)" - -# Ý  [LATIN CAPITAL LETTER Y WITH ACUTE] -"\u00DD" => "Y" - -# 
Ŷ  [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] -"\u0176" => "Y" - -# Ÿ  [LATIN CAPITAL LETTER Y WITH DIAERESIS] -"\u0178" => "Y" - -# Ƴ  [LATIN CAPITAL LETTER Y WITH HOOK] -"\u01B3" => "Y" - -# Ȳ  [LATIN CAPITAL LETTER Y WITH MACRON] -"\u0232" => "Y" - -# Ɏ  [LATIN CAPITAL LETTER Y WITH STROKE] -"\u024E" => "Y" - -# ʏ  [LATIN LETTER SMALL CAPITAL Y] -"\u028F" => "Y" - -# Ẏ  [LATIN CAPITAL LETTER Y WITH DOT ABOVE] -"\u1E8E" => "Y" - -# Ỳ  [LATIN CAPITAL LETTER Y WITH GRAVE] -"\u1EF2" => "Y" - -# Ỵ  [LATIN CAPITAL LETTER Y WITH DOT BELOW] -"\u1EF4" => "Y" - -# Ỷ  [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] -"\u1EF6" => "Y" - -# Ỹ  [LATIN CAPITAL LETTER Y WITH TILDE] -"\u1EF8" => "Y" - -# Ỿ  [LATIN CAPITAL LETTER Y WITH LOOP] -"\u1EFE" => "Y" - -# Ⓨ  [CIRCLED LATIN CAPITAL LETTER Y] -"\u24CE" => "Y" - -# Y  [FULLWIDTH LATIN CAPITAL LETTER Y] -"\uFF39" => "Y" - -# ý  [LATIN SMALL LETTER Y WITH ACUTE] -"\u00FD" => "y" - -# ÿ  [LATIN SMALL LETTER Y WITH DIAERESIS] -"\u00FF" => "y" - -# ŷ  [LATIN SMALL LETTER Y WITH CIRCUMFLEX] -"\u0177" => "y" - -# ƴ  [LATIN SMALL LETTER Y WITH HOOK] -"\u01B4" => "y" - -# ȳ  [LATIN SMALL LETTER Y WITH MACRON] -"\u0233" => "y" - -# ɏ  [LATIN SMALL LETTER Y WITH STROKE] -"\u024F" => "y" - -# ʎ  [LATIN SMALL LETTER TURNED Y] -"\u028E" => "y" - -# ẏ  [LATIN SMALL LETTER Y WITH DOT ABOVE] -"\u1E8F" => "y" - -# ẙ  [LATIN SMALL LETTER Y WITH RING ABOVE] -"\u1E99" => "y" - -# ỳ  [LATIN SMALL LETTER Y WITH GRAVE] -"\u1EF3" => "y" - -# ỵ  [LATIN SMALL LETTER Y WITH DOT BELOW] -"\u1EF5" => "y" - -# ỷ  [LATIN SMALL LETTER Y WITH HOOK ABOVE] -"\u1EF7" => "y" - -# ỹ  [LATIN SMALL LETTER Y WITH TILDE] -"\u1EF9" => "y" - -# ỿ  [LATIN SMALL LETTER Y WITH LOOP] -"\u1EFF" => "y" - -# ⓨ  [CIRCLED LATIN SMALL LETTER Y] -"\u24E8" => "y" - -# y  [FULLWIDTH LATIN SMALL LETTER Y] -"\uFF59" => "y" - -# ⒴  [PARENTHESIZED LATIN SMALL LETTER Y] -"\u24B4" => "(y)" - -# Ź  [LATIN CAPITAL LETTER Z WITH ACUTE] -"\u0179" => "Z" - -# Ż  [LATIN CAPITAL LETTER Z WITH DOT ABOVE] 
-"\u017B" => "Z" - -# Ž  [LATIN CAPITAL LETTER Z WITH CARON] -"\u017D" => "Z" - -# Ƶ  [LATIN CAPITAL LETTER Z WITH STROKE] -"\u01B5" => "Z" - -# Ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN CAPITAL LETTER YOGH] -"\u021C" => "Z" - -# Ȥ  [LATIN CAPITAL LETTER Z WITH HOOK] -"\u0224" => "Z" - -# ᴢ  [LATIN LETTER SMALL CAPITAL Z] -"\u1D22" => "Z" - -# Ẑ  [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] -"\u1E90" => "Z" - -# Ẓ  [LATIN CAPITAL LETTER Z WITH DOT BELOW] -"\u1E92" => "Z" - -# Ẕ  [LATIN CAPITAL LETTER Z WITH LINE BELOW] -"\u1E94" => "Z" - -# Ⓩ  [CIRCLED LATIN CAPITAL LETTER Z] -"\u24CF" => "Z" - -# Ⱬ  [LATIN CAPITAL LETTER Z WITH DESCENDER] -"\u2C6B" => "Z" - -# Ꝣ  [LATIN CAPITAL LETTER VISIGOTHIC Z] -"\uA762" => "Z" - -# Z  [FULLWIDTH LATIN CAPITAL LETTER Z] -"\uFF3A" => "Z" - -# ź  [LATIN SMALL LETTER Z WITH ACUTE] -"\u017A" => "z" - -# ż  [LATIN SMALL LETTER Z WITH DOT ABOVE] -"\u017C" => "z" - -# ž  [LATIN SMALL LETTER Z WITH CARON] -"\u017E" => "z" - -# ƶ  [LATIN SMALL LETTER Z WITH STROKE] -"\u01B6" => "z" - -# ȝ  http://en.wikipedia.org/wiki/Yogh  [LATIN SMALL LETTER YOGH] -"\u021D" => "z" - -# ȥ  [LATIN SMALL LETTER Z WITH HOOK] -"\u0225" => "z" - -# ɀ  [LATIN SMALL LETTER Z WITH SWASH TAIL] -"\u0240" => "z" - -# ʐ  [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] -"\u0290" => "z" - -# ʑ  [LATIN SMALL LETTER Z WITH CURL] -"\u0291" => "z" - -# ᵶ  [LATIN SMALL LETTER Z WITH MIDDLE TILDE] -"\u1D76" => "z" - -# ᶎ  [LATIN SMALL LETTER Z WITH PALATAL HOOK] -"\u1D8E" => "z" - -# ẑ  [LATIN SMALL LETTER Z WITH CIRCUMFLEX] -"\u1E91" => "z" - -# ẓ  [LATIN SMALL LETTER Z WITH DOT BELOW] -"\u1E93" => "z" - -# ẕ  [LATIN SMALL LETTER Z WITH LINE BELOW] -"\u1E95" => "z" - -# ⓩ  [CIRCLED LATIN SMALL LETTER Z] -"\u24E9" => "z" - -# ⱬ  [LATIN SMALL LETTER Z WITH DESCENDER] -"\u2C6C" => "z" - -# ꝣ  [LATIN SMALL LETTER VISIGOTHIC Z] -"\uA763" => "z" - -# z  [FULLWIDTH LATIN SMALL LETTER Z] -"\uFF5A" => "z" - -# ⒵  [PARENTHESIZED LATIN SMALL LETTER Z] -"\u24B5" => "(z)" - -# ⁰  
[SUPERSCRIPT ZERO] -"\u2070" => "0" - -# ₀  [SUBSCRIPT ZERO] -"\u2080" => "0" - -# ⓪  [CIRCLED DIGIT ZERO] -"\u24EA" => "0" - -# ⓿  [NEGATIVE CIRCLED DIGIT ZERO] -"\u24FF" => "0" - -# 0  [FULLWIDTH DIGIT ZERO] -"\uFF10" => "0" - -# ¹  [SUPERSCRIPT ONE] -"\u00B9" => "1" - -# ₁  [SUBSCRIPT ONE] -"\u2081" => "1" - -# ①  [CIRCLED DIGIT ONE] -"\u2460" => "1" - -# ⓵  [DOUBLE CIRCLED DIGIT ONE] -"\u24F5" => "1" - -# ❶  [DINGBAT NEGATIVE CIRCLED DIGIT ONE] -"\u2776" => "1" - -# ➀  [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] -"\u2780" => "1" - -# ➊  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] -"\u278A" => "1" - -# 1  [FULLWIDTH DIGIT ONE] -"\uFF11" => "1" - -# ⒈  [DIGIT ONE FULL STOP] -"\u2488" => "1." - -# ⑴  [PARENTHESIZED DIGIT ONE] -"\u2474" => "(1)" - -# ²  [SUPERSCRIPT TWO] -"\u00B2" => "2" - -# ₂  [SUBSCRIPT TWO] -"\u2082" => "2" - -# ②  [CIRCLED DIGIT TWO] -"\u2461" => "2" - -# ⓶  [DOUBLE CIRCLED DIGIT TWO] -"\u24F6" => "2" - -# ❷  [DINGBAT NEGATIVE CIRCLED DIGIT TWO] -"\u2777" => "2" - -# ➁  [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] -"\u2781" => "2" - -# ➋  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] -"\u278B" => "2" - -# 2  [FULLWIDTH DIGIT TWO] -"\uFF12" => "2" - -# ⒉  [DIGIT TWO FULL STOP] -"\u2489" => "2." - -# ⑵  [PARENTHESIZED DIGIT TWO] -"\u2475" => "(2)" - -# ³  [SUPERSCRIPT THREE] -"\u00B3" => "3" - -# ₃  [SUBSCRIPT THREE] -"\u2083" => "3" - -# ③  [CIRCLED DIGIT THREE] -"\u2462" => "3" - -# ⓷  [DOUBLE CIRCLED DIGIT THREE] -"\u24F7" => "3" - -# ❸  [DINGBAT NEGATIVE CIRCLED DIGIT THREE] -"\u2778" => "3" - -# ➂  [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] -"\u2782" => "3" - -# ➌  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] -"\u278C" => "3" - -# 3  [FULLWIDTH DIGIT THREE] -"\uFF13" => "3" - -# ⒊  [DIGIT THREE FULL STOP] -"\u248A" => "3." 
- -# ⑶  [PARENTHESIZED DIGIT THREE] -"\u2476" => "(3)" - -# ⁴  [SUPERSCRIPT FOUR] -"\u2074" => "4" - -# ₄  [SUBSCRIPT FOUR] -"\u2084" => "4" - -# ④  [CIRCLED DIGIT FOUR] -"\u2463" => "4" - -# ⓸  [DOUBLE CIRCLED DIGIT FOUR] -"\u24F8" => "4" - -# ❹  [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] -"\u2779" => "4" - -# ➃  [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] -"\u2783" => "4" - -# ➍  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] -"\u278D" => "4" - -# 4  [FULLWIDTH DIGIT FOUR] -"\uFF14" => "4" - -# ⒋  [DIGIT FOUR FULL STOP] -"\u248B" => "4." - -# ⑷  [PARENTHESIZED DIGIT FOUR] -"\u2477" => "(4)" - -# ⁵  [SUPERSCRIPT FIVE] -"\u2075" => "5" - -# ₅  [SUBSCRIPT FIVE] -"\u2085" => "5" - -# ⑤  [CIRCLED DIGIT FIVE] -"\u2464" => "5" - -# ⓹  [DOUBLE CIRCLED DIGIT FIVE] -"\u24F9" => "5" - -# ❺  [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] -"\u277A" => "5" - -# ➄  [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] -"\u2784" => "5" - -# ➎  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] -"\u278E" => "5" - -# 5  [FULLWIDTH DIGIT FIVE] -"\uFF15" => "5" - -# ⒌  [DIGIT FIVE FULL STOP] -"\u248C" => "5." - -# ⑸  [PARENTHESIZED DIGIT FIVE] -"\u2478" => "(5)" - -# ⁶  [SUPERSCRIPT SIX] -"\u2076" => "6" - -# ₆  [SUBSCRIPT SIX] -"\u2086" => "6" - -# ⑥  [CIRCLED DIGIT SIX] -"\u2465" => "6" - -# ⓺  [DOUBLE CIRCLED DIGIT SIX] -"\u24FA" => "6" - -# ❻  [DINGBAT NEGATIVE CIRCLED DIGIT SIX] -"\u277B" => "6" - -# ➅  [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] -"\u2785" => "6" - -# ➏  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] -"\u278F" => "6" - -# 6  [FULLWIDTH DIGIT SIX] -"\uFF16" => "6" - -# ⒍  [DIGIT SIX FULL STOP] -"\u248D" => "6." 
- -# ⑹  [PARENTHESIZED DIGIT SIX] -"\u2479" => "(6)" - -# ⁷  [SUPERSCRIPT SEVEN] -"\u2077" => "7" - -# ₇  [SUBSCRIPT SEVEN] -"\u2087" => "7" - -# ⑦  [CIRCLED DIGIT SEVEN] -"\u2466" => "7" - -# ⓻  [DOUBLE CIRCLED DIGIT SEVEN] -"\u24FB" => "7" - -# ❼  [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] -"\u277C" => "7" - -# ➆  [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2786" => "7" - -# ➐  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2790" => "7" - -# 7  [FULLWIDTH DIGIT SEVEN] -"\uFF17" => "7" - -# ⒎  [DIGIT SEVEN FULL STOP] -"\u248E" => "7." - -# ⑺  [PARENTHESIZED DIGIT SEVEN] -"\u247A" => "(7)" - -# ⁸  [SUPERSCRIPT EIGHT] -"\u2078" => "8" - -# ₈  [SUBSCRIPT EIGHT] -"\u2088" => "8" - -# ⑧  [CIRCLED DIGIT EIGHT] -"\u2467" => "8" - -# ⓼  [DOUBLE CIRCLED DIGIT EIGHT] -"\u24FC" => "8" - -# ❽  [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] -"\u277D" => "8" - -# ➇  [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2787" => "8" - -# ➑  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2791" => "8" - -# 8  [FULLWIDTH DIGIT EIGHT] -"\uFF18" => "8" - -# ⒏  [DIGIT EIGHT FULL STOP] -"\u248F" => "8." - -# ⑻  [PARENTHESIZED DIGIT EIGHT] -"\u247B" => "(8)" - -# ⁹  [SUPERSCRIPT NINE] -"\u2079" => "9" - -# ₉  [SUBSCRIPT NINE] -"\u2089" => "9" - -# ⑨  [CIRCLED DIGIT NINE] -"\u2468" => "9" - -# ⓽  [DOUBLE CIRCLED DIGIT NINE] -"\u24FD" => "9" - -# ❾  [DINGBAT NEGATIVE CIRCLED DIGIT NINE] -"\u277E" => "9" - -# ➈  [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] -"\u2788" => "9" - -# ➒  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] -"\u2792" => "9" - -# 9  [FULLWIDTH DIGIT NINE] -"\uFF19" => "9" - -# ⒐  [DIGIT NINE FULL STOP] -"\u2490" => "9." 
- -# ⑼  [PARENTHESIZED DIGIT NINE] -"\u247C" => "(9)" - -# ⑩  [CIRCLED NUMBER TEN] -"\u2469" => "10" - -# ⓾  [DOUBLE CIRCLED NUMBER TEN] -"\u24FE" => "10" - -# ❿  [DINGBAT NEGATIVE CIRCLED NUMBER TEN] -"\u277F" => "10" - -# ➉  [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] -"\u2789" => "10" - -# ➓  [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] -"\u2793" => "10" - -# ⒑  [NUMBER TEN FULL STOP] -"\u2491" => "10." - -# ⑽  [PARENTHESIZED NUMBER TEN] -"\u247D" => "(10)" - -# ⑪  [CIRCLED NUMBER ELEVEN] -"\u246A" => "11" - -# ⓫  [NEGATIVE CIRCLED NUMBER ELEVEN] -"\u24EB" => "11" - -# ⒒  [NUMBER ELEVEN FULL STOP] -"\u2492" => "11." - -# ⑾  [PARENTHESIZED NUMBER ELEVEN] -"\u247E" => "(11)" - -# ⑫  [CIRCLED NUMBER TWELVE] -"\u246B" => "12" - -# ⓬  [NEGATIVE CIRCLED NUMBER TWELVE] -"\u24EC" => "12" - -# ⒓  [NUMBER TWELVE FULL STOP] -"\u2493" => "12." - -# ⑿  [PARENTHESIZED NUMBER TWELVE] -"\u247F" => "(12)" - -# ⑬  [CIRCLED NUMBER THIRTEEN] -"\u246C" => "13" - -# ⓭  [NEGATIVE CIRCLED NUMBER THIRTEEN] -"\u24ED" => "13" - -# ⒔  [NUMBER THIRTEEN FULL STOP] -"\u2494" => "13." - -# ⒀  [PARENTHESIZED NUMBER THIRTEEN] -"\u2480" => "(13)" - -# ⑭  [CIRCLED NUMBER FOURTEEN] -"\u246D" => "14" - -# ⓮  [NEGATIVE CIRCLED NUMBER FOURTEEN] -"\u24EE" => "14" - -# ⒕  [NUMBER FOURTEEN FULL STOP] -"\u2495" => "14." - -# ⒁  [PARENTHESIZED NUMBER FOURTEEN] -"\u2481" => "(14)" - -# ⑮  [CIRCLED NUMBER FIFTEEN] -"\u246E" => "15" - -# ⓯  [NEGATIVE CIRCLED NUMBER FIFTEEN] -"\u24EF" => "15" - -# ⒖  [NUMBER FIFTEEN FULL STOP] -"\u2496" => "15." - -# ⒂  [PARENTHESIZED NUMBER FIFTEEN] -"\u2482" => "(15)" - -# ⑯  [CIRCLED NUMBER SIXTEEN] -"\u246F" => "16" - -# ⓰  [NEGATIVE CIRCLED NUMBER SIXTEEN] -"\u24F0" => "16" - -# ⒗  [NUMBER SIXTEEN FULL STOP] -"\u2497" => "16." - -# ⒃  [PARENTHESIZED NUMBER SIXTEEN] -"\u2483" => "(16)" - -# ⑰  [CIRCLED NUMBER SEVENTEEN] -"\u2470" => "17" - -# ⓱  [NEGATIVE CIRCLED NUMBER SEVENTEEN] -"\u24F1" => "17" - -# ⒘  [NUMBER SEVENTEEN FULL STOP] -"\u2498" => "17." 
- -# ⒄  [PARENTHESIZED NUMBER SEVENTEEN] -"\u2484" => "(17)" - -# ⑱  [CIRCLED NUMBER EIGHTEEN] -"\u2471" => "18" - -# ⓲  [NEGATIVE CIRCLED NUMBER EIGHTEEN] -"\u24F2" => "18" - -# ⒙  [NUMBER EIGHTEEN FULL STOP] -"\u2499" => "18." - -# ⒅  [PARENTHESIZED NUMBER EIGHTEEN] -"\u2485" => "(18)" - -# ⑲  [CIRCLED NUMBER NINETEEN] -"\u2472" => "19" - -# ⓳  [NEGATIVE CIRCLED NUMBER NINETEEN] -"\u24F3" => "19" - -# ⒚  [NUMBER NINETEEN FULL STOP] -"\u249A" => "19." - -# ⒆  [PARENTHESIZED NUMBER NINETEEN] -"\u2486" => "(19)" - -# ⑳  [CIRCLED NUMBER TWENTY] -"\u2473" => "20" - -# ⓴  [NEGATIVE CIRCLED NUMBER TWENTY] -"\u24F4" => "20" - -# ⒛  [NUMBER TWENTY FULL STOP] -"\u249B" => "20." - -# ⒇  [PARENTHESIZED NUMBER TWENTY] -"\u2487" => "(20)" - -# «  [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00AB" => "\"" - -# »  [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00BB" => "\"" - -# “  [LEFT DOUBLE QUOTATION MARK] -"\u201C" => "\"" - -# ”  [RIGHT DOUBLE QUOTATION MARK] -"\u201D" => "\"" - -# „  [DOUBLE LOW-9 QUOTATION MARK] -"\u201E" => "\"" - -# ″  [DOUBLE PRIME] -"\u2033" => "\"" - -# ‶  [REVERSED DOUBLE PRIME] -"\u2036" => "\"" - -# ❝  [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275D" => "\"" - -# ❞  [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] -"\u275E" => "\"" - -# ❮  [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276E" => "\"" - -# ❯  [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276F" => "\"" - -# "  [FULLWIDTH QUOTATION MARK] -"\uFF02" => "\"" - -# ‘  [LEFT SINGLE QUOTATION MARK] -"\u2018" => "\'" - -# ’  [RIGHT SINGLE QUOTATION MARK] -"\u2019" => "\'" - -# ‚  [SINGLE LOW-9 QUOTATION MARK] -"\u201A" => "\'" - -# ‛  [SINGLE HIGH-REVERSED-9 QUOTATION MARK] -"\u201B" => "\'" - -# ′  [PRIME] -"\u2032" => "\'" - -# ‵  [REVERSED PRIME] -"\u2035" => "\'" - -# ‹  [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] -"\u2039" => "\'" - -# ›  [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] -"\u203A" => "\'" - -# ❛  [HEAVY SINGLE TURNED COMMA QUOTATION MARK 
ORNAMENT] -"\u275B" => "\'" - -# ❜  [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] -"\u275C" => "\'" - -# '  [FULLWIDTH APOSTROPHE] -"\uFF07" => "\'" - -# ‐  [HYPHEN] -"\u2010" => "-" - -# ‑  [NON-BREAKING HYPHEN] -"\u2011" => "-" - -# ‒  [FIGURE DASH] -"\u2012" => "-" - -# –  [EN DASH] -"\u2013" => "-" - -# —  [EM DASH] -"\u2014" => "-" - -# ⁻  [SUPERSCRIPT MINUS] -"\u207B" => "-" - -# ₋  [SUBSCRIPT MINUS] -"\u208B" => "-" - -# -  [FULLWIDTH HYPHEN-MINUS] -"\uFF0D" => "-" - -# ⁅  [LEFT SQUARE BRACKET WITH QUILL] -"\u2045" => "[" - -# ❲  [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] -"\u2772" => "[" - -# [  [FULLWIDTH LEFT SQUARE BRACKET] -"\uFF3B" => "[" - -# ⁆  [RIGHT SQUARE BRACKET WITH QUILL] -"\u2046" => "]" - -# ❳  [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] -"\u2773" => "]" - -# ]  [FULLWIDTH RIGHT SQUARE BRACKET] -"\uFF3D" => "]" - -# ⁽  [SUPERSCRIPT LEFT PARENTHESIS] -"\u207D" => "(" - -# ₍  [SUBSCRIPT LEFT PARENTHESIS] -"\u208D" => "(" - -# ❨  [MEDIUM LEFT PARENTHESIS ORNAMENT] -"\u2768" => "(" - -# ❪  [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] -"\u276A" => "(" - -# (  [FULLWIDTH LEFT PARENTHESIS] -"\uFF08" => "(" - -# ⸨  [LEFT DOUBLE PARENTHESIS] -"\u2E28" => "((" - -# ⁾  [SUPERSCRIPT RIGHT PARENTHESIS] -"\u207E" => ")" - -# ₎  [SUBSCRIPT RIGHT PARENTHESIS] -"\u208E" => ")" - -# ❩  [MEDIUM RIGHT PARENTHESIS ORNAMENT] -"\u2769" => ")" - -# ❫  [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] -"\u276B" => ")" - -# )  [FULLWIDTH RIGHT PARENTHESIS] -"\uFF09" => ")" - -# ⸩  [RIGHT DOUBLE PARENTHESIS] -"\u2E29" => "))" - -# ❬  [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u276C" => "<" - -# ❰  [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u2770" => "<" - -# <  [FULLWIDTH LESS-THAN SIGN] -"\uFF1C" => "<" - -# ❭  [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u276D" => ">" - -# ❱  [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u2771" => ">" - -# >  [FULLWIDTH GREATER-THAN SIGN] -"\uFF1E" => ">" - -# ❴  [MEDIUM LEFT CURLY BRACKET ORNAMENT] -"\u2774" => "{" 
- -# {  [FULLWIDTH LEFT CURLY BRACKET] -"\uFF5B" => "{" - -# ❵  [MEDIUM RIGHT CURLY BRACKET ORNAMENT] -"\u2775" => "}" - -# }  [FULLWIDTH RIGHT CURLY BRACKET] -"\uFF5D" => "}" - -# ⁺  [SUPERSCRIPT PLUS SIGN] -"\u207A" => "+" - -# ₊  [SUBSCRIPT PLUS SIGN] -"\u208A" => "+" - -# +  [FULLWIDTH PLUS SIGN] -"\uFF0B" => "+" - -# ⁼  [SUPERSCRIPT EQUALS SIGN] -"\u207C" => "=" - -# ₌  [SUBSCRIPT EQUALS SIGN] -"\u208C" => "=" - -# =  [FULLWIDTH EQUALS SIGN] -"\uFF1D" => "=" - -# !  [FULLWIDTH EXCLAMATION MARK] -"\uFF01" => "!" - -# ‼  [DOUBLE EXCLAMATION MARK] -"\u203C" => "!!" - -# ⁉  [EXCLAMATION QUESTION MARK] -"\u2049" => "!?" - -# #  [FULLWIDTH NUMBER SIGN] -"\uFF03" => "#" - -# $  [FULLWIDTH DOLLAR SIGN] -"\uFF04" => "$" - -# ⁒  [COMMERCIAL MINUS SIGN] -"\u2052" => "%" - -# %  [FULLWIDTH PERCENT SIGN] -"\uFF05" => "%" - -# &  [FULLWIDTH AMPERSAND] -"\uFF06" => "&" - -# ⁎  [LOW ASTERISK] -"\u204E" => "*" - -# *  [FULLWIDTH ASTERISK] -"\uFF0A" => "*" - -# ,  [FULLWIDTH COMMA] -"\uFF0C" => "," - -# .  [FULLWIDTH FULL STOP] -"\uFF0E" => "." - -# ⁄  [FRACTION SLASH] -"\u2044" => "/" - -# /  [FULLWIDTH SOLIDUS] -"\uFF0F" => "/" - -# :  [FULLWIDTH COLON] -"\uFF1A" => ":" - -# ⁏  [REVERSED SEMICOLON] -"\u204F" => ";" - -# ;  [FULLWIDTH SEMICOLON] -"\uFF1B" => ";" - -# ?  [FULLWIDTH QUESTION MARK] -"\uFF1F" => "?" - -# ⁇  [DOUBLE QUESTION MARK] -"\u2047" => "??" - -# ⁈  [QUESTION EXCLAMATION MARK] -"\u2048" => "?!" 
- -# @  [FULLWIDTH COMMERCIAL AT] -"\uFF20" => "@" - -# \  [FULLWIDTH REVERSE SOLIDUS] -"\uFF3C" => "\\" - -# ‸  [CARET] -"\u2038" => "^" - -# ^  [FULLWIDTH CIRCUMFLEX ACCENT] -"\uFF3E" => "^" - -# _  [FULLWIDTH LOW LINE] -"\uFF3F" => "_" - -# ⁓  [SWUNG DASH] -"\u2053" => "~" - -# ~  [FULLWIDTH TILDE] -"\uFF5E" => "~" - -################################################################ -# Below is the Perl script used to generate the above mappings # -# from ASCIIFoldingFilter.java:                                # -################################################################ -# -# #!/usr/bin/perl -# -# use warnings; -# use strict; -#  -# my @source_chars = (); -# my @source_char_descriptions = (); -# my $target = ''; -#  -# while (<>) { -#   if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) { -#     push @source_chars, $1; -#	  push @source_char_descriptions, $2; -#	  next; -#   } -#   if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) { -#     $target .= $1; -#     next; -#   } -#   if (/break;/) { -#     $target = "\\\"" if ($target eq '"'); -#     for my $source_char_num (0..$#source_chars) { -#	    print "# $source_char_descriptions[$source_char_num]\n"; -#	    print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n"; -#	  } -#	  @source_chars = (); -#	  @source_char_descriptions = (); -#	  $target = ''; -#   } -# } diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt deleted file mode 100644 index ede774258..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt +++ /dev/null @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  
You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -#   "source" => "target" -#     "source".length() > 0 (source cannot be empty.) -#     "target".length() >= 0 (target can be empty.) - -# example: -#   "À" => "A" -#   "\u00C0" => "A" -#   "\u00C0" => "\u0041" -#   "ß" => "ss" -#   "\t" => " " -#   "\n" => "" - -# À => A -"\u00C0" => "A" - -# Á => A -"\u00C1" => "A" - -#  => A -"\u00C2" => "A" - -# à => A -"\u00C3" => "A" - -# Ä => A -"\u00C4" => "A" - -# Å => A -"\u00C5" => "A" - -# Æ => AE -"\u00C6" => "AE" - -# Ç => C -"\u00C7" => "C" - -# È => E -"\u00C8" => "E" - -# É => E -"\u00C9" => "E" - -# Ê => E -"\u00CA" => "E" - -# Ë => E -"\u00CB" => "E" - -# Ì => I -"\u00CC" => "I" - -# Í => I -"\u00CD" => "I" - -# Î => I -"\u00CE" => "I" - -# Ï => I -"\u00CF" => "I" - -# IJ => IJ -"\u0132" => "IJ" - -# Ð => D -"\u00D0" => "D" - -# Ñ => N -"\u00D1" => "N" - -# Ò => O -"\u00D2" => "O" - -# Ó => O -"\u00D3" => "O" - -# Ô => O -"\u00D4" => "O" - -# Õ => O -"\u00D5" => "O" - -# Ö => O -"\u00D6" => "O" - -# Ø => O -"\u00D8" => "O" - -# Œ => OE -"\u0152" => "OE" - -# Þ -"\u00DE" => "TH" - -# Ù => U -"\u00D9" => "U" - -# Ú => U -"\u00DA" => "U" - -# Û => U -"\u00DB" => "U" - -# Ü => U -"\u00DC" => "U" - -# Ý => Y -"\u00DD" => "Y" - -# Ÿ => Y -"\u0178" => "Y" - -# à => a -"\u00E0" => "a" - -# á => a -"\u00E1" => "a" - -# â => a -"\u00E2" => "a" - -# ã => a -"\u00E3" => "a" - -# ä => a -"\u00E4" => "a" - -# å => a -"\u00E5" => "a" - -# æ => ae -"\u00E6" => "ae" - -# ç => c -"\u00E7" => "c" - -# è => e -"\u00E8" => "e" - -# é => e -"\u00E9" => "e" - -# ê => e -"\u00EA" => "e" - -# ë => e -"\u00EB" => "e" 
- -# ì => i -"\u00EC" => "i" - -# í => i -"\u00ED" => "i" - -# î => i -"\u00EE" => "i" - -# ï => i -"\u00EF" => "i" - -# ij => ij -"\u0133" => "ij" - -# ð => d -"\u00F0" => "d" - -# ñ => n -"\u00F1" => "n" - -# ò => o -"\u00F2" => "o" - -# ó => o -"\u00F3" => "o" - -# ô => o -"\u00F4" => "o" - -# õ => o -"\u00F5" => "o" - -# ö => o -"\u00F6" => "o" - -# ø => o -"\u00F8" => "o" - -# œ => oe -"\u0153" => "oe" - -# ß => ss -"\u00DF" => "ss" - -# þ => th -"\u00FE" => "th" - -# ù => u -"\u00F9" => "u" - -# ú => u -"\u00FA" => "u" - -# û => u -"\u00FB" => "u" - -# ü => u -"\u00FC" => "u" - -# ý => y -"\u00FD" => "y" - -# ÿ => y -"\u00FF" => "y" - -# ff => ff -"\uFB00" => "ff" - -# fi => fi -"\uFB01" => "fi" - -# fl => fl -"\uFB02" => "fl" - -# ffi => ffi -"\uFB03" => "ffi" - -# ffl => ffl -"\uFB04" => "ffl" - -# ſt => ft -"\uFB05" => "ft" - -# st => st -"\uFB06" => "st" diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/protwords.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/protwords.txt deleted file mode 100644 index 1dfc0abec..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. 
- -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. -dontstems -zwhacky - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/solrconfig.xml b/solr-8.1.1/example/example-DIH/solr/mail/conf/solrconfig.xml deleted file mode 100644 index 531cdd7f9..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/solrconfig.xml +++ /dev/null @@ -1,1356 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements.  See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License.  You may obtain a copy of the License at - -     http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- -     For more details about configurations options that may appear in -     this file, see http://wiki.apache.org/solr/SolrConfigXml. ---> -<config> -  <!-- In all configuration below, a prefix of "solr." for class names -       is an alias that causes solr to search appropriate packages, -       including org.apache.solr.(search|update|request|core|analysis) - -       You may also specify a fully qualified Java classname if you -       have your own custom plugins. -    --> - -  <!-- Controls what version of Lucene various components of Solr -       adhere to.  Generally, you want to use the latest version to -       get all bug fixes and improvements. 
It is highly recommended -       that you fully re-index after changing this setting as it can -       affect both how text is indexed and queried. -  --> -  <luceneMatchVersion>8.1.1</luceneMatchVersion> - -  <!-- <lib/> directives can be used to instruct Solr to load any Jars -       identified and use them to resolve any "plugins" specified in -       your solrconfig.xml or schema.xml (ie: Analyzers, Request -       Handlers, etc...). - -       All directories and paths are resolved relative to the -       instanceDir. - -       Please note that <lib/> directives are processed in the order -       that they appear in your solrconfig.xml file, and are "stacked" -       on top of each other when building a ClassLoader - so if you have -       plugin jars with dependencies on other jars, the "lower level" -       dependency jars should be loaded first. - -       If a "./lib" directory exists in your instanceDir, all files -       found in it are included as if you had used the following -       syntax... - -              <lib dir="./lib" /> -    --> - -  <!-- A 'dir' option by itself adds any files found in the directory -       to the classpath, this is useful for including all jars in a -       directory. - -       When a 'regex' is specified in addition to a 'dir', only the -       files in that directory which completely match the regex -       (anchored on both ends) will be included. - -       If a 'dir' option (with or without a regex) is used and nothing -       is found that matches, a warning will be logged. - -       The examples below can be used to load some solr-contribs along -       with their external dependencies. 
-    --> -  <lib dir="${solr.install.dir:../../../..}/contrib/dataimporthandler/lib/" regex=".*\.jar" /> -  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar" /> - -  <lib dir="${solr.install.dir:../../../..}/contrib/dataimporthandler-extras/lib/" regex=".*\.jar" /> - -  <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> -  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" /> - -  <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" /> -  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" /> - -  <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> -  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> - -  <!-- an exact 'path' can be used instead of a 'dir' to specify a -       specific jar file.  This will cause a serious error to be logged -       if it can't be loaded. -    --> -  <!-- -     <lib path="../a-jar-that-does-not-exist.jar" /> -  --> - -  <!-- Data Directory - -       Used to specify an alternate directory to hold all index data -       other than the default ./data under the Solr home.  If -       replication is in use, this should match the replication -       configuration. -    --> -  <dataDir>${solr.data.dir:}</dataDir> - - -  <!-- The DirectoryFactory to use for indexes. - -       solr.StandardDirectoryFactory is filesystem -       based and tries to pick the best implementation for the current -       JVM and platform.  solr.NRTCachingDirectoryFactory, the default, -       wraps solr.StandardDirectoryFactory and caches small files in memory -       for better NRT performance. - -       One can force a particular implementation via solr.MMapDirectoryFactory, -       solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. - -       solr.RAMDirectoryFactory is memory based and not persistent. 
-    --> -  <directoryFactory name="DirectoryFactory" -                    class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> - -  <!-- The CodecFactory for defining the format of the inverted index. -       The default implementation is SchemaCodecFactory, which is the official Lucene -       index format, but hooks into the schema to provide per-field customization of -       the postings lists and per-document values in the fieldType element -       (postingsFormat/docValuesFormat). Note that most of the alternative implementations -       are experimental, so if you choose to customize the index format, it's a good -       idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader) -       before upgrading to a newer version to avoid unnecessary reindexing. -  --> -  <codecFactory class="solr.SchemaCodecFactory"/> - -  <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -       Index Config - These settings control low-level behavior of indexing -       Most example settings here show the default value, but are commented -       out, to more easily see where customizations have been made. - -       Note: This replaces <indexDefaults> and <mainIndex> from older versions -       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> -  <indexConfig> -    <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a -         LimitTokenCountFilterFactory in your fieldType definition. E.g. -     <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> -    --> -    <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> -    <!-- <writeLockTimeout>1000</writeLockTimeout>  --> - -    <!-- Expert: Enabling compound file will use less files for the index, -         using fewer file descriptors on the expense of performance decrease. -         Default in Lucene is "true". 
Default in Solr is "false" (since 3.6) --> -    <!-- <useCompoundFile>false</useCompoundFile> --> - -    <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene -         indexing for buffering added documents and deletions before they are -         flushed to the Directory. -         maxBufferedDocs sets a limit on the number of documents buffered -         before flushing. -         If both ramBufferSizeMB and maxBufferedDocs is set, then -         Lucene will flush based on whichever limit is hit first. -         The default is 100 MB.  --> -    <!-- <ramBufferSizeMB>100</ramBufferSizeMB> --> -    <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> - -    <!-- Expert: Merge Policy -         The Merge Policy in Lucene controls how merging of segments is done. -         The default since Solr/Lucene 3.3 is TieredMergePolicy. -         The default since Lucene 2.3 was the LogByteSizeMergePolicy, -         Even older versions of Lucene used LogDocMergePolicy. -     --> -    <!-- -        <mergePolicyFactory class="solr.TieredMergePolicyFactory"> -          <int name="maxMergeAtOnce">10</int> -          <int name="segmentsPerTier">10</int> -        </mergePolicyFactory> -     --> - -    <!-- Expert: Merge Scheduler -         The Merge Scheduler in Lucene controls how merges are -         performed.  The ConcurrentMergeScheduler (Lucene 2.3 default) -         can perform merges in the background using separate threads. -         The SerialMergeScheduler (Lucene 2.2 default) does not. -     --> -    <!-- -       <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> -       --> - -    <!-- LockFactory - -         This option specifies which Lucene LockFactory implementation -         to use. - -         single = SingleInstanceLockFactory - suggested for a -                  read-only index or when there is no possibility of -                  another process trying to modify the index. 
-         native = NativeFSLockFactory - uses OS native file locking. -                  Do not use when multiple solr webapps in the same -                  JVM are attempting to share a single index. -         simple = SimpleFSLockFactory  - uses a plain file for locking - -         Defaults: 'native' is default for Solr3.6 and later, otherwise -                   'simple' is the default - -         More details on the nuances of each LockFactory... -         http://wiki.apache.org/lucene-java/AvailableLockFactories -    --> -    <lockType>${solr.lock.type:native}</lockType> - -    <!-- Commit Deletion Policy -         Custom deletion policies can be specified here. The class must -         implement org.apache.lucene.index.IndexDeletionPolicy. - -         The default Solr IndexDeletionPolicy implementation supports -         deleting index commit points on number of commits, age of -         commit point and optimized status. - -         The latest commit point should always be preserved regardless -         of the criteria. -    --> -    <!-- -    <deletionPolicy class="solr.SolrDeletionPolicy"> -    --> -      <!-- The number of commit points to be kept --> -      <!-- <str name="maxCommitsToKeep">1</str> --> -      <!-- The number of optimized commit points to be kept --> -      <!-- <str name="maxOptimizedCommitsToKeep">0</str> --> -      <!-- -          Delete all commit points once they have reached the given age. -          Supports DateMathParser syntax e.g. -        --> -      <!-- -         <str name="maxCommitAge">30MINUTES</str> -         <str name="maxCommitAge">1DAY</str> -      --> -    <!-- -    </deletionPolicy> -    --> - -    <!-- Lucene Infostream - -         To aid in advanced debugging, Lucene provides an "InfoStream" -         of detailed information when indexing. - -         Setting the value to true will instruct the underlying Lucene -         IndexWriter to write its info stream to solr's log. 
By default, -         this is enabled here, and controlled through log4j2.xml -      --> -     <infoStream>true</infoStream> -  </indexConfig> - - -  <!-- JMX - -       This example enables JMX if and only if an existing MBeanServer -       is found, use this if you want to configure JMX through JVM -       parameters. Remove this to disable exposing Solr configuration -       and statistics to JMX. - -       For more details see http://wiki.apache.org/solr/SolrJmx -    --> -  <jmx /> -  <!-- If you want to connect to a particular server, specify the -       agentId -    --> -  <!-- <jmx agentId="myAgent" /> --> -  <!-- If you want to start a new MBeanServer, specify the serviceUrl --> -  <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> -    --> - -  <!-- The default high-performance update handler --> -  <updateHandler class="solr.DirectUpdateHandler2"> - -    <!-- Enables a transaction log, used for real-time get, durability, and -         solr cloud replica recovery.  The log can grow as big as -         uncommitted changes to the index, so use of a hard autoCommit -         is recommended (see below). -         "dir" - the target directory for transaction logs, defaults to the -                solr data directory.  --> -    <updateLog> -      <str name="dir">${solr.ulog.dir:}</str> -    </updateLog> - -    <!-- AutoCommit - -         Perform a hard commit automatically under certain conditions. -         Instead of enabling autoCommit, consider using "commitWithin" -         when adding documents. - -         http://wiki.apache.org/solr/UpdateXmlMessages - -         maxDocs - Maximum number of documents to add since the last -                   commit before automatically triggering a new commit. - -         maxTime - Maximum amount of time in ms that is allowed to pass -                   since a document was added before automatically -                   triggering a new commit. 
-         openSearcher - if false, the commit causes recent index changes -           to be flushed to stable storage, but does not cause a new -           searcher to be opened to make those changes visible. - -         If the updateLog is enabled, then it's highly recommended to -         have some sort of hard autoCommit to limit the log size. -      --> -     <autoCommit> -       <maxTime>${solr.autoCommit.maxTime:15000}</maxTime> -       <openSearcher>false</openSearcher> -     </autoCommit> - -    <!-- softAutoCommit is like autoCommit except it causes a -         'soft' commit which only ensures that changes are visible -         but does not ensure that data is synced to disk.  This is -         faster and more near-realtime friendly than a hard commit. -      --> - -     <autoSoftCommit> -       <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime> -     </autoSoftCommit> - -    <!-- Update Related Event Listeners - -         Various IndexWriter related events can trigger Listeners to -         take actions. - -         postCommit - fired after every commit or optimize command -         postOptimize - fired after every optimize command -      --> - -  </updateHandler> - -  <!-- IndexReaderFactory - -       Use the following format to specify a custom IndexReaderFactory, -       which allows for alternate IndexReader implementations. - -       ** Experimental Feature ** - -       Please note - Using a custom IndexReaderFactory may prevent -       certain other features from working. The API to -       IndexReaderFactory may change without warning or may even be -       removed from future releases if the problems cannot be -       resolved. - - -       ** Features that may not work with custom IndexReaderFactory ** - -       The ReplicationHandler assumes a disk-resident index. Using a -       custom IndexReader implementation may cause incompatibility -       with ReplicationHandler and may cause replication to not work -       correctly. 
See SOLR-1366 for details. - -    --> -  <!-- -  <indexReaderFactory name="IndexReaderFactory" class="package.class"> -    <str name="someArg">Some Value</str> -  </indexReaderFactory > -  --> - -  <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -       Query section - these settings control query time things like caches -       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> -  <query> -    <!-- Max Boolean Clauses - -         Maximum number of clauses in each BooleanQuery,  an exception -         is thrown if exceeded. - -         ** WARNING ** - -         This option actually modifies a global Lucene property that -         will affect all SolrCores.  If multiple solrconfig.xml files -         disagree on this property, the value at any given moment will -         be based on the last SolrCore to be initialized. - -      --> -    <maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses> - - -    <!-- Solr Internal Query Caches - -         There are two implementations of cache available for Solr, -         LRUCache, based on a synchronized LinkedHashMap, and -         FastLRUCache, based on a ConcurrentHashMap. - -         FastLRUCache has faster gets and slower puts in single -         threaded operation and thus is generally faster than LRUCache -         when the hit ratio of the cache is high (> 75%), and may be -         faster under other scenarios on multi-cpu systems. -    --> - -    <!-- Filter Cache - -         Cache used by SolrIndexSearcher for filters (DocSets), -         unordered sets of *all* documents that match a query.  When a -         new searcher is opened, its caches may be prepopulated or -         "autowarmed" using data from caches in the old searcher. -         autowarmCount is the number of items to prepopulate.  For -         LRUCache, the autowarmed items will be the most recently -         accessed items. 
- -         Parameters: -           class - the SolrCache implementation to use -               (LRUCache or FastLRUCache) -           size - the maximum number of entries in the cache -           initialSize - the initial capacity (number of entries) of -               the cache.  (see java.util.HashMap) -           autowarmCount - the number of entries to prepopulate from -               an old cache. -      --> -    <filterCache class="solr.FastLRUCache" -                 size="512" -                 initialSize="512" -                 autowarmCount="0"/> - -    <!-- Query Result Cache - -         Caches results of searches - ordered lists of document ids -         (DocList) based on a query, a sort, and the range of documents requested. -      --> -    <queryResultCache class="solr.LRUCache" -                     size="512" -                     initialSize="512" -                     autowarmCount="0"/> - -    <!-- Document Cache - -         Caches Lucene Document objects (the stored fields for each -         document).  Since Lucene internal document ids are transient, -         this cache will not be autowarmed. -      --> -    <documentCache class="solr.LRUCache" -                   size="512" -                   initialSize="512" -                   autowarmCount="0"/> - -    <!-- custom cache currently used by block join --> -    <cache name="perSegFilter" -      class="solr.search.LRUCache" -      size="10" -      initialSize="0" -      autowarmCount="10" -      regenerator="solr.NoOpRegenerator" /> - -    <!-- Field Value Cache - -         Cache used to hold field values that are quickly accessible -         by document id.  The fieldValueCache is created by default -         even if not configured here. 
-      --> -    <!-- -       <fieldValueCache class="solr.FastLRUCache" -                        size="512" -                        autowarmCount="128" -                        showItems="32" /> -      --> - -    <!-- Custom Cache - -         Example of a generic cache.  These caches may be accessed by -         name through SolrIndexSearcher.getCache(),cacheLookup(), and -         cacheInsert().  The purpose is to enable easy caching of -         user/application level data.  The regenerator argument should -         be specified as an implementation of solr.CacheRegenerator -         if autowarming is desired. -      --> -    <!-- -       <cache name="myUserCache" -              class="solr.LRUCache" -              size="4096" -              initialSize="1024" -              autowarmCount="1024" -              regenerator="com.mycompany.MyRegenerator" -              /> -      --> - - -    <!-- Lazy Field Loading - -         If true, stored fields that are not requested will be loaded -         lazily.  This can result in a significant speed improvement -         if the usual case is to not load all stored fields, -         especially if the skipped fields are large compressed text -         fields. -    --> -    <enableLazyFieldLoading>true</enableLazyFieldLoading> - -   <!-- Use Filter For Sorted Query - -        A possible optimization that attempts to use a filter to -        satisfy a search.  If the requested sort does not include -        score, then the filterCache will be checked for a filter -        matching the query. If found, the filter will be used as the -        source of document ids, and then the sort will be applied to -        that. 
- -        For most situations, this will not be useful unless you -        frequently get the same search repeatedly with different sort -        options, and none of them ever use "score" -     --> -   <!-- -      <useFilterForSortedQuery>true</useFilterForSortedQuery> -     --> - -   <!-- Result Window Size - -        An optimization for use with the queryResultCache.  When a search -        is requested, a superset of the requested number of document ids -        are collected.  For example, if a search for a particular query -        requests matching documents 10 through 19, and queryWindowSize is 50, -        then documents 0 through 49 will be collected and cached.  Any further -        requests in that range can be satisfied via the cache. -     --> -   <queryResultWindowSize>20</queryResultWindowSize> - -   <!-- Maximum number of documents to cache for any entry in the -        queryResultCache. -     --> -   <queryResultMaxDocsCached>200</queryResultMaxDocsCached> - -   <!-- Query Related Event Listeners - -        Various IndexSearcher related events can trigger Listeners to -        take actions. - -        newSearcher - fired whenever a new searcher is being prepared -        and there is a current searcher handling requests (aka -        registered).  It can be used to prime certain caches to -        prevent long request times for certain requests. - -        firstSearcher - fired whenever a new searcher is being -        prepared but there is no current registered searcher to handle -        requests or to gain autowarming data from. - - -     --> -    <!-- QuerySenderListener takes an array of NamedList and executes a -         local query request for each NamedList in sequence. 
-      --> -    <listener event="newSearcher" class="solr.QuerySenderListener"> -      <arr name="queries"> -        <!-- -           <lst><str name="q">solr</str><str name="sort">price asc</str></lst> -           <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst> -          --> -      </arr> -    </listener> -    <listener event="firstSearcher" class="solr.QuerySenderListener"> -      <arr name="queries"> -        <lst> -          <str name="q">static firstSearcher warming in solrconfig.xml</str> -        </lst> -      </arr> -    </listener> - -    <!-- Use Cold Searcher - -         If a search request comes in and there is no current -         registered searcher, then immediately register the still -         warming searcher and use it.  If "false" then all requests -         will block until the first searcher is done warming. -      --> -    <useColdSearcher>false</useColdSearcher> - -  </query> - - -  <!-- Request Dispatcher - -       This section contains instructions for how the SolrDispatchFilter -       should behave when processing requests for this SolrCore. -    --> -  <requestDispatcher> -    <!-- Request Parsing - -         These settings indicate how Solr Requests may be parsed, and -         what restrictions may be placed on the ContentStreams from -         those requests - -         enableRemoteStreaming - enables use of the stream.file -         and stream.url parameters for specifying remote streams. - -         multipartUploadLimitInKB - specifies the max size (in KiB) of -         Multipart File Uploads that Solr will allow in a Request. - -         formdataUploadLimitInKB - specifies the max size (in KiB) of -         form data (application/x-www-form-urlencoded) sent via -         POST. You can use POST to pass request parameters not -         fitting into the URL. 
- -         addHttpRequestToContext - if set to true, it will instruct -         the requestParsers to include the original HttpServletRequest -         object in the context map of the SolrQueryRequest under the -         key "httpRequest". It will not be used by any of the existing -         Solr components, but may be useful when developing custom -         plugins. - -         *** WARNING *** -         Before enabling remote streaming, you should make sure your -         system has authentication enabled. - -    <requestParsers enableRemoteStreaming="false" -                    multipartUploadLimitInKB="-1" -                    formdataUploadLimitInKB="-1" -                    addHttpRequestToContext="false"/> -      --> - -    <!-- HTTP Caching - -         Set HTTP caching related parameters (for proxy caches and clients). - -         The options below instruct Solr not to output any HTTP Caching -         related headers -      --> -    <httpCaching never304="true" /> -    <!-- If you include a <cacheControl> directive, it will be used to -         generate a Cache-Control header (as well as an Expires header -         if the value contains "max-age=") - -         By default, no Cache-Control header is generated. - -         You can use the <cacheControl> option even if you have set -         never304="true" -      --> -    <!-- -       <httpCaching never304="true" > -         <cacheControl>max-age=30, public</cacheControl> -       </httpCaching> -      --> -    <!-- To enable Solr to respond with automatically generated HTTP -         Caching headers, and to response to Cache Validation requests -         correctly, set the value of never304="false" - -         This will cause Solr to generate Last-Modified and ETag -         headers based on the properties of the Index. - -         The following options can also be specified to affect the -         values of these headers... 
- -         lastModifiedFrom - the default value is "openTime" which means the -         Last-Modified value (and validation against If-Modified-Since -         requests) will all be relative to when the current Searcher -         was opened.  You can change it to lastModifiedFrom="dirLastMod" if -         you want the value to exactly correspond to when the physical -         index was last modified. - -         etagSeed="..." is an option you can change to force the ETag -         header (and validation against If-None-Match requests) to be -         different even if the index has not changed (ie: when making -         significant changes to your config file) - -         (lastModifiedFrom and etagSeed are both ignored if you use -         the never304="true" option) -      --> -    <!-- -       <httpCaching lastModifiedFrom="openTime" -                    etagSeed="Solr"> -         <cacheControl>max-age=30, public</cacheControl> -       </httpCaching> -      --> -  </requestDispatcher> - -  <!-- Request Handlers - -       http://wiki.apache.org/solr/SolrRequestHandler - -       Incoming queries will be dispatched to a specific handler by name -       based on the path specified in the request. - -       If a Request Handler is declared with startup="lazy", then it will -       not be initialized until the first request that uses it. 
- -    --> - -  <requestHandler name="/dataimport" class="solr.DataImportHandler"> -    <lst name="defaults"> -      <str name="config">mail-data-config.xml</str> -    </lst> -  </requestHandler> - -  <!-- SearchHandler - -       http://wiki.apache.org/solr/SearchHandler - -       For processing Search Queries, the primary Request Handler -       provided with Solr is "SearchHandler". It delegates to a sequence -       of SearchComponents (see below) and supports distributed -       queries across multiple shards. -    --> -  <requestHandler name="/select" class="solr.SearchHandler"> -    <!-- default values for query parameters can be specified, these -         will be overridden by parameters in the request -      --> -     <lst name="defaults"> -       <str name="echoParams">explicit</str> -       <int name="rows">10</int> -       <str name="df">text</str> -       <!-- Change from JSON to XML format (the default prior to Solr 7.0) -          <str name="wt">xml</str>  -         --> -     </lst> -    <!-- In addition to defaults, "appends" params can be specified -         to identify values which should be appended to the list of -         multi-val params from the query (or the existing "defaults"). -      --> -    <!-- In this example, the param "fq=inStock:true" would be appended to -         any query time fq params the user may specify, as a mechanism for -         partitioning the index, independent of any user selected filtering -         that may also be desired (perhaps as a result of faceted searching). - -         NOTE: there is *absolutely* nothing a client can do to prevent these -         "appends" values from being used, so don't use this mechanism -         unless you are sure you always want it. -      --> -    <!-- -       <lst name="appends"> -         <str name="fq">inStock:true</str> -       </lst> -      --> -    <!-- "invariants" are a way of letting the Solr maintainer lock down -         the options available to Solr clients.  
Any params values -         specified here are used regardless of what values may be specified -         in either the query, the "defaults", or the "appends" params. - -         In this example, the facet.field and facet.query params would -         be fixed, limiting the facets clients can use.  Faceting is -         not turned on by default - but if the client does specify -         facet=true in the request, these are the only facets they -         will be able to see counts for; regardless of what other -         facet.field or facet.query params they may specify. - -         NOTE: there is *absolutely* nothing a client can do to prevent these -         "invariants" values from being used, so don't use this mechanism -         unless you are sure you always want it. -      --> -    <!-- -       <lst name="invariants"> -         <str name="facet.field">cat</str> -         <str name="facet.field">manu_exact</str> -         <str name="facet.query">price:[* TO 500]</str> -         <str name="facet.query">price:[500 TO *]</str> -       </lst> -      --> -    <!-- If the default list of SearchComponents is not desired, that -         list can either be overridden completely, or components can be -         prepended or appended to the default list.  
(see below) -      --> -    <!-- -       <arr name="components"> -         <str>nameOfCustomComponent1</str> -         <str>nameOfCustomComponent2</str> -       </arr> -      --> -    </requestHandler> - -  <!-- A request handler that returns indented JSON by default --> -  <requestHandler name="/query" class="solr.SearchHandler"> -     <lst name="defaults"> -       <str name="echoParams">explicit</str> -       <str name="wt">json</str> -       <str name="indent">true</str> -       <str name="df">text</str> -     </lst> -  </requestHandler> - - -  <!-- A Robust Example - -       This example SearchHandler declaration shows off usage of the -       SearchHandler with many defaults declared - -       Note that multiple instances of the same Request Handler -       (SearchHandler) can be registered multiple times with different -       names (and different init parameters) -    --> -  <requestHandler name="/browse" class="solr.SearchHandler"> -    <lst name="defaults"> -      <str name="echoParams">explicit</str> - -      <!-- VelocityResponseWriter settings --> -      <str name="wt">velocity</str> -      <str name="v.template">browse</str> -      <str name="v.layout">layout</str> - -      <!-- Query settings --> -      <str name="defType">edismax</str> -      <str name="q.alt">*:*</str> -      <str name="rows">10</str> -      <str name="fl">*,score</str> - -      <!-- Faceting defaults --> -      <str name="facet">on</str> -      <str name="facet.mincount">1</str> -    </lst> -  </requestHandler> - -  <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse"> -    <lst name="defaults"> -      <str name="df">content</str> -    </lst> -  </initParams> - -  <!-- Solr Cell Update Request Handler - -       http://wiki.apache.org/solr/ExtractingRequestHandler - -    --> -  <requestHandler name="/update/extract" -                  startup="lazy" -                  class="solr.extraction.ExtractingRequestHandler" > -    <lst name="defaults"> -      <str 
name="lowernames">true</str> -      <str name="uprefix">ignored_</str> - -      <!-- capture link hrefs but ignore div attributes --> -      <str name="captureAttr">true</str> -      <str name="fmap.a">links</str> -      <str name="fmap.div">ignored_</str> -    </lst> -  </requestHandler> - -  <!-- Search Components - -       Search components are registered to SolrCore and used by -       instances of SearchHandler (which can access them by name) - -       By default, the following components are available: - -       <searchComponent name="query"     class="solr.QueryComponent" /> -       <searchComponent name="facet"     class="solr.FacetComponent" /> -       <searchComponent name="mlt"       class="solr.MoreLikeThisComponent" /> -       <searchComponent name="highlight" class="solr.HighlightComponent" /> -       <searchComponent name="stats"     class="solr.StatsComponent" /> -       <searchComponent name="debug"     class="solr.DebugComponent" /> - -       Default configuration in a requestHandler would look like: - -       <arr name="components"> -         <str>query</str> -         <str>facet</str> -         <str>mlt</str> -         <str>highlight</str> -         <str>stats</str> -         <str>debug</str> -       </arr> - -       If you register a searchComponent to one of the standard names, -       that will be used instead of the default. - -       To insert components before or after the 'standard' components, use: - -       <arr name="first-components"> -         <str>myFirstComponentName</str> -       </arr> - -       <arr name="last-components"> -         <str>myLastComponentName</str> -       </arr> - -       NOTE: The component registered with the name "debug" will -       always be executed after the "last-components" - -     --> - -   <!-- Spell Check - -        The spell check component can return a list of alternative spelling -        suggestions. 
- -        http://wiki.apache.org/solr/SpellCheckComponent -     --> -  <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> - -    <str name="queryAnalyzerFieldType">text_general</str> - -    <!-- Multiple "Spell Checkers" can be declared and used by this -         component -      --> - -    <!-- a spellchecker built from a field of the main index --> -    <lst name="spellchecker"> -      <str name="name">default</str> -      <str name="field">text</str> -      <str name="classname">solr.DirectSolrSpellChecker</str> -      <!-- the spellcheck distance measure used, the default is the internal levenshtein --> -      <str name="distanceMeasure">internal</str> -      <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> -      <float name="accuracy">0.5</float> -      <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> -      <int name="maxEdits">2</int> -      <!-- the minimum shared prefix when enumerating terms --> -      <int name="minPrefix">1</int> -      <!-- maximum number of inspections per result. --> -      <int name="maxInspections">5</int> -      <!-- minimum length of a query term to be considered for correction --> -      <int name="minQueryLength">4</int> -      <!-- maximum threshold of documents a query term can appear to be considered for correction --> -      <float name="maxQueryFrequency">0.01</float> -      <!-- uncomment this to require suggestions to occur in 1% of the documents -        <float name="thresholdTokenFrequency">.01</float> -      --> -    </lst> - -    <!-- a spellchecker that can break or combine words.  
See "/spell" handler below for usage --> -    <lst name="spellchecker"> -      <str name="name">wordbreak</str> -      <str name="classname">solr.WordBreakSolrSpellChecker</str> -      <str name="field">name</str> -      <str name="combineWords">true</str> -      <str name="breakWords">true</str> -      <int name="maxChanges">10</int> -    </lst> - -    <!-- a spellchecker that uses a different distance measure --> -    <!-- -       <lst name="spellchecker"> -         <str name="name">jarowinkler</str> -         <str name="field">spell</str> -         <str name="classname">solr.DirectSolrSpellChecker</str> -         <str name="distanceMeasure"> -           org.apache.lucene.search.spell.JaroWinklerDistance -         </str> -       </lst> -     --> - -    <!-- a spellchecker that uses an alternate comparator - -         comparatorClass can be one of: -          1. score (default) -          2. freq (Frequency first, then score) -          3. A fully qualified class name -      --> -    <!-- -       <lst name="spellchecker"> -         <str name="name">freq</str> -         <str name="field">lowerfilt</str> -         <str name="classname">solr.DirectSolrSpellChecker</str> -         <str name="comparatorClass">freq</str> -       </lst> -      --> - -    <!-- A spellchecker that reads the list of words from a file --> -    <!-- -       <lst name="spellchecker"> -         <str name="classname">solr.FileBasedSpellChecker</str> -         <str name="name">file</str> -         <str name="sourceLocation">spellings.txt</str> -         <str name="characterEncoding">UTF-8</str> -         <str name="spellcheckIndexDir">spellcheckerFile</str> -       </lst> -      --> -  </searchComponent> - -  <!-- A request handler for demonstrating the spellcheck component. - -       NOTE: This is purely as an example.  
The whole purpose of the -       SpellCheckComponent is to hook it into the request handler that -       handles your normal user queries so that a separate request is -       not needed to get suggestions. - -       IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS -       NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! - -       See http://wiki.apache.org/solr/SpellCheckComponent for details -       on the request parameters. -    --> -  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> -    <lst name="defaults"> -      <str name="df">text</str> -      <!-- Solr will use suggestions from both the 'default' spellchecker -           and from the 'wordbreak' spellchecker and combine them. -           collations (re-written queries) can include a combination of -           corrections from both spellcheckers --> -      <str name="spellcheck.dictionary">default</str> -      <str name="spellcheck.dictionary">wordbreak</str> -      <str name="spellcheck">on</str> -      <str name="spellcheck.extendedResults">true</str> -      <str name="spellcheck.count">10</str> -      <str name="spellcheck.alternativeTermCount">5</str> -      <str name="spellcheck.maxResultsForSuggest">5</str> -      <str name="spellcheck.collate">true</str> -      <str name="spellcheck.collateExtendedResults">true</str> -      <str name="spellcheck.maxCollationTries">10</str> -      <str name="spellcheck.maxCollations">5</str> -    </lst> -    <arr name="last-components"> -      <str>spellcheck</str> -    </arr> -  </requestHandler> - -  <searchComponent name="suggest" class="solr.SuggestComponent"> -    <lst name="suggester"> -      <str name="name">mySuggester</str> -      <str name="lookupImpl">FuzzyLookupFactory</str>      <!-- org.apache.solr.spelling.suggest.fst --> -      <str name="dictionaryImpl">DocumentDictionaryFactory</str>     <!-- org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory --> -      <str name="field">cat</str> -      <str 
name="weightField">price</str> -      <str name="suggestAnalyzerFieldType">string</str> -    </lst> -  </searchComponent> - -  <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy"> -    <lst name="defaults"> -      <str name="suggest">true</str> -      <str name="suggest.count">10</str> -    </lst> -    <arr name="components"> -      <str>suggest</str> -    </arr> -  </requestHandler> -  <!-- Term Vector Component - -       http://wiki.apache.org/solr/TermVectorComponent -    --> -  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> - -  <!-- A request handler for demonstrating the term vector component - -       This is purely as an example. - -       In reality you will likely want to add the component to your -       already specified request handlers. -    --> -  <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> -    <lst name="defaults"> -      <str name="df">text</str> -      <bool name="tv">true</bool> -    </lst> -    <arr name="last-components"> -      <str>tvComponent</str> -    </arr> -  </requestHandler> - -  <!-- Terms Component - -       http://wiki.apache.org/solr/TermsComponent - -       A component to return terms and document frequency of those -       terms -    --> -  <searchComponent name="terms" class="solr.TermsComponent"/> - -  <!-- A request handler for demonstrating the terms component --> -  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> -     <lst name="defaults"> -      <bool name="terms">true</bool> -      <bool name="distrib">false</bool> -    </lst> -    <arr name="components"> -      <str>terms</str> -    </arr> -  </requestHandler> - - -  <!-- Query Elevation Component - -       http://wiki.apache.org/solr/QueryElevationComponent - -       a search component that enables you to configure the top -       results for a given query regardless of the normal lucene -       scoring. 
-    --> -  <searchComponent name="elevator" class="solr.QueryElevationComponent" > -    <!-- pick a fieldType to analyze queries --> -    <str name="queryFieldType">string</str> -    <str name="config-file">elevate.xml</str> -  </searchComponent> - -  <!-- A request handler for demonstrating the elevator component --> -  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> -    <lst name="defaults"> -      <str name="echoParams">explicit</str> -      <str name="df">text</str> -    </lst> -    <arr name="last-components"> -      <str>elevator</str> -    </arr> -  </requestHandler> - -  <!-- Highlighting Component - -       http://wiki.apache.org/solr/HighlightingParameters -    --> -  <searchComponent class="solr.HighlightComponent" name="highlight"> -    <highlighting> -      <!-- Configure the standard fragmenter --> -      <!-- This could most likely be commented out in the "default" case --> -      <fragmenter name="gap" -                  default="true" -                  class="solr.highlight.GapFragmenter"> -        <lst name="defaults"> -          <int name="hl.fragsize">100</int> -        </lst> -      </fragmenter> - -      <!-- A regular-expression-based fragmenter -           (for sentence extraction) -        --> -      <fragmenter name="regex" -                  class="solr.highlight.RegexFragmenter"> -        <lst name="defaults"> -          <!-- slightly smaller fragsizes work better because of slop --> -          <int name="hl.fragsize">70</int> -          <!-- allow 50% slop on fragment sizes --> -          <float name="hl.regex.slop">0.5</float> -          <!-- a basic sentence pattern --> -          <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> -        </lst> -      </fragmenter> - -      <!-- Configure the standard formatter --> -      <formatter name="html" -                 default="true" -                 class="solr.highlight.HtmlFormatter"> -        <lst name="defaults"> -          <str 
name="hl.simple.pre"><![CDATA[<em>]]></str> -          <str name="hl.simple.post"><![CDATA[</em>]]></str> -        </lst> -      </formatter> - -      <!-- Configure the standard encoder --> -      <encoder name="html" -               class="solr.highlight.HtmlEncoder" /> - -      <!-- Configure the standard fragListBuilder --> -      <fragListBuilder name="simple" -                       class="solr.highlight.SimpleFragListBuilder"/> - -      <!-- Configure the single fragListBuilder --> -      <fragListBuilder name="single" -                       class="solr.highlight.SingleFragListBuilder"/> - -      <!-- Configure the weighted fragListBuilder --> -      <fragListBuilder name="weighted" -                       default="true" -                       class="solr.highlight.WeightedFragListBuilder"/> - -      <!-- default tag FragmentsBuilder --> -      <fragmentsBuilder name="default" -                        default="true" -                        class="solr.highlight.ScoreOrderFragmentsBuilder"> -        <!-- -        <lst name="defaults"> -          <str name="hl.multiValuedSeparatorChar">/</str> -        </lst> -        --> -      </fragmentsBuilder> - -      <!-- multi-colored tag FragmentsBuilder --> -      <fragmentsBuilder name="colored" -                        class="solr.highlight.ScoreOrderFragmentsBuilder"> -        <lst name="defaults"> -          <str name="hl.tag.pre"><![CDATA[ -               <b style="background:yellow">,<b style="background:lawgreen">, -               <b style="background:aquamarine">,<b style="background:magenta">, -               <b style="background:palegreen">,<b style="background:coral">, -               <b style="background:wheat">,<b style="background:khaki">, -               <b style="background:lime">,<b style="background:deepskyblue">]]></str> -          <str name="hl.tag.post"><![CDATA[</b>]]></str> -        </lst> -      </fragmentsBuilder> - -      <boundaryScanner name="default" -                       
default="true" -                       class="solr.highlight.SimpleBoundaryScanner"> -        <lst name="defaults"> -          <str name="hl.bs.maxScan">10</str> -          <str name="hl.bs.chars">.,!? 	

</str> -        </lst> -      </boundaryScanner> - -      <boundaryScanner name="breakIterator" -                       class="solr.highlight.BreakIteratorBoundaryScanner"> -        <lst name="defaults"> -          <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE --> -          <str name="hl.bs.type">WORD</str> -          <!-- language and country are used when constructing Locale object.  --> -          <!-- And the Locale object will be used when getting instance of BreakIterator --> -          <str name="hl.bs.language">en</str> -          <str name="hl.bs.country">US</str> -        </lst> -      </boundaryScanner> -    </highlighting> -  </searchComponent> - -  <!-- Update Processors - -       Chains of Update Processor Factories for dealing with Update -       Requests can be declared, and then used by name in Update -       Request Processors - -       http://wiki.apache.org/solr/UpdateRequestProcessor - -    --> -  <!-- Deduplication - -       An example dedup update processor that creates the "id" field -       on the fly based on the hash code of some other fields.  This -       example has overwriteDupes set to false since we are using the -       id field as the signatureField and Solr will maintain -       uniqueness based on that anyway. 
- -    --> -  <!-- -     <updateRequestProcessorChain name="dedupe"> -       <processor class="solr.processor.SignatureUpdateProcessorFactory"> -         <bool name="enabled">true</bool> -         <str name="signatureField">id</str> -         <bool name="overwriteDupes">false</bool> -         <str name="fields">name,features,cat</str> -         <str name="signatureClass">solr.processor.Lookup3Signature</str> -       </processor> -       <processor class="solr.LogUpdateProcessorFactory" /> -       <processor class="solr.RunUpdateProcessorFactory" /> -     </updateRequestProcessorChain> -    --> - -  <!-- Language identification - -       This example update chain identifies the language of the incoming -       documents using the langid contrib. The detected language is -       written to field language_s. No field name mapping is done. -       The fields used for detection are text, title, subject and description, -       making this example suitable for detecting languages form full-text -       rich documents injected via ExtractingRequestHandler. -       See more about langId at http://wiki.apache.org/solr/LanguageDetection -    --> -    <!-- -     <updateRequestProcessorChain name="langid"> -       <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory"> -         <str name="langid.fl">text,title,subject,description</str> -         <str name="langid.langField">language_s</str> -         <str name="langid.fallback">en</str> -       </processor> -       <processor class="solr.LogUpdateProcessorFactory" /> -       <processor class="solr.RunUpdateProcessorFactory" /> -     </updateRequestProcessorChain> -    --> - -  <!-- Script update processor - -    This example hooks in an update processor implemented using JavaScript. 
- -    See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor -  --> -  <!-- -    <updateRequestProcessorChain name="script"> -      <processor class="solr.StatelessScriptUpdateProcessorFactory"> -        <str name="script">update-script.js</str> -        <lst name="params"> -          <str name="config_param">example config parameter</str> -        </lst> -      </processor> -      <processor class="solr.RunUpdateProcessorFactory" /> -    </updateRequestProcessorChain> -  --> - -  <!-- Response Writers - -       http://wiki.apache.org/solr/QueryResponseWriter - -       Request responses will be written using the writer specified by -       the 'wt' request parameter matching the name of a registered -       writer. - -       The "default" writer is the default and will be used if 'wt' is -       not specified in the request. -    --> -  <!-- The following response writers are implicitly configured unless -       overridden... -    --> -  <!-- -     <queryResponseWriter name="xml" -                          default="true" -                          class="solr.XMLResponseWriter" /> -     <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> -     <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> -     <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> -     <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> -     <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> -     <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> -     <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/> -    --> - -  <queryResponseWriter name="json" class="solr.JSONResponseWriter"> -     <!-- For the purposes of the tutorial, JSON responses are written as -      plain text so that they are easy to read in *any* browser. -      If you expect a MIME type of "application/json" just remove this override. 
-     --> -    <str name="content-type">text/plain; charset=UTF-8</str> -  </queryResponseWriter> - -  <!-- -     Custom response writers can be declared as needed... -    --> -  <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"> -    <str name="template.base.dir">${velocity.template.base.dir:}</str> -  </queryResponseWriter> - -  <!-- XSLT response writer transforms the XML output by any xslt file found -       in Solr's conf/xslt directory.  Changes to xslt files are checked for -       every xsltCacheLifetimeSeconds. -    --> -  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> -    <int name="xsltCacheLifetimeSeconds">5</int> -  </queryResponseWriter> - -  <!-- Query Parsers - -       https://lucene.apache.org/solr/guide/query-syntax-and-parsing.html - -       Multiple QParserPlugins can be registered by name, and then -       used in either the "defType" param for the QueryComponent (used -       by SearchHandler) or in LocalParams -    --> -  <!-- example of registering a query parser --> -  <!-- -     <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/> -    --> - -  <!-- Function Parsers - -       http://wiki.apache.org/solr/FunctionQuery - -       Multiple ValueSourceParsers can be registered by name, and then -       used as function names when using the "func" QParser. 
-    --> -  <!-- example of registering a custom function parser  --> -  <!-- -     <valueSourceParser name="myfunc" -                        class="com.mycompany.MyValueSourceParser" /> -    --> - - -  <!-- Document Transformers -       http://wiki.apache.org/solr/DocTransformers -    --> -  <!-- -     Could be something like: -     <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" > -       <int name="connection">jdbc://....</int> -     </transformer> - -     To add a constant value to all docs, use: -     <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" > -       <int name="value">5</int> -     </transformer> - -     If you want the user to still be able to change it with _value:something_ use this: -     <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" > -       <double name="defaultValue">5</double> -     </transformer> - -      If you are using the QueryElevationComponent, you may wish to mark documents that get boosted.  The -      EditorialMarkerFactory will do exactly that: -     <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" /> -    --> - -</config> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/spellings.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/spellings.txt deleted file mode 100644 index 162a044d5..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/spellings.txt +++ /dev/null @@ -1,2 +0,0 @@ -pizza -history diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/stopwords.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/stopwords.txt deleted file mode 100644 index ae1e83eeb..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/stopwords.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements.  
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/synonyms.txt b/solr-8.1.1/example/example-DIH/solr/mail/conf/synonyms.txt deleted file mode 100644 index eab4ee875..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License.  You may obtain a copy of the License at -# -#     http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/update-script.js b/solr-8.1.1/example/example-DIH/solr/mail/conf/update-script.js deleted file mode 100644 index 49b07f9b7..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/update-script.js +++ /dev/null @@ -1,53 +0,0 @@ -/* -  This is a basic skeleton JavaScript update processor. - -  In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in -  the example solrconfig.xml and must be uncommented to be enabled. - -  See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details. 
-*/ - -function processAdd(cmd) { - -  doc = cmd.solrDoc;  // org.apache.solr.common.SolrInputDocument -  id = doc.getFieldValue("id"); -  logger.info("update-script#processAdd: id=" + id); - -// Set a field value: -//  doc.setField("foo_s", "whatever"); - -// Get a configuration parameter: -//  config_param = params.get('config_param');  // "params" only exists if processor configured with <lst name="params"> - -// Get a request parameter: -// some_param = req.getParams().get("some_param") - -// Add a field of field names that match a pattern: -//   - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss -//  field_names = doc.getFieldNames().toArray(); -//  for(i=0; i < field_names.length; i++) { -//    field_name = field_names[i]; -//    if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); } -//  } - -} - -function processDelete(cmd) { -  // no-op -} - -function processMergeIndexes(cmd) { -  // no-op -} - -function processCommit(cmd) { -  // no-op -} - -function processRollback(cmd) { -  // no-op -} - -function finish() { -  // no-op -} diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example.xsl b/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example.xsl deleted file mode 100644 index b89927008..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example.xsl +++ /dev/null @@ -1,132 +0,0 @@ -<?xml version='1.0' encoding='UTF-8'?> - -<!--  - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements.  See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.  
You may obtain a copy of the License at - * - *     http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - --> - -<!--  -  Simple transform of Solr query results to HTML - --> -<xsl:stylesheet version='1.0' -    xmlns:xsl='http://www.w3.org/1999/XSL/Transform' -> - -  <xsl:output media-type="text/html" encoding="UTF-8"/>  -   -  <xsl:variable name="title" select="concat('Solr search results (',response/result/@numFound,' documents)')"/> -   -  <xsl:template match='/'> -    <html> -      <head> -        <title><xsl:value-of select="$title"/></title> -        <xsl:call-template name="css"/> -      </head> -      <body> -        <h1><xsl:value-of select="$title"/></h1> -        <div class="note"> -          This has been formatted by the sample "example.xsl" transform - -          use your own XSLT to get a nicer page -        </div> -        <xsl:apply-templates select="response/result/doc"/> -      </body> -    </html> -  </xsl:template> -   -  <xsl:template match="doc"> -    <xsl:variable name="pos" select="position()"/> -    <div class="doc"> -      <table width="100%"> -        <xsl:apply-templates> -          <xsl:with-param name="pos"><xsl:value-of select="$pos"/></xsl:with-param> -        </xsl:apply-templates> -      </table> -    </div> -  </xsl:template> - -  <xsl:template match="doc/*[@name='score']" priority="100"> -    <xsl:param name="pos"></xsl:param> -    <tr> -      <td class="name"> -        <xsl:value-of select="@name"/> -      </td> -      <td class="value"> -        <xsl:value-of select="."/> - -        <xsl:if test="boolean(//lst[@name='explain'])"> -          <xsl:element name="a"> -            <!-- can't allow whitespace here --> 
-            <xsl:attribute name="href">javascript:toggle("<xsl:value-of select="concat('exp-',$pos)" />");</xsl:attribute>?</xsl:element> -          <br/> -          <xsl:element name="div"> -            <xsl:attribute name="class">exp</xsl:attribute> -            <xsl:attribute name="id"> -              <xsl:value-of select="concat('exp-',$pos)" /> -            </xsl:attribute> -            <xsl:value-of select="//lst[@name='explain']/str[position()=$pos]"/> -          </xsl:element> -        </xsl:if> -      </td> -    </tr> -  </xsl:template> - -  <xsl:template match="doc/arr" priority="100"> -    <tr> -      <td class="name"> -        <xsl:value-of select="@name"/> -      </td> -      <td class="value"> -        <ul> -        <xsl:for-each select="*"> -          <li><xsl:value-of select="."/></li> -        </xsl:for-each> -        </ul> -      </td> -    </tr> -  </xsl:template> - - -  <xsl:template match="doc/*"> -    <tr> -      <td class="name"> -        <xsl:value-of select="@name"/> -      </td> -      <td class="value"> -        <xsl:value-of select="."/> -      </td> -    </tr> -  </xsl:template> - -  <xsl:template match="*"/> -   -  <xsl:template name="css"> -    <script> -      function toggle(id) { -        var obj = document.getElementById(id); -        obj.style.display = (obj.style.display != 'block') ? 
'block' : 'none'; -      } -    </script> -    <style type="text/css"> -      body { font-family: "Lucida Grande", sans-serif } -      td.name { font-style: italic; font-size:80%; } -      td { vertical-align: top; } -      ul { margin: 0px; margin-left: 1em; padding: 0px; } -      .note { font-size:80%; } -      .doc { margin-top: 1em; border-top: solid grey 1px; } -      .exp { display: none; font-family: monospace; white-space: pre; } -    </style> -  </xsl:template> - -</xsl:stylesheet> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example_atom.xsl b/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example_atom.xsl deleted file mode 100644 index b6c23151d..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example_atom.xsl +++ /dev/null @@ -1,67 +0,0 @@ -<?xml version='1.0' encoding='UTF-8'?> - -<!--  - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements.  See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.  You may obtain a copy of the License at - * - *     http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- --> - -<!--  -  Simple transform of Solr query results to Atom - --> - -<xsl:stylesheet version='1.0' -    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'> - -  <xsl:output -       method="xml" -       encoding="utf-8" -       media-type="application/xml" -  /> - -  <xsl:template match='/'> -    <xsl:variable name="query" select="response/lst[@name='responseHeader']/lst[@name='params']/str[@name='q']"/> -    <feed xmlns="http://www.w3.org/2005/Atom"> -      <title>Example Solr Atom 1.0 Feed</title> -      <subtitle> -       This has been formatted by the sample "example_atom.xsl" transform - -       use your own XSLT to get a nicer Atom feed. -      </subtitle> -      <author> -        <name>Apache Solr</name> -        <email>solr-user@lucene.apache.org</email> -      </author> -      <link rel="self" type="application/atom+xml"  -            href="http://localhost:8983/solr/q={$query}&wt=xslt&tr=atom.xsl"/> -      <updated> -        <xsl:value-of select="response/result/doc[position()=1]/date[@name='timestamp']"/> -      </updated> -      <id>tag:localhost,2007:example</id> -      <xsl:apply-templates select="response/result/doc"/> -    </feed> -  </xsl:template> -     -  <!-- search results xslt --> -  <xsl:template match="doc"> -    <xsl:variable name="id" select="str[@name='id']"/> -    <entry> -      <title><xsl:value-of select="str[@name='name']"/></title> -      <link href="http://localhost:8983/solr/select?q={$id}"/> -      <id>tag:localhost,2007:<xsl:value-of select="$id"/></id> -      <summary><xsl:value-of select="arr[@name='features']"/></summary> -      <updated><xsl:value-of select="date[@name='timestamp']"/></updated> -    </entry> -  </xsl:template> - -</xsl:stylesheet> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example_rss.xsl b/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example_rss.xsl deleted file mode 100644 index c8ab5bfb1..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/example_rss.xsl +++ 
/dev/null @@ -1,66 +0,0 @@ -<?xml version='1.0' encoding='UTF-8'?> - -<!--  - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements.  See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.  You may obtain a copy of the License at - * - *     http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - --> - -<!--  -  Simple transform of Solr query results to RSS - --> - -<xsl:stylesheet version='1.0' -    xmlns:xsl='http://www.w3.org/1999/XSL/Transform'> - -  <xsl:output -       method="xml" -       encoding="utf-8" -       media-type="application/xml" -  /> -  <xsl:template match='/'> -    <rss version="2.0"> -       <channel> -         <title>Example Solr RSS 2.0 Feed</title> -         <link>http://localhost:8983/solr</link> -         <description> -          This has been formatted by the sample "example_rss.xsl" transform - -          use your own XSLT to get a nicer RSS feed. 
-         </description> -         <language>en-us</language> -         <docs>http://localhost:8983/solr</docs> -         <xsl:apply-templates select="response/result/doc"/> -       </channel> -    </rss> -  </xsl:template> -   -  <!-- search results xslt --> -  <xsl:template match="doc"> -    <xsl:variable name="id" select="str[@name='id']"/> -    <xsl:variable name="timestamp" select="date[@name='timestamp']"/> -    <item> -      <title><xsl:value-of select="str[@name='name']"/></title> -      <link> -        http://localhost:8983/solr/select?q=id:<xsl:value-of select="$id"/> -      </link> -      <description> -        <xsl:value-of select="arr[@name='features']"/> -      </description> -      <pubDate><xsl:value-of select="$timestamp"/></pubDate> -      <guid> -        http://localhost:8983/solr/select?q=id:<xsl:value-of select="$id"/> -      </guid> -    </item> -  </xsl:template> -</xsl:stylesheet> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/luke.xsl b/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/luke.xsl deleted file mode 100644 index 05fb5bfee..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/luke.xsl +++ /dev/null @@ -1,337 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- -    Licensed to the Apache Software Foundation (ASF) under one or more -    contributor license agreements.  See the NOTICE file distributed with -    this work for additional information regarding copyright ownership. -    The ASF licenses this file to You under the Apache License, Version 2.0 -    (the "License"); you may not use this file except in compliance with -    the License.  You may obtain a copy of the License at -     -    http://www.apache.org/licenses/LICENSE-2.0 -     -    Unless required by applicable law or agreed to in writing, software -    distributed under the License is distributed on an "AS IS" BASIS, -    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-    See the License for the specific language governing permissions and -    limitations under the License. ---> - - -<!--  -  Display the luke request handler with graphs - --> -<xsl:stylesheet -    xmlns:xsl="http://www.w3.org/1999/XSL/Transform" -    xmlns="http://www.w3.org/1999/xhtml" -    version="1.0" -    > -    <xsl:output -        method="html" -        encoding="UTF-8" -        media-type="text/html" -        doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN" -        doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" -    /> - -    <xsl:variable name="title">Solr Luke Request Handler Response</xsl:variable> - -    <xsl:template match="/"> -        <html xmlns="http://www.w3.org/1999/xhtml"> -            <head> -                <link rel="stylesheet" type="text/css" href="solr-admin.css"/> -                <link rel="icon" href="favicon.ico" type="image/x-icon"/> -                <link rel="shortcut icon" href="favicon.ico" type="image/x-icon"/> -                <title> -                    <xsl:value-of select="$title"/> -                </title> -                <xsl:call-template name="css"/> - -            </head> -            <body> -                <h1> -                    <xsl:value-of select="$title"/> -                </h1> -                <div class="doc"> -                    <ul> -                        <xsl:if test="response/lst[@name='index']"> -                            <li> -                                <a href="#index">Index Statistics</a> -                            </li> -                        </xsl:if> -                        <xsl:if test="response/lst[@name='fields']"> -                            <li> -                                <a href="#fields">Field Statistics</a> -                                <ul> -                                    <xsl:for-each select="response/lst[@name='fields']/lst"> -                                        <li> -                                            <a 
href="#{@name}"> -                                                <xsl:value-of select="@name"/> -                                            </a> -                                        </li> -                                    </xsl:for-each> -                                </ul> -                            </li> -                        </xsl:if> -                        <xsl:if test="response/lst[@name='doc']"> -                            <li> -                                <a href="#doc">Document statistics</a> -                            </li> -                        </xsl:if> -                    </ul> -                </div> -                <xsl:if test="response/lst[@name='index']"> -                    <h2><a name="index"/>Index Statistics</h2> -                    <xsl:apply-templates select="response/lst[@name='index']"/> -                </xsl:if> -                <xsl:if test="response/lst[@name='fields']"> -                    <h2><a name="fields"/>Field Statistics</h2> -                    <xsl:apply-templates select="response/lst[@name='fields']"/> -                </xsl:if> -                <xsl:if test="response/lst[@name='doc']"> -                    <h2><a name="doc"/>Document statistics</h2> -                    <xsl:apply-templates select="response/lst[@name='doc']"/> -                </xsl:if> -            </body> -        </html> -    </xsl:template> - -    <xsl:template match="lst"> -        <xsl:if test="parent::lst"> -            <tr> -                <td colspan="2"> -                    <div class="doc"> -                        <xsl:call-template name="list"/> -                    </div> -                </td> -            </tr> -        </xsl:if> -        <xsl:if test="not(parent::lst)"> -            <div class="doc"> -                <xsl:call-template name="list"/> -            </div> -        </xsl:if> -    </xsl:template> - -    <xsl:template name="list"> -        <xsl:if test="count(child::*)>0"> -            <table> - 
               <thead> -                    <tr> -                        <th colspan="2"> -                            <p> -                                <a name="{@name}"/> -                            </p> -                            <xsl:value-of select="@name"/> -                        </th> -                    </tr> -                </thead> -                <tbody> -                    <xsl:choose> -                        <xsl:when -                            test="@name='histogram'"> -                            <tr> -                                <td colspan="2"> -                                    <xsl:call-template name="histogram"/> -                                </td> -                            </tr> -                        </xsl:when> -                        <xsl:otherwise> -                            <xsl:apply-templates/> -                        </xsl:otherwise> -                    </xsl:choose> -                </tbody> -            </table> -        </xsl:if> -    </xsl:template> - -    <xsl:template name="histogram"> -        <div class="doc"> -            <xsl:call-template name="barchart"> -                <xsl:with-param name="max_bar_width">50</xsl:with-param> -                <xsl:with-param name="iwidth">800</xsl:with-param> -                <xsl:with-param name="iheight">160</xsl:with-param> -                <xsl:with-param name="fill">blue</xsl:with-param> -            </xsl:call-template> -        </div> -    </xsl:template> - -    <xsl:template name="barchart"> -        <xsl:param name="max_bar_width"/> -        <xsl:param name="iwidth"/> -        <xsl:param name="iheight"/> -        <xsl:param name="fill"/> -        <xsl:variable name="max"> -            <xsl:for-each select="int"> -                <xsl:sort data-type="number" order="descending"/> -                <xsl:if test="position()=1"> -                    <xsl:value-of select="."/> -                </xsl:if> -            </xsl:for-each> -        
</xsl:variable> -        <xsl:variable name="bars"> -           <xsl:value-of select="count(int)"/> -        </xsl:variable> -        <xsl:variable name="bar_width"> -           <xsl:choose> -             <xsl:when test="$max_bar_width &lt; ($iwidth div $bars)"> -               <xsl:value-of select="$max_bar_width"/> -             </xsl:when> -             <xsl:otherwise> -               <xsl:value-of select="$iwidth div $bars"/> -             </xsl:otherwise> -           </xsl:choose> -        </xsl:variable> -        <table class="histogram"> -           <tbody> -              <tr> -                <xsl:for-each select="int"> -                   <td> -                 <xsl:value-of select="."/> -                 <div class="histogram"> -                  <xsl:attribute name="style">background-color: <xsl:value-of select="$fill"/>; width: <xsl:value-of select="$bar_width"/>px; height: <xsl:value-of select="($iheight*number(.)) div $max"/>px;</xsl:attribute> -                 </div> -                   </td>  -                </xsl:for-each> -              </tr> -              <tr> -                <xsl:for-each select="int"> -                   <td> -                       <xsl:value-of select="@name"/> -                   </td> -                </xsl:for-each> -              </tr> -           </tbody> -        </table> -    </xsl:template> - -    <xsl:template name="keyvalue"> -        <xsl:choose> -            <xsl:when test="@name"> -                <tr> -                    <td class="name"> -                        <xsl:value-of select="@name"/> -                    </td> -                    <td class="value"> -                        <xsl:value-of select="."/> -                    </td> -                </tr> -            </xsl:when> -            <xsl:otherwise> -                <xsl:value-of select="."/> -            </xsl:otherwise> -        </xsl:choose> -    </xsl:template> - -    <xsl:template match="int|bool|long|float|double|uuid|date"> -        
<xsl:call-template name="keyvalue"/> -    </xsl:template> - -    <xsl:template match="arr"> -        <tr> -            <td class="name"> -                <xsl:value-of select="@name"/> -            </td> -            <td class="value"> -                <ul> -                    <xsl:for-each select="child::*"> -                        <li> -                            <xsl:apply-templates/> -                        </li> -                    </xsl:for-each> -                </ul> -            </td> -        </tr> -    </xsl:template> - -    <xsl:template match="str"> -        <xsl:choose> -            <xsl:when test="@name='schema' or @name='index' or @name='flags'"> -                <xsl:call-template name="schema"/> -            </xsl:when> -            <xsl:otherwise> -                <xsl:call-template name="keyvalue"/> -            </xsl:otherwise> -        </xsl:choose> -    </xsl:template> - -    <xsl:template name="schema"> -        <tr> -            <td class="name"> -                <xsl:value-of select="@name"/> -            </td> -            <td class="value"> -                <xsl:if test="contains(.,'unstored')"> -                    <xsl:value-of select="."/> -                </xsl:if> -                <xsl:if test="not(contains(.,'unstored'))"> -                    <xsl:call-template name="infochar2string"> -                        <xsl:with-param name="charList"> -                            <xsl:value-of select="."/> -                        </xsl:with-param> -                    </xsl:call-template> -                </xsl:if> -            </td> -        </tr> -    </xsl:template> - -    <xsl:template name="infochar2string"> -        <xsl:param name="i">1</xsl:param> -        <xsl:param name="charList"/> - -        <xsl:variable name="char"> -            <xsl:value-of select="substring($charList,$i,1)"/> -        </xsl:variable> -        <xsl:choose> -            <xsl:when test="$char='I'"> -                <xsl:value-of 
select="/response/lst[@name='info']/lst/str[@name='I']"/> - </xsl:when> -            <xsl:when test="$char='T'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='T']"/> - </xsl:when> -            <xsl:when test="$char='S'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='S']"/> - </xsl:when> -            <xsl:when test="$char='M'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='M']"/> - </xsl:when> -            <xsl:when test="$char='V'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='V']"/> - </xsl:when> -            <xsl:when test="$char='o'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='o']"/> - </xsl:when> -            <xsl:when test="$char='p'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='p']"/> - </xsl:when> -            <xsl:when test="$char='O'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='O']"/> - </xsl:when> -            <xsl:when test="$char='L'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='L']"/> - </xsl:when> -            <xsl:when test="$char='B'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='B']"/> - </xsl:when> -            <xsl:when test="$char='C'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='C']"/> - </xsl:when> -            <xsl:when test="$char='f'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='f']"/> - </xsl:when> -            <xsl:when test="$char='l'"> -                <xsl:value-of select="/response/lst[@name='info']/lst/str[@name='l']"/> - -            </xsl:when> -        </xsl:choose> - -        <xsl:if test="not($i>=string-length($charList))"> -            <xsl:call-template name="infochar2string"> -                <xsl:with-param name="i"> -   
                 <xsl:value-of select="$i+1"/> -                </xsl:with-param> -                <xsl:with-param name="charList"> -                    <xsl:value-of select="$charList"/> -                </xsl:with-param> -            </xsl:call-template> -        </xsl:if> -    </xsl:template> -    <xsl:template name="css"> -        <style type="text/css"> -            <![CDATA[ -            td.name {font-style: italic; font-size:80%; } -            .doc { margin: 0.5em; border: solid grey 1px; } -            .exp { display: none; font-family: monospace; white-space: pre; } -            div.histogram { background: none repeat scroll 0%; -moz-background-clip: -moz-initial; -moz-background-origin: -moz-initial; -moz-background-inline-policy: -moz-initial;} -            table.histogram { width: auto; vertical-align: bottom; } -            table.histogram td, table.histogram th { text-align: center; vertical-align: bottom; border-bottom: 1px solid #ff9933; width: auto; } -            ]]> -        </style> -    </xsl:template> -</xsl:stylesheet> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/updateXml.xsl b/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/updateXml.xsl deleted file mode 100644 index a96e1d024..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/conf/xslt/updateXml.xsl +++ /dev/null @@ -1,70 +0,0 @@ -<!--  - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements.  See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License.  
You may obtain a copy of the License at - * - *     http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - --> - -<!-- -  Simple transform of Solr query response into Solr Update XML compliant XML. -  When used in the xslt response writer you will get UpdateXML as output. -  But you can also store a query response XML to disk and feed this XML to -  the XSLTUpdateRequestHandler to index the content. Provided as example only. -  See http://wiki.apache.org/solr/XsltUpdateRequestHandler for more info - --> -<xsl:stylesheet version='1.0' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'> -  <xsl:output media-type="text/xml" method="xml" indent="yes"/> - -  <xsl:template match='/'> -    <add> -        <xsl:apply-templates select="response/result/doc"/> -    </add> -  </xsl:template> -   -  <!-- Ignore score (makes no sense to index) --> -  <xsl:template match="doc/*[@name='score']" priority="100"> -  </xsl:template> - -  <xsl:template match="doc"> -    <xsl:variable name="pos" select="position()"/> -    <doc> -        <xsl:apply-templates> -          <xsl:with-param name="pos"><xsl:value-of select="$pos"/></xsl:with-param> -        </xsl:apply-templates> -    </doc> -  </xsl:template> - -  <!-- Flatten arrays to duplicate field lines --> -  <xsl:template match="doc/arr" priority="100"> -      <xsl:variable name="fn" select="@name"/> -       -      <xsl:for-each select="*"> -        <xsl:element name="field"> -          <xsl:attribute name="name"><xsl:value-of select="$fn"/></xsl:attribute> -          <xsl:value-of select="."/> -        </xsl:element> -      </xsl:for-each> -  </xsl:template> - - -  <xsl:template match="doc/*"> -      <xsl:variable 
name="fn" select="@name"/> - -      <xsl:element name="field"> -        <xsl:attribute name="name"><xsl:value-of select="$fn"/></xsl:attribute> -        <xsl:value-of select="."/> -      </xsl:element> -  </xsl:template> - -  <xsl:template match="*"/> -</xsl:stylesheet> diff --git a/solr-8.1.1/example/example-DIH/solr/mail/core.properties b/solr-8.1.1/example/example-DIH/solr/mail/core.properties deleted file mode 100644 index 8b1378917..000000000 --- a/solr-8.1.1/example/example-DIH/solr/mail/core.properties +++ /dev/null @@ -1 +0,0 @@ -  | 
