- <?xml version="1.0" encoding="UTF-8"?><taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
- <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
- <primitive>true</primitive>
- <annotatorImplementationName>com.ibm.langware.annotator.jFrostLexAnnotator</annotatorImplementationName>
- <analysisEngineMetaData>
- <name>LanguageWare Lexical Annotator</name>
- <description>This annotator provides access to LanguageWare Lexical Analysis.</description>
- <version>8.0.4.0</version>
- <vendor>IBM Corporation</vendor>
- <configurationParameters defaultGroup="default" searchStrategy="language_fallback">
- <configurationParameter>
- <name>SofaNames</name>
- <description>The Sofa names the annotator should work on. If no
- names are specified, the annotator works on the
- default sofa.</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>LWDataSubdir</name>
- <description>The name of the directory under the UIMA data directory in
- which the LanguageWare resources are located</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>UseExplicitDicts</name>
- <description>Dictionaries to be used are specified explicitly in this config file.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>PreloadLanguages</name>
- <description>A list of all languages which should be pre-loaded at
- init-time in the form xx-YY (xx=lang, YY=sublang/country)</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>DefaultLanguage</name>
- <description>The language to use in processing when the document language is not set
- before the annotator processing.</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>DictionaryCacheSize</name>
- <description>!Deprecated! Maximum number of dictionaries held in cache</description>
- <type>Integer</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>ProcessLanguagesWithNoDictionaries</name>
- <description>Control the annotator behaviour if no dictionaries are configured for the processed document language.
- If "tokenize", only basic tokenization will be possible.
- If "skip", processing will be terminated with no errors.
- If "error", an exception will be thrown.
- The default value is "skip".</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>UseFirstMatchPolicy</name>
- <description>If true lookup stops after the first match in any dictionary (DLTCM_POLICY_FIRST)
- otherwise all matches from all dictionaries are found (DLTCM_POLICY_ALL)</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>UseStrictCaseMode</name>
- <description>If true, the strict-case mode is turned 'ON'. That means case information will be respected when
- doing lookup in lowercase dictionaries. Otherwise, strict-case mode is set to 'OFF' and a match
- will be returned even if the case doesn't match.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>UseRelativeTokenAndSentenceNumbers</name>
- <description>If true token and sentence numbers are reset to 1 for each new sentence/paragraph</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>AnnotateMWConstituentTokens</name>
- <description>If true, MWU annotations will be created for Multi-Word entries and Token Annotations will be created
- for their constituent words; otherwise, only MWU annotations will be created.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>MWBoundary</name>
- <description>This defines the MWU lookup boundaries. Possible values for this parameter are:
- "Sentence", "Paragraph", or "Document".</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>IgnorePunctuationTokens</name>
- <description>If true, punctuation tokens are ignored</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>AggressiveSentenceBreaks</name>
- <description>!Deprecated! If true, an end-of-line will be considered end-of-sentence</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>CrossDictionaryDecomposition</name>
- <description>If true a decomposition is performed across dictionaries
- i.e. words from several dictionaries may be combined into one compound</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>BOFAOnlyDecomposition</name>
- <description>If true, decomposition is performed based on BOFA values only.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>FilterDecomposedGlosses</name>
- <description>If true, the paradigms reported by decomposition for each component are filtered
- according to the decomposition rules, removing paradigms that are not valid in
- combination. Setting this to false may lead to better performance and recall
- at the expense of precision.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>StandaloneDecomposition</name>
- <description>If true, the lexical analyzer tries to decompose dictionary-matched entries which have a compound flag.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>JapaneseDecomposition</name>
- <description>If true decomposition is done for Japanese documents without
- regard to the result specification</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>JapaneseDeepWordBreak</name>
- <description>If true returns Japanese word suffixes separated from their stems</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>CreateCompoundPartsInsteadOfToken</name>
- <description>If true then compound parts are created not as type uima.tt.CompPartAnnotation but
- as uima.tt.TokenAnnotation. The annotations for the compound parts of a complex word
- are created instead of (not in addition to) the token for the whole complex word.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>true</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>ReturnOnlyFirstLevelOfCompoundBreakdown</name>
- <description>If true, then for compounds which have several decompositions only the first
- (longest match) decomposition is returned. E.g. for the German "Segelschullehrer" only
- "Segelschul"+"lehrer" is returned and not also "Segel" + "schul"+ "lehrer"</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>CreateDecompStructure</name>
- <description>If true, then full decomp analysis structure is created.
- This option is intended to be used mutually exclusively with the previous two.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>BreakOnHyphens</name>
- <description>!Deprecated! If true then we will try to break an unknown word if it contains a hyphen</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>DoLookupVariant</name>
- <description>If true, look up unknown words in the variant dictionary.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>DoRuleBasedNormalization4All</name>
- <description>If true, look up a variant with rule-based normalization for all unknown words.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>DoRuleBasedNormalization4Katakana</name>
- <description>If true, look up a variant with rule-based normalization only for katakana words.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>CreateGenericAnnotations</name>
- <description>Create Generic annotations if annotate glosses are available.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>CheckGenericTypes</name>
- <description>Check the types when writing the feature values for generic annotations.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>GlossComparatorClassname</name>
- <description>The full name of the class implementation for the Comparator interface
- to be used for sorting gloss collections.</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>LemmaPoolingThreshold</name>
- <description>A threshold that is used to control lemma pooling. Pooling improves the memory usage of
- the annotator. It is useful when processing large documents. Setting the value to 0
- means pooling is always enabled, while setting the value to -1 disables pooling.</description>
- <type>Integer</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <commonParameters>
- <configurationParameter>
- <name>LexicalDicts</name>
- <description>File name of dictionaries for the lexical analysis</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>MultiWordDicts</name>
- <description>File name of dictionaries for the specific multi-word unit</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>OOVDicts</name>
- <description>File name of dictionaries for the morphological guesser (out-of-vocabulary)</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>SynonymDicts</name>
- <description>File name of dictionaries for synonyms</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>VariantDicts</name>
- <description>File name of dictionaries for word variants</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>SpellCorrectionDicts</name>
- <description>File name of dictionaries for the spelling correction</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>PartOfSpeechDict</name>
- <description>File name of dictionary for the Part-of-Speech Tagging</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>PostTagHandling</name>
- <description>Post tag handling policy</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>PostLemmaEntryHandling</name>
- <description>Post LemmaEntries handling policy</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>MaxCharNumPerSentence</name>
- <description>The maximum number of characters in a sentence.</description>
- <type>Integer</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>BreakRulesSpec</name>
- <description>Break rules to be used.</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>DecompositionRulesSpec</name>
- <description>Decomposition rules to be used.</description>
- <type>String</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- </commonParameters>
- <configurationGroup names="default en af ar ca cs da de el es fr he it ja ko nb nl nn pl pt ru sv tr zh"/>
- </configurationParameters>
- <configurationParameterSettings>
- <nameValuePair>
- <name>LWDataSubdir</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>PreloadLanguages</name>
- <value>
- <array>
- <string>en</string>
- </array>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>UseExplicitDicts</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>ProcessLanguagesWithNoDictionaries</name>
- <value>
- <string>skip</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>UseFirstMatchPolicy</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>UseStrictCaseMode</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>UseRelativeTokenAndSentenceNumbers</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>AnnotateMWConstituentTokens</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>MWBoundary</name>
- <value>
- <string>Sentence</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>IgnorePunctuationTokens</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>CrossDictionaryDecomposition</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BOFAOnlyDecomposition</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>FilterDecomposedGlosses</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>JapaneseDecomposition</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>JapaneseDeepWordBreak</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>CreateCompoundPartsInsteadOfToken</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>ReturnOnlyFirstLevelOfCompoundBreakdown</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>CreateDecompStructure</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>DoLookupVariant</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>DoRuleBasedNormalization4All</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>DoRuleBasedNormalization4Katakana</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>CreateGenericAnnotations</name>
- <value>
- <boolean>true</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>CheckGenericTypes</name>
- <value>
- <boolean>false</boolean>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>GlossComparatorClassname</name>
- <value>
- <string>com.ibm.langware.annotator.GlossComparator</string>
- </value>
- </nameValuePair>
- <settingsForGroup name="de">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>de-XX-TSimplified-7220.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/8/de-XX-LLex-7017.dic</string>
- <string>../resources/dictionary/9/de-XX-OOV-7002.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="ru">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>ru-RU-TSimplified-7200.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/24/ru-RU-LLex-7003.dic</string>
- <string>../resources/dictionary/25/ru-RU-OOV-7003.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="pt">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>pt-XX-TSimplified-7001.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/22/pt-XX-LLex-7008.dic</string>
- <string>../resources/dictionary/23/pt-XX-OOV-7003.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="ko">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>ko-KR-TKpos-8041.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="en">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>en-XX-TPenn-7212.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/0/en-XX-LLex-7030.dic</string>
- <string>../resources/dictionary/1/en-XX-OOV-7004.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="it">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>it-IT-TSimplified-7001.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/15/it-IT-LLex-7007.dic</string>
- <string>../resources/dictionary/16/it-IT-OOV-7002.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="fr">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>fr-XX-TSimplified-7001.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/12/fr-XX-LLex-7009.dic</string>
- <string>../resources/dictionary/13/fr-XX-OOV-7002.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="zh">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>zh-XX-TCpos-7000.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/26/zh-XX-Lex-8003.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="es">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>es-ES-TSimplified-7002.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/10/es-ES-LLex-7006.dic</string>
- <string>../resources/dictionary/11/es-ES-OOV-7003.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="cs">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>cs-CZ-TSimplified-7200.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/4/cs-CZ-LLex-7003.dic</string>
- <string>../resources/dictionary/5/cs-CZ-OOV-7004.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="ar">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>ar-XX-TSimplified-7003.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/2/ar-XX-Lex-7007.dic</string>
- <string>../resources/dictionary/3/ar-XX-OOV-7003.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="ja">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>ja-JP-TJpos-7000.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/17/ja-JP-Lex-7006.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="pl">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>pl-PL-TSimplified-7200.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/20/pl-PL-LLex-7003.dic</string>
- <string>../resources/dictionary/21/pl-PL-OOV-7004.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="da">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>da-DK-TSimplified-7000.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/6/da-DK-LLex-7005.dic</string>
- <string>../resources/dictionary/7/da-DK-OOV-7002.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="he">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>he-IL-TSimplified-7201.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/14/he-IL-Lex-7205.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="tr">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>tr-TR-TTpos-8502.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- <settingsForGroup name="nl">
- <nameValuePair>
- <name>PartOfSpeechDict</name>
- <value>
- <string>nl-NL-TSimplified-7000.dic</string>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>LexicalDicts</name>
- <value>
- <array>
- <string>../resources/dictionary/18/nl-NL-Reform-LLex-7004.dic</string>
- <string>../resources/dictionary/19/nl-NL-OOV-7002.dic</string>
- </array>
- </value>
- </nameValuePair>
- <!-- OOVDicts is declared multiValued, so an empty setting must be an empty array rather than a scalar string. -->
- <nameValuePair>
- <name>OOVDicts</name>
- <value>
- <array/>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>BreakRulesSpec</name>
- <value>
- <string/>
- </value>
- </nameValuePair>
- </settingsForGroup>
- </configurationParameterSettings>
<!-- Type system declared by this descriptor. It contains:
       * uima.tcas.DocumentAnnotation with language features,
       * the uima.tt.* annotation hierarchy rooted at uima.tt.TTAnnotation,
       * feature structures derived from uima.cas.TOP (KeyStringEntry, Lemma, LanguageConfidencePair),
       * the com.ibm.langware.uimatypes.* token subtypes, all derived from uima.tt.TokenAnnotation. -->
- <typeSystemDescription>
- <types>
- <typeDescription>
- <name>uima.tcas.DocumentAnnotation</name>
- <description>Annotation covering the entire document, containing document meta information, for example the document language</description>
- <supertypeName>uima.tcas.Annotation</supertypeName>
- <features>
- <featureDescription>
- <name>language</name>
- <description>The document language</description>
- <rangeTypeName>uima.cas.String</rangeTypeName>
- </featureDescription>
<!-- NOTE(review): the element type of this FSList is not declared here; presumably the
     entries are uima.tt.LanguageConfidencePair instances. Confirm against the producer. -->
- <featureDescription>
- <name>languageCandidates</name>
- <description>A list of language candidates for the document produced during language identification. These are sorted by confidence value</description>
- <rangeTypeName>uima.cas.FSList</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
<!-- uima.tt annotation hierarchy:
       TTAnnotation
         DocStructureAnnotation: ParagraphAnnotation, SentenceAnnotation
         LexicalAnnotation: DictionaryEntryAnnotation, TokenLikeAnnotation
           TokenLikeAnnotation: TokenAnnotation, CompPartAnnotation -->
- <typeDescription>
- <name>uima.tt.TTAnnotation</name>
- <description>Base type for lexical and document structure annotation types</description>
- <supertypeName>uima.tcas.Annotation</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.DocStructureAnnotation</name>
- <description>Base type for document structure annotation types</description>
- <supertypeName>uima.tt.TTAnnotation</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.ParagraphAnnotation</name>
- <description>A paragraph</description>
- <supertypeName>uima.tt.DocStructureAnnotation</supertypeName>
- <features>
- <featureDescription>
- <name>paragraphNumber</name>
- <description>The sequence number of the paragraph in the document</description>
- <rangeTypeName>uima.cas.Integer</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.SentenceAnnotation</name>
- <description>A sentence</description>
- <supertypeName>uima.tt.DocStructureAnnotation</supertypeName>
- <features>
- <featureDescription>
- <name>sentenceNumber</name>
- <description>The sequence number of the sentence in the paragraph (or the document)</description>
- <rangeTypeName>uima.cas.Integer</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.LexicalAnnotation</name>
- <description>Base type for lexical annotation types</description>
- <supertypeName>uima.tt.TTAnnotation</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.DictionaryEntryAnnotation</name>
- <description>Base type for dictionary-based user-defined annotation types</description>
- <supertypeName>uima.tt.LexicalAnnotation</supertypeName>
- <features>
- <featureDescription>
- <name>lemma</name>
- <description>Morphological information for the dictionary entry</description>
- <rangeTypeName>uima.tt.Lemma</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
<!-- The lemma, lemmaEntries and dictionaryMatch features are declared on TokenLikeAnnotation.
     The capabilities section refers to them through the subtype uima.tt.TokenAnnotation. -->
- <typeDescription>
- <name>uima.tt.TokenLikeAnnotation</name>
- <description>Base type for token annotation types</description>
- <supertypeName>uima.tt.LexicalAnnotation</supertypeName>
- <features>
- <featureDescription>
- <name>lemma</name>
- <description>The best probable entry containing all morphological information for the token</description>
- <rangeTypeName>uima.tt.Lemma</rangeTypeName>
- </featureDescription>
- <featureDescription>
- <name>lemmaEntries</name>
- <description>List of lemma entries containing all morphological information for the token</description>
- <rangeTypeName>uima.cas.FSArray</rangeTypeName>
- </featureDescription>
- <featureDescription>
- <name>dictionaryMatch</name>
- <description>A flag indicating whether or not the token matches a dictionary entry</description>
- <rangeTypeName>uima.cas.Boolean</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.TokenAnnotation</name>
- <description>General token annotation type. It is also the base type for the special token types</description>
- <supertypeName>uima.tt.TokenLikeAnnotation</supertypeName>
- <features>
- <featureDescription>
- <name>posTag</name>
- <description>Part-of-Speech tag</description>
- <rangeTypeName>uima.cas.String</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.CompPartAnnotation</name>
- <description>A part of a compound word</description>
- <supertypeName>uima.tt.TokenLikeAnnotation</supertypeName>
- </typeDescription>
<!-- Feature structures that are not annotations: KeyStringEntry and LanguageConfidencePair
     derive from uima.cas.TOP, and Lemma derives from KeyStringEntry (which declares "key"). -->
- <typeDescription>
- <name>uima.tt.KeyStringEntry</name>
- <description>Base type for types defining key/value feature (e.g. uima.tt.Lemma type)</description>
- <supertypeName>uima.cas.TOP</supertypeName>
- <features>
- <featureDescription>
- <name>key</name>
- <description>A key/value feature (e.g. lemma string in uima.tt.Lemma type)</description>
- <rangeTypeName>uima.cas.String</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.Lemma</name>
- <description>Morphological information retrieved from a lexical dictionary entry</description>
- <supertypeName>uima.tt.KeyStringEntry</supertypeName>
- <features>
- <featureDescription>
- <name>partOfSpeech</name>
- <description>An integral encoding representing the part-of-speech for the lemma</description>
- <rangeTypeName>uima.cas.Integer</rangeTypeName>
- </featureDescription>
- <featureDescription>
- <name>frost_ExtendedPOS</name>
- <description>An integer representing additional information related to the part-of-speech</description>
- <rangeTypeName>uima.cas.Integer</rangeTypeName>
- </featureDescription>
<!-- NOTE(review): isStopword is the only feature in this type system with an empty
     description. Presumably it flags stop-word lemmas; confirm and fill in the description. -->
- <featureDescription>
- <name>isStopword</name>
- <description/>
- <rangeTypeName>uima.cas.Boolean</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
- <typeDescription>
- <name>uima.tt.LanguageConfidencePair</name>
- <description>Language-Confidence pair of a language candidate for the document text</description>
- <supertypeName>uima.cas.TOP</supertypeName>
- <features>
- <featureDescription>
- <name>languageConfidence</name>
- <description>An indication (a float value between 0 and 1) of how well the candidate language actually fits the language of the document</description>
- <rangeTypeName>uima.cas.Float</rangeTypeName>
- </featureDescription>
- <featureDescription>
- <name>language</name>
- <description>Language name (ISO Locale code)</description>
- <rangeTypeName>uima.cas.String</rangeTypeName>
- </featureDescription>
- </features>
- </typeDescription>
<!-- Token subtypes in com.ibm.langware.uimatypes. None of them declares features of its own.
       WordLikeToken (supertype uima.tt.TokenAnnotation)
         Alphabetic: UppercaseAlphabetic, TitlecaseAlphabetic, LowercaseAlphabetic, Arabic, Hebrew
         Syllabic: Hiragana, Katakana, Hangul
         Ideographic: Han
         Numeric: ChineseNumeral -->
- <typeDescription>
- <name>com.ibm.langware.uimatypes.WordLikeToken</name>
- <description>Base type for possible words (not punctuations nor symbols). Also represents alphanumeric tokens</description>
- <supertypeName>uima.tt.TokenAnnotation</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Alphabetic</name>
- <description>Alphabetic word</description>
- <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.UppercaseAlphabetic</name>
- <description>Uppercase alphabetic word</description>
- <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.TitlecaseAlphabetic</name>
- <description>Titlecase alphabetic word</description>
- <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.LowercaseAlphabetic</name>
- <description>Lowercase alphabetic word</description>
- <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Arabic</name>
- <description>Arabic word</description>
- <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Hebrew</name>
- <description>Hebrew word</description>
- <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Syllabic</name>
- <description>Syllabic word</description>
- <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Hiragana</name>
- <description>Hiragana (Syllabic) word</description>
- <supertypeName>com.ibm.langware.uimatypes.Syllabic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Katakana</name>
- <description>Katakana (Syllabic) word</description>
- <supertypeName>com.ibm.langware.uimatypes.Syllabic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Hangul</name>
- <description>Hangul (Syllabic) word</description>
- <supertypeName>com.ibm.langware.uimatypes.Syllabic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Ideographic</name>
- <description>Ideographic word</description>
- <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Han</name>
- <description>Han (Ideographic) word</description>
- <supertypeName>com.ibm.langware.uimatypes.Ideographic</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Numeric</name>
- <description>A numeric sequence</description>
- <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.ChineseNumeral</name>
- <description>A Chinese numeral</description>
- <supertypeName>com.ibm.langware.uimatypes.Numeric</supertypeName>
- </typeDescription>
<!-- Punctuation branch: Punctuation derives directly from uima.tt.TokenAnnotation (not from
     WordLikeToken), and ClauseEndingPunctuation derives from Punctuation. -->
- <typeDescription>
- <name>com.ibm.langware.uimatypes.Punctuation</name>
- <description>A punctuation or symbol</description>
- <supertypeName>uima.tt.TokenAnnotation</supertypeName>
- </typeDescription>
- <typeDescription>
- <name>com.ibm.langware.uimatypes.ClauseEndingPunctuation</name>
- <description>A clause terminating punctuation</description>
- <supertypeName>com.ibm.langware.uimatypes.Punctuation</supertypeName>
- </typeDescription>
- </types>
- </typeSystemDescription>
<!-- Declared capabilities. Both entries take no inputs. The first applies to the
     "x-unspecified" language and the second to an explicit list of language codes. -->
- <capabilities>
<!-- Capability 1 (x-unspecified): paragraph, sentence and token annotations, plus the
     lemma and lemmaEntries features of TokenAnnotation. -->
- <capability>
- <inputs/>
- <outputs>
- <type>uima.tt.ParagraphAnnotation</type>
- <type>uima.tt.SentenceAnnotation</type>
- <type>uima.tt.TokenAnnotation</type>
- <feature>uima.tt.TokenAnnotation:lemma</feature>
- <feature>uima.tt.TokenAnnotation:lemmaEntries</feature>
- </outputs>
- <languagesSupported>
- <language>x-unspecified</language>
- </languagesSupported>
- </capability>
<!-- Capability 2 (listed languages): additionally declares CompPartAnnotation and Lemma as
     output types, the paragraph/sentence numbering features, posTag, dictionaryMatch and the
     Lemma features.
     NOTE(review): unlike capability 1, this list does not include TokenAnnotation:lemma,
     although it does include TokenAnnotation:lemmaEntries. Confirm whether the omission is
     intentional. -->
- <capability>
- <inputs/>
- <outputs>
- <type>uima.tt.ParagraphAnnotation</type>
- <type>uima.tt.SentenceAnnotation</type>
- <type>uima.tt.TokenAnnotation</type>
- <type>uima.tt.CompPartAnnotation</type>
- <type>uima.tt.Lemma</type>
- <feature>uima.tt.ParagraphAnnotation:paragraphNumber</feature>
- <feature>uima.tt.SentenceAnnotation:sentenceNumber</feature>
- <feature>uima.tt.TokenAnnotation:posTag</feature>
- <feature>uima.tt.TokenAnnotation:lemmaEntries</feature>
- <feature>uima.tt.TokenAnnotation:dictionaryMatch</feature>
- <feature>uima.tt.Lemma:key</feature>
- <feature>uima.tt.Lemma:partOfSpeech</feature>
- <feature>uima.tt.Lemma:isStopword</feature>
- <feature>uima.tt.Lemma:frost_ExtendedPOS</feature>
- </outputs>
- <languagesSupported>
- <language>en</language>
- <language>af</language>
- <language>ar</language>
- <language>ca</language>
- <language>cs</language>
- <language>da</language>
- <language>de</language>
- <language>el</language>
- <language>es</language>
- <language>fr</language>
- <language>he</language>
- <language>it</language>
- <language>ja</language>
- <language>ko</language>
- <language>nb</language>
- <language>nl</language>
- <language>nn</language>
- <language>pl</language>
- <language>pt</language>
- <language>ru</language>
- <language>sv</language>
- <language>tr</language>
- <language>zh</language>
- </languagesSupported>
- </capability>
- </capabilities>
<!-- Operational properties: the component is declared as modifying the CAS, as allowing
     multiple deployment, and as not outputting new CASes. -->
- <operationalProperties>
- <modifiesCas>true</modifiesCas>
- <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
- <outputsNewCASes>false</outputsNewCASes>
- </operationalProperties>
- </analysisEngineMetaData>
<!-- Resource manager configuration: declares one external resource, "ResourcesFile", and
     binds it to the key "Resources". -->
- <resourceManagerConfiguration>
- <externalResources>
- <externalResource>
- <name>ResourcesFile</name>
- <description>Location of Resources</description>
<!-- NOTE(review): with a fileLanguageResourceSpecifier the resource location is presumably
     built as prefix + language code + suffix, i.e. ../resources/Tagger/<language>/ here.
     Confirm against the UIMA reference. The path is relative, so it depends on the
     location it is resolved from. -->
- <fileLanguageResourceSpecifier>
- <fileUrlPrefix>../resources/Tagger/</fileUrlPrefix>
- <fileUrlSuffix>/</fileUrlSuffix>
- </fileLanguageResourceSpecifier>
- </externalResource>
- </externalResources>
- <externalResourceBindings>
- <externalResourceBinding>
- <key>Resources</key>
- <resourceName>ResourcesFile</resourceName>
- </externalResourceBinding>
- </externalResourceBindings>
- </resourceManagerConfiguration>
- </taeDescription>
|