LWAnnotator1.xml 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305
  1. <?xml version="1.0" encoding="UTF-8"?><taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
  2. <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  3. <primitive>true</primitive>
  4. <annotatorImplementationName>com.ibm.langware.annotator.jFrostLexAnnotator</annotatorImplementationName>
  5. <analysisEngineMetaData>
  6. <name>LanguageWare Lexical Annotator</name>
  7. <description>This annotator provides access to LanguageWare Lexical Analysis.</description>
  8. <version>8.0.4.0</version>
  9. <vendor>IBM Corporation</vendor>
  10. <configurationParameters defaultGroup="default" searchStrategy="language_fallback">
  11. <configurationParameter>
  12. <name>SofaNames</name>
  13. <description>The Sofa names the annotator should work on. If no
  14. names are specified, the annotator works on the
  15. default sofa.</description>
  16. <type>String</type>
  17. <multiValued>true</multiValued>
  18. <mandatory>false</mandatory>
  19. </configurationParameter>
  20. <configurationParameter>
  21. <name>LWDataSubdir</name>
  22. <description>The name of the directory under the UIMA data directory in
  23. which the LanguageWare resources are located</description>
  24. <type>String</type>
  25. <multiValued>false</multiValued>
  26. <mandatory>false</mandatory>
  27. </configurationParameter>
  28. <configurationParameter>
  29. <name>UseExplicitDicts</name>
  30. <description>Dictionaries to be used are specified explicitly in this config file.</description>
  31. <type>Boolean</type>
  32. <multiValued>false</multiValued>
  33. <mandatory>false</mandatory>
  34. </configurationParameter>
  35. <configurationParameter>
  36. <name>PreloadLanguages</name>
  37. <description>A list of all languages which should be pre-loaded at
  38. init-time in the form xx-YY (xx=lang, YY=sublang/country)</description>
  39. <type>String</type>
  40. <multiValued>true</multiValued>
  41. <mandatory>false</mandatory>
  42. </configurationParameter>
  43. <configurationParameter>
  44. <name>DefaultLanguage</name>
  45. <description>The language to use in processing when the document language is not set
  46. before the annotator processing.</description>
  47. <type>String</type>
  48. <multiValued>false</multiValued>
  49. <mandatory>false</mandatory>
  50. </configurationParameter>
  51. <configurationParameter>
  52. <name>DictionaryCacheSize</name>
  53. <description>!Deprecated! Maximum number of dictionaries held in cache</description>
  54. <type>Integer</type>
  55. <multiValued>false</multiValued>
  56. <mandatory>false</mandatory>
  57. </configurationParameter>
  58. <configurationParameter>
  59. <name>ProcessLanguagesWithNoDictionaries</name>
  60. <description>Control the annotator behaviour if no dictionaries are configured for the processed document language.
  61. If "tokenize", only basic tokenization will be possible.
  62. If "skip", processing will be terminated with no errors.
  63. If "error", an exception will be thrown.
  64. The default value is "skip".</description>
  65. <type>String</type>
  66. <multiValued>false</multiValued>
  67. <mandatory>false</mandatory>
  68. </configurationParameter>
  69. <configurationParameter>
  70. <name>UseFirstMatchPolicy</name>
  71. <description>If true, lookup stops after the first match in any dictionary (DLTCM_POLICY_FIRST);
  72. otherwise, all matches from all dictionaries are found (DLTCM_POLICY_ALL).</description>
  73. <type>Boolean</type>
  74. <multiValued>false</multiValued>
  75. <mandatory>false</mandatory>
  76. </configurationParameter>
  77. <configurationParameter>
  78. <name>UseStrictCaseMode</name>
  79. <description>If true, the strict-case mode is turned 'ON'. That means case information will be respected when
  80. doing lookup in lowercase dictionaries. Otherwise, strict-case mode is set to 'OFF' and a match
  81. will be returned even if the case doesn't match.</description>
  82. <type>Boolean</type>
  83. <multiValued>false</multiValued>
  84. <mandatory>false</mandatory>
  85. </configurationParameter>
  86. <configurationParameter>
  87. <name>UseRelativeTokenAndSentenceNumbers</name>
  88. <description>If true token and sentence numbers are reset to 1 for each new sentence/paragraph</description>
  89. <type>Boolean</type>
  90. <multiValued>false</multiValued>
  91. <mandatory>false</mandatory>
  92. </configurationParameter>
  93. <configurationParameter>
  94. <name>AnnotateMWConstituentTokens</name>
  95. <description>If true, MWU annotations will be created for Multi-Word entries and Token Annotations will be created
  96. for their constituent words; otherwise, only MWU annotations will be created.</description>
  97. <type>Boolean</type>
  98. <multiValued>false</multiValued>
  99. <mandatory>false</mandatory>
  100. </configurationParameter>
  101. <configurationParameter>
  102. <name>MWBoundary</name>
  103. <description>This defines the MWU lookup boundaries. Possible values for this parameter are:
  104. "Sentence", "Paragraph", or "Document".</description>
  105. <type>String</type>
  106. <multiValued>false</multiValued>
  107. <mandatory>false</mandatory>
  108. </configurationParameter>
  109. <configurationParameter>
  110. <name>IgnorePunctuationTokens</name>
  111. <description>If true, punctuation tokens are ignored</description>
  112. <type>Boolean</type>
  113. <multiValued>false</multiValued>
  114. <mandatory>false</mandatory>
  115. </configurationParameter>
  116. <configurationParameter>
  117. <name>AggressiveSentenceBreaks</name>
  118. <description>!Deprecated! If true, an end-of-line will be considered end-of-sentence</description>
  119. <type>Boolean</type>
  120. <multiValued>false</multiValued>
  121. <mandatory>false</mandatory>
  122. </configurationParameter>
  123. <configurationParameter>
  124. <name>CrossDictionaryDecomposition</name>
  125. <description>If true a decomposition is performed across dictionaries
  126. i.e. words from several dictionaries may be combined into one compound</description>
  127. <type>Boolean</type>
  128. <multiValued>false</multiValued>
  129. <mandatory>false</mandatory>
  130. </configurationParameter>
  131. <configurationParameter>
  132. <name>BOFAOnlyDecomposition</name>
  133. <description>If true, a decomposition is performed based on BOFA values only.</description>
  134. <type>Boolean</type>
  135. <multiValued>false</multiValued>
  136. <mandatory>false</mandatory>
  137. </configurationParameter>
  138. <configurationParameter>
  139. <name>FilterDecomposedGlosses</name>
  140. <description>If true, the paradigms reported by decomposition for each component are filtered
  141. according to the decomposition rules, removing paradigms that are not valid in
  142. combination. Setting this to false may lead to better performance and recall
  143. at the expense of precision.</description>
  144. <type>Boolean</type>
  145. <multiValued>false</multiValued>
  146. <mandatory>false</mandatory>
  147. </configurationParameter>
  148. <configurationParameter>
  149. <name>StandaloneDecomposition</name>
  150. <description>If true, the lexical analyzer tries to decompose dictionary-matched entries which have a compound flag.</description>
  151. <type>Boolean</type>
  152. <multiValued>false</multiValued>
  153. <mandatory>false</mandatory>
  154. </configurationParameter>
  155. <configurationParameter>
  156. <name>JapaneseDecomposition</name>
  157. <description>If true decomposition is done for Japanese documents without
  158. regard to the result specification</description>
  159. <type>Boolean</type>
  160. <multiValued>false</multiValued>
  161. <mandatory>false</mandatory>
  162. </configurationParameter>
  163. <configurationParameter>
  164. <name>JapaneseDeepWordBreak</name>
  165. <description>If true returns Japanese word suffixes separated from their stems</description>
  166. <type>Boolean</type>
  167. <multiValued>false</multiValued>
  168. <mandatory>false</mandatory>
  169. </configurationParameter>
  170. <configurationParameter>
  171. <name>CreateCompoundPartsInsteadOfToken</name>
  172. <description>If true, then compound parts are created not as type uima.tt.CompPartAnnotation but
  173. as uima.tt.TokenAnnotation. The annotations for the compound parts of a complex word
  174. are created instead of (not in addition to) the token for the whole complex word.</description>
  175. <type>Boolean</type>
  176. <multiValued>false</multiValued>
  177. <mandatory>true</mandatory>
  178. </configurationParameter>
  179. <configurationParameter>
  180. <name>ReturnOnlyFirstLevelOfCompoundBreakdown</name>
  181. <description>If true, then for compounds which have several decompositions only the first
  182. (longest match) decomposition is returned. E.g. for the German "Segelschullehrer" only
  183. "Segelschul"+"lehrer" is returned and not also "Segel" + "schul"+ "lehrer"</description>
  184. <type>Boolean</type>
  185. <multiValued>false</multiValued>
  186. <mandatory>false</mandatory>
  187. </configurationParameter>
  188. <configurationParameter>
  189. <name>CreateDecompStructure</name>
  190. <description>If true, then full decomp analysis structure is created.
  191. This option is intended to be used mutually exclusively with the previous two.</description>
  192. <type>Boolean</type>
  193. <multiValued>false</multiValued>
  194. <mandatory>false</mandatory>
  195. </configurationParameter>
  196. <configurationParameter>
  197. <name>BreakOnHyphens</name>
  198. <description>!Deprecated! If true, then we will try to break an unknown word if it contains a hyphen</description>
  199. <type>Boolean</type>
  200. <multiValued>false</multiValued>
  201. <mandatory>false</mandatory>
  202. </configurationParameter>
  203. <configurationParameter>
  204. <name>DoLookupVariant</name>
  205. <description>If true, look up unknown words in the variant dictionary.</description>
  206. <type>Boolean</type>
  207. <multiValued>false</multiValued>
  208. <mandatory>false</mandatory>
  209. </configurationParameter>
  210. <configurationParameter>
  211. <name>DoRuleBasedNormalization4All</name>
  212. <description>If true, look up a variant with rule-based normalization for all unknown words.</description>
  213. <type>Boolean</type>
  214. <multiValued>false</multiValued>
  215. <mandatory>false</mandatory>
  216. </configurationParameter>
  217. <configurationParameter>
  218. <name>DoRuleBasedNormalization4Katakana</name>
  219. <description>If true, look up a variant with rule-based normalization only for katakana words.</description>
  220. <type>Boolean</type>
  221. <multiValued>false</multiValued>
  222. <mandatory>false</mandatory>
  223. </configurationParameter>
  224. <configurationParameter>
  225. <name>CreateGenericAnnotations</name>
  226. <description>Create Generic annotations if annotate glosses are available.</description>
  227. <type>Boolean</type>
  228. <multiValued>false</multiValued>
  229. <mandatory>false</mandatory>
  230. </configurationParameter>
  231. <configurationParameter>
  232. <name>CheckGenericTypes</name>
  233. <description>Check the types when writing the feature values for generic annotations.</description>
  234. <type>Boolean</type>
  235. <multiValued>false</multiValued>
  236. <mandatory>false</mandatory>
  237. </configurationParameter>
  238. <configurationParameter>
  239. <name>GlossComparatorClassname</name>
  240. <description>The full name of the class implementation for the Comparator interface
  241. to be used for sorting gloss collections.</description>
  242. <type>String</type>
  243. <multiValued>false</multiValued>
  244. <mandatory>false</mandatory>
  245. </configurationParameter>
  246. <configurationParameter>
  247. <name>LemmaPoolingThreshold</name>
  248. <description>A threshold that is used to control lemma pooling. Pooling improves the memory usage of
  249. the annotator. It is useful when processing large documents. Setting the value to 0
  250. means always enabled, while setting its value to -1 disables pooling.</description>
  251. <type>Integer</type>
  252. <multiValued>false</multiValued>
  253. <mandatory>false</mandatory>
  254. </configurationParameter>
  255. <commonParameters>
  256. <configurationParameter>
  257. <name>LexicalDicts</name>
  258. <description>File name of dictionaries for the lexical analysis</description>
  259. <type>String</type>
  260. <multiValued>true</multiValued>
  261. <mandatory>false</mandatory>
  262. </configurationParameter>
  263. <configurationParameter>
  264. <name>MultiWordDicts</name>
  265. <description>File name of dictionaries for the specific multi-word unit</description>
  266. <type>String</type>
  267. <multiValued>true</multiValued>
  268. <mandatory>false</mandatory>
  269. </configurationParameter>
  270. <configurationParameter>
  271. <name>OOVDicts</name>
  272. <description>File name of dictionaries for the morphological guesser (out-of-vocabulary)</description>
  273. <type>String</type>
  274. <multiValued>true</multiValued>
  275. <mandatory>false</mandatory>
  276. </configurationParameter>
  277. <configurationParameter>
  278. <name>SynonymDicts</name>
  279. <description>File name of dictionaries for synonyms</description>
  280. <type>String</type>
  281. <multiValued>true</multiValued>
  282. <mandatory>false</mandatory>
  283. </configurationParameter>
  284. <configurationParameter>
  285. <name>VariantDicts</name>
  286. <description>File name of dictionaries for word variants</description>
  287. <type>String</type>
  288. <multiValued>true</multiValued>
  289. <mandatory>false</mandatory>
  290. </configurationParameter>
  291. <configurationParameter>
  292. <name>SpellCorrectionDicts</name>
  293. <description>File name of dictionaries for the spelling correction</description>
  294. <type>String</type>
  295. <multiValued>true</multiValued>
  296. <mandatory>false</mandatory>
  297. </configurationParameter>
  298. <configurationParameter>
  299. <name>PartOfSpeechDict</name>
  300. <description>File name of dictionary for the Part-of-Speech Tagging</description>
  301. <type>String</type>
  302. <multiValued>false</multiValued>
  303. <mandatory>false</mandatory>
  304. </configurationParameter>
  305. <configurationParameter>
  306. <name>PostTagHandling</name>
  307. <description>Post tag handling policy</description>
  308. <type>String</type>
  309. <multiValued>false</multiValued>
  310. <mandatory>false</mandatory>
  311. </configurationParameter>
  312. <configurationParameter>
  313. <name>PostLemmaEntryHandling</name>
  314. <description>Post LemmaEntries handling policy</description>
  315. <type>String</type>
  316. <multiValued>false</multiValued>
  317. <mandatory>false</mandatory>
  318. </configurationParameter>
  319. <configurationParameter>
  320. <name>MaxCharNumPerSentence</name>
  321. <description>The maximum number of characters in a sentence.</description>
  322. <type>Integer</type>
  323. <multiValued>false</multiValued>
  324. <mandatory>false</mandatory>
  325. </configurationParameter>
  326. <configurationParameter>
  327. <name>BreakRulesSpec</name>
  328. <description>Break rules to be used.</description>
  329. <type>String</type>
  330. <multiValued>false</multiValued>
  331. <mandatory>false</mandatory>
  332. </configurationParameter>
  333. <configurationParameter>
  334. <name>DecompositionRulesSpec</name>
  335. <description>Decomposition rules to be used.</description>
  336. <type>String</type>
  337. <multiValued>false</multiValued>
  338. <mandatory>false</mandatory>
  339. </configurationParameter>
  340. </commonParameters>
  341. <configurationGroup names="default en af ar ca cs da de el es fr he it ja ko nb nl nn pl pt ru sv tr zh"/>
  342. </configurationParameters>
  343. <configurationParameterSettings>
  344. <nameValuePair>
  345. <name>LWDataSubdir</name>
  346. <value>
  347. <string/>
  348. </value>
  349. </nameValuePair>
  350. <nameValuePair>
  351. <name>PreloadLanguages</name>
  352. <value>
  353. <array>
  354. <string>en</string>
  355. </array>
  356. </value>
  357. </nameValuePair>
  358. <nameValuePair>
  359. <name>UseExplicitDicts</name>
  360. <value>
  361. <boolean>true</boolean>
  362. </value>
  363. </nameValuePair>
  364. <nameValuePair>
  365. <name>ProcessLanguagesWithNoDictionaries</name>
  366. <value>
  367. <string>skip</string>
  368. </value>
  369. </nameValuePair>
  370. <nameValuePair>
  371. <name>UseFirstMatchPolicy</name>
  372. <value>
  373. <boolean>false</boolean>
  374. </value>
  375. </nameValuePair>
  376. <nameValuePair>
  377. <name>UseStrictCaseMode</name>
  378. <value>
  379. <boolean>false</boolean>
  380. </value>
  381. </nameValuePair>
  382. <nameValuePair>
  383. <name>UseRelativeTokenAndSentenceNumbers</name>
  384. <value>
  385. <boolean>false</boolean>
  386. </value>
  387. </nameValuePair>
  388. <nameValuePair>
  389. <name>AnnotateMWConstituentTokens</name>
  390. <value>
  391. <boolean>true</boolean>
  392. </value>
  393. </nameValuePair>
  394. <nameValuePair>
  395. <name>MWBoundary</name>
  396. <value>
  397. <string>Sentence</string>
  398. </value>
  399. </nameValuePair>
  400. <nameValuePair>
  401. <name>IgnorePunctuationTokens</name>
  402. <value>
  403. <boolean>false</boolean>
  404. </value>
  405. </nameValuePair>
  406. <nameValuePair>
  407. <name>CrossDictionaryDecomposition</name>
  408. <value>
  409. <boolean>true</boolean>
  410. </value>
  411. </nameValuePair>
  412. <nameValuePair>
  413. <name>BOFAOnlyDecomposition</name>
  414. <value>
  415. <boolean>false</boolean>
  416. </value>
  417. </nameValuePair>
  418. <nameValuePair>
  419. <name>FilterDecomposedGlosses</name>
  420. <value>
  421. <boolean>true</boolean>
  422. </value>
  423. </nameValuePair>
  424. <nameValuePair>
  425. <name>JapaneseDecomposition</name>
  426. <value>
  427. <boolean>true</boolean>
  428. </value>
  429. </nameValuePair>
  430. <nameValuePair>
  431. <name>JapaneseDeepWordBreak</name>
  432. <value>
  433. <boolean>false</boolean>
  434. </value>
  435. </nameValuePair>
  436. <nameValuePair>
  437. <name>CreateCompoundPartsInsteadOfToken</name>
  438. <value>
  439. <boolean>true</boolean>
  440. </value>
  441. </nameValuePair>
  442. <nameValuePair>
  443. <name>ReturnOnlyFirstLevelOfCompoundBreakdown</name>
  444. <value>
  445. <boolean>false</boolean>
  446. </value>
  447. </nameValuePair>
  448. <nameValuePair>
  449. <name>CreateDecompStructure</name>
  450. <value>
  451. <boolean>false</boolean>
  452. </value>
  453. </nameValuePair>
  454. <nameValuePair>
  455. <name>DoLookupVariant</name>
  456. <value>
  457. <boolean>false</boolean>
  458. </value>
  459. </nameValuePair>
  460. <nameValuePair>
  461. <name>DoRuleBasedNormalization4All</name>
  462. <value>
  463. <boolean>false</boolean>
  464. </value>
  465. </nameValuePair>
  466. <nameValuePair>
  467. <name>DoRuleBasedNormalization4Katakana</name>
  468. <value>
  469. <boolean>false</boolean>
  470. </value>
  471. </nameValuePair>
  472. <nameValuePair>
  473. <name>CreateGenericAnnotations</name>
  474. <value>
  475. <boolean>true</boolean>
  476. </value>
  477. </nameValuePair>
  478. <nameValuePair>
  479. <name>CheckGenericTypes</name>
  480. <value>
  481. <boolean>false</boolean>
  482. </value>
  483. </nameValuePair>
  484. <nameValuePair>
  485. <name>GlossComparatorClassname</name>
  486. <value>
  487. <string>com.ibm.langware.annotator.GlossComparator</string>
  488. </value>
  489. </nameValuePair>
  490. <settingsForGroup name="de">
  491. <nameValuePair>
  492. <name>PartOfSpeechDict</name>
  493. <value>
  494. <string>de-XX-TSimplified-7220.dic</string>
  495. </value>
  496. </nameValuePair>
  497. <nameValuePair>
  498. <name>LexicalDicts</name>
  499. <value>
  500. <array>
  501. <string>../resources/dictionary/8/de-XX-LLex-7017.dic</string>
  502. <string>../resources/dictionary/9/de-XX-OOV-7002.dic</string>
  503. </array>
  504. </value>
  505. </nameValuePair>
  506. <nameValuePair>
  507. <name>OOVDicts</name>
  508. <value>
  509. <string/>
  510. </value>
  511. </nameValuePair>
  512. <nameValuePair>
  513. <name>BreakRulesSpec</name>
  514. <value>
  515. <string/>
  516. </value>
  517. </nameValuePair>
  518. </settingsForGroup>
  519. <settingsForGroup name="ru">
  520. <nameValuePair>
  521. <name>PartOfSpeechDict</name>
  522. <value>
  523. <string>ru-RU-TSimplified-7200.dic</string>
  524. </value>
  525. </nameValuePair>
  526. <nameValuePair>
  527. <name>LexicalDicts</name>
  528. <value>
  529. <array>
  530. <string>../resources/dictionary/24/ru-RU-LLex-7003.dic</string>
  531. <string>../resources/dictionary/25/ru-RU-OOV-7003.dic</string>
  532. </array>
  533. </value>
  534. </nameValuePair>
  535. <nameValuePair>
  536. <name>OOVDicts</name>
  537. <value>
  538. <string/>
  539. </value>
  540. </nameValuePair>
  541. <nameValuePair>
  542. <name>BreakRulesSpec</name>
  543. <value>
  544. <string/>
  545. </value>
  546. </nameValuePair>
  547. </settingsForGroup>
  548. <settingsForGroup name="pt">
  549. <nameValuePair>
  550. <name>PartOfSpeechDict</name>
  551. <value>
  552. <string>pt-XX-TSimplified-7001.dic</string>
  553. </value>
  554. </nameValuePair>
  555. <nameValuePair>
  556. <name>LexicalDicts</name>
  557. <value>
  558. <array>
  559. <string>../resources/dictionary/22/pt-XX-LLex-7008.dic</string>
  560. <string>../resources/dictionary/23/pt-XX-OOV-7003.dic</string>
  561. </array>
  562. </value>
  563. </nameValuePair>
  564. <nameValuePair>
  565. <name>OOVDicts</name>
  566. <value>
  567. <string/>
  568. </value>
  569. </nameValuePair>
  570. <nameValuePair>
  571. <name>BreakRulesSpec</name>
  572. <value>
  573. <string/>
  574. </value>
  575. </nameValuePair>
  576. </settingsForGroup>
  577. <settingsForGroup name="ko">
  578. <nameValuePair>
  579. <name>PartOfSpeechDict</name>
  580. <value>
  581. <string>ko-KR-TKpos-8041.dic</string>
  582. </value>
  583. </nameValuePair>
  584. <nameValuePair>
  585. <name>LexicalDicts</name>
  586. <value>
  587. <array/>
  588. </value>
  589. </nameValuePair>
  590. <nameValuePair>
  591. <name>OOVDicts</name>
  592. <value>
  593. <string/>
  594. </value>
  595. </nameValuePair>
  596. <nameValuePair>
  597. <name>BreakRulesSpec</name>
  598. <value>
  599. <string/>
  600. </value>
  601. </nameValuePair>
  602. </settingsForGroup>
  603. <settingsForGroup name="en">
  604. <nameValuePair>
  605. <name>PartOfSpeechDict</name>
  606. <value>
  607. <string>en-XX-TPenn-7212.dic</string>
  608. </value>
  609. </nameValuePair>
  610. <nameValuePair>
  611. <name>LexicalDicts</name>
  612. <value>
  613. <array>
  614. <string>../resources/dictionary/0/en-XX-LLex-7030.dic</string>
  615. <string>../resources/dictionary/1/en-XX-OOV-7004.dic</string>
  616. </array>
  617. </value>
  618. </nameValuePair>
  619. <nameValuePair>
  620. <name>OOVDicts</name>
  621. <value>
  622. <string/>
  623. </value>
  624. </nameValuePair>
  625. <nameValuePair>
  626. <name>BreakRulesSpec</name>
  627. <value>
  628. <string/>
  629. </value>
  630. </nameValuePair>
  631. </settingsForGroup>
  632. <settingsForGroup name="it">
  633. <nameValuePair>
  634. <name>PartOfSpeechDict</name>
  635. <value>
  636. <string>it-IT-TSimplified-7001.dic</string>
  637. </value>
  638. </nameValuePair>
  639. <nameValuePair>
  640. <name>LexicalDicts</name>
  641. <value>
  642. <array>
  643. <string>../resources/dictionary/15/it-IT-LLex-7007.dic</string>
  644. <string>../resources/dictionary/16/it-IT-OOV-7002.dic</string>
  645. </array>
  646. </value>
  647. </nameValuePair>
  648. <nameValuePair>
  649. <name>OOVDicts</name>
  650. <value>
  651. <string/>
  652. </value>
  653. </nameValuePair>
  654. <nameValuePair>
  655. <name>BreakRulesSpec</name>
  656. <value>
  657. <string/>
  658. </value>
  659. </nameValuePair>
  660. </settingsForGroup>
  661. <settingsForGroup name="fr">
  662. <nameValuePair>
  663. <name>PartOfSpeechDict</name>
  664. <value>
  665. <string>fr-XX-TSimplified-7001.dic</string>
  666. </value>
  667. </nameValuePair>
  668. <nameValuePair>
  669. <name>LexicalDicts</name>
  670. <value>
  671. <array>
  672. <string>../resources/dictionary/12/fr-XX-LLex-7009.dic</string>
  673. <string>../resources/dictionary/13/fr-XX-OOV-7002.dic</string>
  674. </array>
  675. </value>
  676. </nameValuePair>
  677. <nameValuePair>
  678. <name>OOVDicts</name>
  679. <value>
  680. <string/>
  681. </value>
  682. </nameValuePair>
  683. <nameValuePair>
  684. <name>BreakRulesSpec</name>
  685. <value>
  686. <string/>
  687. </value>
  688. </nameValuePair>
  689. </settingsForGroup>
  690. <settingsForGroup name="zh">
  691. <nameValuePair>
  692. <name>PartOfSpeechDict</name>
  693. <value>
  694. <string>zh-XX-TCpos-7000.dic</string>
  695. </value>
  696. </nameValuePair>
  697. <nameValuePair>
  698. <name>LexicalDicts</name>
  699. <value>
  700. <array>
  701. <string>../resources/dictionary/26/zh-XX-Lex-8003.dic</string>
  702. </array>
  703. </value>
  704. </nameValuePair>
  705. <nameValuePair>
  706. <name>OOVDicts</name>
  707. <value>
  708. <string/>
  709. </value>
  710. </nameValuePair>
  711. <nameValuePair>
  712. <name>BreakRulesSpec</name>
  713. <value>
  714. <string/>
  715. </value>
  716. </nameValuePair>
  717. </settingsForGroup>
  718. <settingsForGroup name="es">
  719. <nameValuePair>
  720. <name>PartOfSpeechDict</name>
  721. <value>
  722. <string>es-ES-TSimplified-7002.dic</string>
  723. </value>
  724. </nameValuePair>
  725. <nameValuePair>
  726. <name>LexicalDicts</name>
  727. <value>
  728. <array>
  729. <string>../resources/dictionary/10/es-ES-LLex-7006.dic</string>
  730. <string>../resources/dictionary/11/es-ES-OOV-7003.dic</string>
  731. </array>
  732. </value>
  733. </nameValuePair>
  734. <nameValuePair>
  735. <name>OOVDicts</name>
  736. <value>
  737. <string/>
  738. </value>
  739. </nameValuePair>
  740. <nameValuePair>
  741. <name>BreakRulesSpec</name>
  742. <value>
  743. <string/>
  744. </value>
  745. </nameValuePair>
  746. </settingsForGroup>
  747. <settingsForGroup name="cs">
  748. <nameValuePair>
  749. <name>PartOfSpeechDict</name>
  750. <value>
  751. <string>cs-CZ-TSimplified-7200.dic</string>
  752. </value>
  753. </nameValuePair>
  754. <nameValuePair>
  755. <name>LexicalDicts</name>
  756. <value>
  757. <array>
  758. <string>../resources/dictionary/4/cs-CZ-LLex-7003.dic</string>
  759. <string>../resources/dictionary/5/cs-CZ-OOV-7004.dic</string>
  760. </array>
  761. </value>
  762. </nameValuePair>
  763. <nameValuePair>
  764. <name>OOVDicts</name>
  765. <value>
  766. <string/>
  767. </value>
  768. </nameValuePair>
  769. <nameValuePair>
  770. <name>BreakRulesSpec</name>
  771. <value>
  772. <string/>
  773. </value>
  774. </nameValuePair>
  775. </settingsForGroup>
  776. <settingsForGroup name="ar">
  777. <nameValuePair>
  778. <name>PartOfSpeechDict</name>
  779. <value>
  780. <string>ar-XX-TSimplified-7003.dic</string>
  781. </value>
  782. </nameValuePair>
  783. <nameValuePair>
  784. <name>LexicalDicts</name>
  785. <value>
  786. <array>
  787. <string>../resources/dictionary/2/ar-XX-Lex-7007.dic</string>
  788. <string>../resources/dictionary/3/ar-XX-OOV-7003.dic</string>
  789. </array>
  790. </value>
  791. </nameValuePair>
  792. <nameValuePair>
  793. <name>OOVDicts</name>
  794. <value>
  795. <string/>
  796. </value>
  797. </nameValuePair>
  798. <nameValuePair>
  799. <name>BreakRulesSpec</name>
  800. <value>
  801. <string/>
  802. </value>
  803. </nameValuePair>
  804. </settingsForGroup>
  <!-- Settings for configuration group "ja": one PartOfSpeechDict file and a single
       LexicalDicts entry (ja-JP files); OOVDicts and BreakRulesSpec are empty strings. -->
  805. <settingsForGroup name="ja">
  806. <nameValuePair>
  807. <name>PartOfSpeechDict</name>
  808. <value>
  809. <string>ja-JP-TJpos-7000.dic</string>
  810. </value>
  811. </nameValuePair>
  812. <nameValuePair>
  813. <name>LexicalDicts</name>
  814. <value>
  815. <array>
  816. <string>../resources/dictionary/17/ja-JP-Lex-7006.dic</string>
  817. </array>
  818. </value>
  819. </nameValuePair>
  820. <nameValuePair>
  821. <name>OOVDicts</name>
  822. <value>
  823. <string/>
  824. </value>
  825. </nameValuePair>
  826. <nameValuePair>
  827. <name>BreakRulesSpec</name>
  828. <value>
  829. <string/>
  830. </value>
  831. </nameValuePair>
  832. </settingsForGroup>
  <!-- Settings for configuration group "pl": one PartOfSpeechDict file and two
       LexicalDicts entries (pl-PL files); OOVDicts and BreakRulesSpec are empty strings.
       NOTE(review): the *-OOV-* dictionary is listed under LexicalDicts while OOVDicts
       is left empty; presumably intentional, confirm against the annotator documentation. -->
  833. <settingsForGroup name="pl">
  834. <nameValuePair>
  835. <name>PartOfSpeechDict</name>
  836. <value>
  837. <string>pl-PL-TSimplified-7200.dic</string>
  838. </value>
  839. </nameValuePair>
  840. <nameValuePair>
  841. <name>LexicalDicts</name>
  842. <value>
  843. <array>
  844. <string>../resources/dictionary/20/pl-PL-LLex-7003.dic</string>
  845. <string>../resources/dictionary/21/pl-PL-OOV-7004.dic</string>
  846. </array>
  847. </value>
  848. </nameValuePair>
  849. <nameValuePair>
  850. <name>OOVDicts</name>
  851. <value>
  852. <string/>
  853. </value>
  854. </nameValuePair>
  855. <nameValuePair>
  856. <name>BreakRulesSpec</name>
  857. <value>
  858. <string/>
  859. </value>
  860. </nameValuePair>
  861. </settingsForGroup>
  <!-- Settings for configuration group "da": one PartOfSpeechDict file and two
       LexicalDicts entries (da-DK files); OOVDicts and BreakRulesSpec are empty strings.
       NOTE(review): the *-OOV-* dictionary is listed under LexicalDicts while OOVDicts
       is left empty; presumably intentional, confirm against the annotator documentation. -->
  862. <settingsForGroup name="da">
  863. <nameValuePair>
  864. <name>PartOfSpeechDict</name>
  865. <value>
  866. <string>da-DK-TSimplified-7000.dic</string>
  867. </value>
  868. </nameValuePair>
  869. <nameValuePair>
  870. <name>LexicalDicts</name>
  871. <value>
  872. <array>
  873. <string>../resources/dictionary/6/da-DK-LLex-7005.dic</string>
  874. <string>../resources/dictionary/7/da-DK-OOV-7002.dic</string>
  875. </array>
  876. </value>
  877. </nameValuePair>
  878. <nameValuePair>
  879. <name>OOVDicts</name>
  880. <value>
  881. <string/>
  882. </value>
  883. </nameValuePair>
  884. <nameValuePair>
  885. <name>BreakRulesSpec</name>
  886. <value>
  887. <string/>
  888. </value>
  889. </nameValuePair>
  890. </settingsForGroup>
  <!-- Settings for configuration group "he": one PartOfSpeechDict file and a single
       LexicalDicts entry (he-IL files); OOVDicts and BreakRulesSpec are empty strings. -->
  891. <settingsForGroup name="he">
  892. <nameValuePair>
  893. <name>PartOfSpeechDict</name>
  894. <value>
  895. <string>he-IL-TSimplified-7201.dic</string>
  896. </value>
  897. </nameValuePair>
  898. <nameValuePair>
  899. <name>LexicalDicts</name>
  900. <value>
  901. <array>
  902. <string>../resources/dictionary/14/he-IL-Lex-7205.dic</string>
  903. </array>
  904. </value>
  905. </nameValuePair>
  906. <nameValuePair>
  907. <name>OOVDicts</name>
  908. <value>
  909. <string/>
  910. </value>
  911. </nameValuePair>
  912. <nameValuePair>
  913. <name>BreakRulesSpec</name>
  914. <value>
  915. <string/>
  916. </value>
  917. </nameValuePair>
  918. </settingsForGroup>
  <!-- Settings for configuration group "tr": only PartOfSpeechDict names a file.
       LexicalDicts is an empty array, and OOVDicts and BreakRulesSpec are empty strings.
       NOTE(review): this is the only group in this part of the file with no lexical
       dictionary; presumably deliberate for this language, confirm. -->
  919. <settingsForGroup name="tr">
  920. <nameValuePair>
  921. <name>PartOfSpeechDict</name>
  922. <value>
  923. <string>tr-TR-TTpos-8502.dic</string>
  924. </value>
  925. </nameValuePair>
  926. <nameValuePair>
  927. <name>LexicalDicts</name>
  928. <value>
  929. <array/>
  930. </value>
  931. </nameValuePair>
  932. <nameValuePair>
  933. <name>OOVDicts</name>
  934. <value>
  935. <string/>
  936. </value>
  937. </nameValuePair>
  938. <nameValuePair>
  939. <name>BreakRulesSpec</name>
  940. <value>
  941. <string/>
  942. </value>
  943. </nameValuePair>
  944. </settingsForGroup>
  <!-- Settings for configuration group "nl": one PartOfSpeechDict file and two
       LexicalDicts entries (nl-NL files); OOVDicts and BreakRulesSpec are empty strings.
       NOTE(review): the *-OOV-* dictionary is listed under LexicalDicts while OOVDicts
       is left empty; presumably intentional, confirm against the annotator documentation. -->
  945. <settingsForGroup name="nl">
  946. <nameValuePair>
  947. <name>PartOfSpeechDict</name>
  948. <value>
  949. <string>nl-NL-TSimplified-7000.dic</string>
  950. </value>
  951. </nameValuePair>
  952. <nameValuePair>
  953. <name>LexicalDicts</name>
  954. <value>
  955. <array>
  956. <string>../resources/dictionary/18/nl-NL-Reform-LLex-7004.dic</string>
  957. <string>../resources/dictionary/19/nl-NL-OOV-7002.dic</string>
  958. </array>
  959. </value>
  960. </nameValuePair>
  961. <nameValuePair>
  962. <name>OOVDicts</name>
  963. <value>
  964. <string/>
  965. </value>
  966. </nameValuePair>
  967. <nameValuePair>
  968. <name>BreakRulesSpec</name>
  969. <value>
  970. <string/>
  971. </value>
  972. </nameValuePair>
  973. </settingsForGroup>
  974. </configurationParameterSettings>
  <!-- Type system declared by this descriptor. It contains the document annotation,
       the uima.tt.* document-structure and lexical annotation types, the feature
       structures uima.tt.KeyStringEntry / uima.tt.Lemma / uima.tt.LanguageConfidencePair,
       and the com.ibm.langware.uimatypes.* token subtypes. -->
  975. <typeSystemDescription>
  976. <types>
  <!-- Document-level annotation carrying the language and language candidates. -->
  977. <typeDescription>
  978. <name>uima.tcas.DocumentAnnotation</name>
  979. <description>Annotation covering the entire document, containing document meta information, for example the document language</description>
  980. <supertypeName>uima.tcas.Annotation</supertypeName>
  981. <features>
  982. <featureDescription>
  983. <name>language</name>
  984. <description>The document language</description>
  985. <rangeTypeName>uima.cas.String</rangeTypeName>
  986. </featureDescription>
  987. <featureDescription>
  988. <name>languageCandidates</name>
  989. <description>A list of language candidates for the document produced during language identification. These are sorted by confidence value</description>
  990. <rangeTypeName>uima.cas.FSList</rangeTypeName>
  991. </featureDescription>
  992. </features>
  993. </typeDescription>
  <!-- uima.tt.TTAnnotation is the common root of the two branches below:
       DocStructureAnnotation (paragraphs, sentences) and LexicalAnnotation (tokens,
       dictionary entries). -->
  994. <typeDescription>
  995. <name>uima.tt.TTAnnotation</name>
  996. <description>Base type for lexical and document structure annotation types</description>
  997. <supertypeName>uima.tcas.Annotation</supertypeName>
  998. </typeDescription>
  999. <typeDescription>
  1000. <name>uima.tt.DocStructureAnnotation</name>
  1001. <description>Base type for document structure annotation types</description>
  1002. <supertypeName>uima.tt.TTAnnotation</supertypeName>
  1003. </typeDescription>
  1004. <typeDescription>
  1005. <name>uima.tt.ParagraphAnnotation</name>
  1006. <description>A paragraph</description>
  1007. <supertypeName>uima.tt.DocStructureAnnotation</supertypeName>
  1008. <features>
  1009. <featureDescription>
  1010. <name>paragraphNumber</name>
  1011. <description>The sequence number of the paragraph in the document</description>
  1012. <rangeTypeName>uima.cas.Integer</rangeTypeName>
  1013. </featureDescription>
  1014. </features>
  1015. </typeDescription>
  1016. <typeDescription>
  1017. <name>uima.tt.SentenceAnnotation</name>
  1018. <description>A sentence</description>
  1019. <supertypeName>uima.tt.DocStructureAnnotation</supertypeName>
  1020. <features>
  1021. <featureDescription>
  1022. <name>sentenceNumber</name>
  1023. <description>The sequence number of the sentence in the paragraph (or the document)</description>
  1024. <rangeTypeName>uima.cas.Integer</rangeTypeName>
  1025. </featureDescription>
  1026. </features>
  1027. </typeDescription>
  <!-- Lexical branch. DictionaryEntryAnnotation and TokenLikeAnnotation each declare
       their own "lemma" feature with range uima.tt.Lemma. -->
  1028. <typeDescription>
  1029. <name>uima.tt.LexicalAnnotation</name>
  1030. <description>Base type for lexical annotation types</description>
  1031. <supertypeName>uima.tt.TTAnnotation</supertypeName>
  1032. </typeDescription>
  1033. <typeDescription>
  1034. <name>uima.tt.DictionaryEntryAnnotation</name>
  1035. <description>Base type for dictionary-based user-defined annotation types</description>
  1036. <supertypeName>uima.tt.LexicalAnnotation</supertypeName>
  1037. <features>
  1038. <featureDescription>
  1039. <name>lemma</name>
  1040. <description>Morphological information for the dictionary entry</description>
  1041. <rangeTypeName>uima.tt.Lemma</rangeTypeName>
  1042. </featureDescription>
  1043. </features>
  1044. </typeDescription>
  <!-- TokenLikeAnnotation declares lemma, lemmaEntries and dictionaryMatch; its
       subtypes TokenAnnotation (adds posTag) and CompPartAnnotation inherit them. -->
  1045. <typeDescription>
  1046. <name>uima.tt.TokenLikeAnnotation</name>
  1047. <description>Base type for token annotation types</description>
  1048. <supertypeName>uima.tt.LexicalAnnotation</supertypeName>
  1049. <features>
  1050. <featureDescription>
  1051. <name>lemma</name>
  1052. <description>The best probable entry containing all morphological information for the token</description>
  1053. <rangeTypeName>uima.tt.Lemma</rangeTypeName>
  1054. </featureDescription>
  1055. <featureDescription>
  1056. <name>lemmaEntries</name>
  1057. <description>List of lemma entries containing all morphological information for the token</description>
  1058. <rangeTypeName>uima.cas.FSArray</rangeTypeName>
  1059. </featureDescription>
  1060. <featureDescription>
  1061. <name>dictionaryMatch</name>
  1062. <description>A flag indicating whether or not the token matches a dictionary entry</description>
  1063. <rangeTypeName>uima.cas.Boolean</rangeTypeName>
  1064. </featureDescription>
  1065. </features>
  1066. </typeDescription>
  1067. <typeDescription>
  1068. <name>uima.tt.TokenAnnotation</name>
  1069. <description>General token annotation type. It is also the base type for the special token types</description>
  1070. <supertypeName>uima.tt.TokenLikeAnnotation</supertypeName>
  1071. <features>
  1072. <featureDescription>
  1073. <name>posTag</name>
  1074. <description>Part-of-Speech tag</description>
  1075. <rangeTypeName>uima.cas.String</rangeTypeName>
  1076. </featureDescription>
  1077. </features>
  1078. </typeDescription>
  1079. <typeDescription>
  1080. <name>uima.tt.CompPartAnnotation</name>
  1081. <description>A part of a compound word</description>
  1082. <supertypeName>uima.tt.TokenLikeAnnotation</supertypeName>
  1083. </typeDescription>
  <!-- Non-annotation feature structures (supertype uima.cas.TOP). uima.tt.Lemma
       extends KeyStringEntry, so it inherits the "key" feature declared there. -->
  1084. <typeDescription>
  1085. <name>uima.tt.KeyStringEntry</name>
  1086. <description>Base type for types defining key/value feature (e.g. uima.tt.Lemma type)</description>
  1087. <supertypeName>uima.cas.TOP</supertypeName>
  1088. <features>
  1089. <featureDescription>
  1090. <name>key</name>
  1091. <description>A key/value feature (e.g. lemma string in uima.tt.Lemma type)</description>
  1092. <rangeTypeName>uima.cas.String</rangeTypeName>
  1093. </featureDescription>
  1094. </features>
  1095. </typeDescription>
  1096. <typeDescription>
  1097. <name>uima.tt.Lemma</name>
  1098. <description>Morphological information retrieved from a lexical dictionary entry</description>
  1099. <supertypeName>uima.tt.KeyStringEntry</supertypeName>
  1100. <features>
  1101. <featureDescription>
  1102. <name>partOfSpeech</name>
  1103. <description>An integral encoding representing the part-of-speech for the lemma</description>
  1104. <rangeTypeName>uima.cas.Integer</rangeTypeName>
  1105. </featureDescription>
  1106. <featureDescription>
  1107. <name>frost_ExtendedPOS</name>
  1108. <description>An integer representing additional information related to the part-of-speech</description>
  1109. <rangeTypeName>uima.cas.Integer</rangeTypeName>
  1110. </featureDescription>
  <!-- NOTE(review): isStopword is the only feature in this type system with an empty
       description. Presumably a flag marking the lemma as a stopword; confirm and
       fill in the description. -->
  1111. <featureDescription>
  1112. <name>isStopword</name>
  1113. <description/>
  1114. <rangeTypeName>uima.cas.Boolean</rangeTypeName>
  1115. </featureDescription>
  1116. </features>
  1117. </typeDescription>
  <!-- NOTE(review): presumably the element type of DocumentAnnotation:languageCandidates
       (declared as uima.cas.FSList); nothing in this descriptor states that link. -->
  1118. <typeDescription>
  1119. <name>uima.tt.LanguageConfidencePair</name>
  1120. <description>Language-Confidence pair of a language candidate for the document text</description>
  1121. <supertypeName>uima.cas.TOP</supertypeName>
  1122. <features>
  1123. <featureDescription>
  1124. <name>languageConfidence</name>
  1125. <description>An indication (a float value between 0 and 1) of how well the candidate language actually fits the language of the document</description>
  1126. <rangeTypeName>uima.cas.Float</rangeTypeName>
  1127. </featureDescription>
  1128. <featureDescription>
  1129. <name>language</name>
  1130. <description>Language name (ISO Locale code)</description>
  1131. <rangeTypeName>uima.cas.String</rangeTypeName>
  1132. </featureDescription>
  1133. </features>
  1134. </typeDescription>
  <!-- com.ibm.langware.uimatypes.* token classes, none of which declare features.
       WordLikeToken and Punctuation extend uima.tt.TokenAnnotation.
       Alphabetic, Syllabic, Ideographic and Numeric extend WordLikeToken; the
       remaining types are leaf subtypes of one of those four or of Punctuation. -->
  1135. <typeDescription>
  1136. <name>com.ibm.langware.uimatypes.WordLikeToken</name>
  1137. <description>Base type for possible words (not punctuations nor symbols). Also represents alphanumeric tokens</description>
  1138. <supertypeName>uima.tt.TokenAnnotation</supertypeName>
  1139. </typeDescription>
  1140. <typeDescription>
  1141. <name>com.ibm.langware.uimatypes.Alphabetic</name>
  1142. <description>Alphabetic word</description>
  1143. <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
  1144. </typeDescription>
  1145. <typeDescription>
  1146. <name>com.ibm.langware.uimatypes.UppercaseAlphabetic</name>
  1147. <description>Uppercase alphabetic word</description>
  1148. <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
  1149. </typeDescription>
  1150. <typeDescription>
  1151. <name>com.ibm.langware.uimatypes.TitlecaseAlphabetic</name>
  1152. <description>Titlecase alphabetic word</description>
  1153. <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
  1154. </typeDescription>
  1155. <typeDescription>
  1156. <name>com.ibm.langware.uimatypes.LowercaseAlphabetic</name>
  1157. <description>Lowercase alphabetic word</description>
  1158. <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
  1159. </typeDescription>
  1160. <typeDescription>
  1161. <name>com.ibm.langware.uimatypes.Arabic</name>
  1162. <description>Arabic word</description>
  1163. <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
  1164. </typeDescription>
  1165. <typeDescription>
  1166. <name>com.ibm.langware.uimatypes.Hebrew</name>
  1167. <description>Hebrew word</description>
  1168. <supertypeName>com.ibm.langware.uimatypes.Alphabetic</supertypeName>
  1169. </typeDescription>
  1170. <typeDescription>
  1171. <name>com.ibm.langware.uimatypes.Syllabic</name>
  1172. <description>Syllabic word</description>
  1173. <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
  1174. </typeDescription>
  1175. <typeDescription>
  1176. <name>com.ibm.langware.uimatypes.Hiragana</name>
  1177. <description>Hiragana (Syllabic) word</description>
  1178. <supertypeName>com.ibm.langware.uimatypes.Syllabic</supertypeName>
  1179. </typeDescription>
  1180. <typeDescription>
  1181. <name>com.ibm.langware.uimatypes.Katakana</name>
  1182. <description>Katakana (Syllabic) word</description>
  1183. <supertypeName>com.ibm.langware.uimatypes.Syllabic</supertypeName>
  1184. </typeDescription>
  1185. <typeDescription>
  1186. <name>com.ibm.langware.uimatypes.Hangul</name>
  1187. <description>Hangul (Syllabic) word</description>
  1188. <supertypeName>com.ibm.langware.uimatypes.Syllabic</supertypeName>
  1189. </typeDescription>
  1190. <typeDescription>
  1191. <name>com.ibm.langware.uimatypes.Ideographic</name>
  1192. <description>Ideographic word</description>
  1193. <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
  1194. </typeDescription>
  1195. <typeDescription>
  1196. <name>com.ibm.langware.uimatypes.Han</name>
  1197. <description>Han (Ideographic) word</description>
  1198. <supertypeName>com.ibm.langware.uimatypes.Ideographic</supertypeName>
  1199. </typeDescription>
  1200. <typeDescription>
  1201. <name>com.ibm.langware.uimatypes.Numeric</name>
  1202. <description>A numeric sequence</description>
  1203. <supertypeName>com.ibm.langware.uimatypes.WordLikeToken</supertypeName>
  1204. </typeDescription>
  1205. <typeDescription>
  1206. <name>com.ibm.langware.uimatypes.ChineseNumeral</name>
  1207. <description>A Chinese numeral</description>
  1208. <supertypeName>com.ibm.langware.uimatypes.Numeric</supertypeName>
  1209. </typeDescription>
  1210. <typeDescription>
  1211. <name>com.ibm.langware.uimatypes.Punctuation</name>
  1212. <description>A punctuation or symbol</description>
  1213. <supertypeName>uima.tt.TokenAnnotation</supertypeName>
  1214. </typeDescription>
  1215. <typeDescription>
  1216. <name>com.ibm.langware.uimatypes.ClauseEndingPunctuation</name>
  1217. <description>A clause terminating punctuation</description>
  1218. <supertypeName>com.ibm.langware.uimatypes.Punctuation</supertypeName>
  1219. </typeDescription>
  1220. </types>
  1221. </typeSystemDescription>
  <!-- Two capability sets, both with no declared inputs.
       The first applies to language "x-unspecified" and declares paragraph, sentence
       and token annotations plus the token features lemma and lemmaEntries.
       The second applies to the 23 listed language codes and additionally declares
       CompPartAnnotation, Lemma and the numbering, posTag, dictionaryMatch and
       Lemma features. -->
  1222. <capabilities>
  1223. <capability>
  1224. <inputs/>
  1225. <outputs>
  1226. <type>uima.tt.ParagraphAnnotation</type>
  1227. <type>uima.tt.SentenceAnnotation</type>
  1228. <type>uima.tt.TokenAnnotation</type>
  1229. <feature>uima.tt.TokenAnnotation:lemma</feature>
  1230. <feature>uima.tt.TokenAnnotation:lemmaEntries</feature>
  1231. </outputs>
  1232. <languagesSupported>
  1233. <language>x-unspecified</language>
  1234. </languagesSupported>
  1235. </capability>
  <!-- NOTE(review): unlike the first capability, this output list does not include
       uima.tt.TokenAnnotation:lemma; confirm whether that omission is intended. -->
  1236. <capability>
  1237. <inputs/>
  1238. <outputs>
  1239. <type>uima.tt.ParagraphAnnotation</type>
  1240. <type>uima.tt.SentenceAnnotation</type>
  1241. <type>uima.tt.TokenAnnotation</type>
  1242. <type>uima.tt.CompPartAnnotation</type>
  1243. <type>uima.tt.Lemma</type>
  1244. <feature>uima.tt.ParagraphAnnotation:paragraphNumber</feature>
  1245. <feature>uima.tt.SentenceAnnotation:sentenceNumber</feature>
  1246. <feature>uima.tt.TokenAnnotation:posTag</feature>
  1247. <feature>uima.tt.TokenAnnotation:lemmaEntries</feature>
  1248. <feature>uima.tt.TokenAnnotation:dictionaryMatch</feature>
  1249. <feature>uima.tt.Lemma:key</feature>
  1250. <feature>uima.tt.Lemma:partOfSpeech</feature>
  1251. <feature>uima.tt.Lemma:isStopword</feature>
  1252. <feature>uima.tt.Lemma:frost_ExtendedPOS</feature>
  1253. </outputs>
  1254. <languagesSupported>
  1255. <language>en</language>
  1256. <language>af</language>
  1257. <language>ar</language>
  1258. <language>ca</language>
  1259. <language>cs</language>
  1260. <language>da</language>
  1261. <language>de</language>
  1262. <language>el</language>
  1263. <language>es</language>
  1264. <language>fr</language>
  1265. <language>he</language>
  1266. <language>it</language>
  1267. <language>ja</language>
  1268. <language>ko</language>
  1269. <language>nb</language>
  1270. <language>nl</language>
  1271. <language>nn</language>
  1272. <language>pl</language>
  1273. <language>pt</language>
  1274. <language>ru</language>
  1275. <language>sv</language>
  1276. <language>tr</language>
  1277. <language>zh</language>
  1278. </languagesSupported>
  1279. </capability>
  1280. </capabilities>
  <!-- Operational flags: the engine modifies the CAS, allows multiple deployment,
       and does not output new CASes. -->
  1281. <operationalProperties>
  1282. <modifiesCas>true</modifiesCas>
  1283. <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
  1284. <outputsNewCASes>false</outputsNewCASes>
  1285. </operationalProperties>
  1286. </analysisEngineMetaData>
  <!-- Declares one external resource, "ResourcesFile", and binds it to the
       dependency key "Resources".
       NOTE(review): for a fileLanguageResourceSpecifier the location is presumably
       built as fileUrlPrefix + language code + fileUrlSuffix, which with these values
       would give ../resources/Tagger/<language>/ ; confirm against the UIMA reference. -->
  1287. <resourceManagerConfiguration>
  1288. <externalResources>
  1289. <externalResource>
  1290. <name>ResourcesFile</name>
  1291. <description>Location of Resources</description>
  1292. <fileLanguageResourceSpecifier>
  1293. <fileUrlPrefix>../resources/Tagger/</fileUrlPrefix>
  1294. <fileUrlSuffix>/</fileUrlSuffix>
  1295. </fileLanguageResourceSpecifier>
  1296. </externalResource>
  1297. </externalResources>
  1298. <externalResourceBindings>
  1299. <externalResourceBinding>
  1300. <key>Resources</key>
  1301. <resourceName>ResourcesFile</resourceName>
  1302. </externalResourceBinding>
  1303. </externalResourceBindings>
  1304. </resourceManagerConfiguration>
  1305. </taeDescription>