shithub: flite

ref: baaa888c574e075222ad70b002fe6a140a19098b
dir: /lang/usenglish/us_gpos.c/

View raw version
/*************************************************************************/
/*                                                                       */
/*                  Language Technologies Institute                      */
/*                     Carnegie Mellon University                        */
/*                         Copyright (c) 2001                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*             Author:  Alan W Black ([email protected])                    */
/*               Date:  January 2001                                     */
/*************************************************************************/
/*  Poor mans part of speech tagger                                      */
/*************************************************************************/

#include "cst_val.h"

DEF_STATIC_CONST_VAL_STRING(gpos_in,"in");
DEF_STATIC_CONST_VAL_STRING(gpos_of,"of");
DEF_STATIC_CONST_VAL_STRING(gpos_for,"for");
DEF_STATIC_CONST_VAL_STRING(gpos_on,"on");
DEF_STATIC_CONST_VAL_STRING(gpos_that,"that");
DEF_STATIC_CONST_VAL_STRING(gpos_with,"with");
DEF_STATIC_CONST_VAL_STRING(gpos_by,"by");
DEF_STATIC_CONST_VAL_STRING(gpos_at,"at");
DEF_STATIC_CONST_VAL_STRING(gpos_from,"from");
DEF_STATIC_CONST_VAL_STRING(gpos_as,"as");
DEF_STATIC_CONST_VAL_STRING(gpos_if,"if");
DEF_STATIC_CONST_VAL_STRING(gpos_against,"against");
DEF_STATIC_CONST_VAL_STRING(gpos_about,"about");
DEF_STATIC_CONST_VAL_STRING(gpos_before,"before");
DEF_STATIC_CONST_VAL_STRING(gpos_because,"because");
DEF_STATIC_CONST_VAL_STRING(gpos_under,"under");
DEF_STATIC_CONST_VAL_STRING(gpos_after,"after");
DEF_STATIC_CONST_VAL_STRING(gpos_over,"over");
DEF_STATIC_CONST_VAL_STRING(gpos_into,"into");
DEF_STATIC_CONST_VAL_STRING(gpos_while,"while");
DEF_STATIC_CONST_VAL_STRING(gpos_without,"without");
DEF_STATIC_CONST_VAL_STRING(gpos_through,"through");
DEF_STATIC_CONST_VAL_STRING(gpos_new,"new"); /* ??? */
DEF_STATIC_CONST_VAL_STRING(gpos_between,"between");
DEF_STATIC_CONST_VAL_STRING(gpos_among,"among");
DEF_STATIC_CONST_VAL_STRING(gpos_until,"until");
DEF_STATIC_CONST_VAL_STRING(gpos_per,"per");
DEF_STATIC_CONST_VAL_STRING(gpos_up,"up");
DEF_STATIC_CONST_VAL_STRING(gpos_down,"down");

static const cst_val * const gpos_in_list[] = {
    (cst_val *)&gpos_in,
    (cst_val *)&gpos_of,
    (cst_val *)&gpos_for,
    (cst_val *)&gpos_in,
    (cst_val *)&gpos_on,
    (cst_val *)&gpos_that,
    (cst_val *)&gpos_with,
    (cst_val *)&gpos_by,
    (cst_val *)&gpos_at,
    (cst_val *)&gpos_from,
    (cst_val *)&gpos_as,
    (cst_val *)&gpos_if,
    (cst_val *)&gpos_that,
    (cst_val *)&gpos_against,
    (cst_val *)&gpos_about,
    (cst_val *)&gpos_before,
    (cst_val *)&gpos_because,
    (cst_val *)&gpos_under,
    (cst_val *)&gpos_after,
    (cst_val *)&gpos_over,
    (cst_val *)&gpos_into,
    (cst_val *)&gpos_while,
    (cst_val *)&gpos_without,
    (cst_val *)&gpos_through,
    (cst_val *)&gpos_new,
    (cst_val *)&gpos_between,
    (cst_val *)&gpos_among,
    (cst_val *)&gpos_until,
    (cst_val *)&gpos_per,
    (cst_val *)&gpos_up,
    (cst_val *)&gpos_down,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_to,"to");

static const cst_val * const gpos_to_list[] = {
    (cst_val *)&gpos_to,
    (cst_val *)&gpos_to,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_det,"det");
DEF_STATIC_CONST_VAL_STRING(gpos_the,"the");
DEF_STATIC_CONST_VAL_STRING(gpos_a,"a");
DEF_STATIC_CONST_VAL_STRING(gpos_an,"an");
DEF_STATIC_CONST_VAL_STRING(gpos_some,"some");
DEF_STATIC_CONST_VAL_STRING(gpos_this,"this");
DEF_STATIC_CONST_VAL_STRING(gpos_each,"each");
DEF_STATIC_CONST_VAL_STRING(gpos_another,"another");
DEF_STATIC_CONST_VAL_STRING(gpos_those,"those");
DEF_STATIC_CONST_VAL_STRING(gpos_every,"every");
DEF_STATIC_CONST_VAL_STRING(gpos_all,"all");
DEF_STATIC_CONST_VAL_STRING(gpos_any,"any");
DEF_STATIC_CONST_VAL_STRING(gpos_these,"these");
DEF_STATIC_CONST_VAL_STRING(gpos_both,"both");
DEF_STATIC_CONST_VAL_STRING(gpos_neither,"neither");
DEF_STATIC_CONST_VAL_STRING(gpos_no,"no");
DEF_STATIC_CONST_VAL_STRING(gpos_many,"many");

static const cst_val * const gpos_det_list[] = {
    (cst_val *)&gpos_det,
    (cst_val *)&gpos_the,
    (cst_val *)&gpos_a,
    (cst_val *)&gpos_an,
    (cst_val *)&gpos_no,
    (cst_val *)&gpos_some,
    (cst_val *)&gpos_this,
    (cst_val *)&gpos_each,
    (cst_val *)&gpos_another,
    (cst_val *)&gpos_those,
    (cst_val *)&gpos_every,
    (cst_val *)&gpos_all,
    (cst_val *)&gpos_any,
    (cst_val *)&gpos_these,
    (cst_val *)&gpos_both,
    (cst_val *)&gpos_neither,
    (cst_val *)&gpos_no,
    (cst_val *)&gpos_many,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_md,"md");
DEF_STATIC_CONST_VAL_STRING(gpos_will,"will");
DEF_STATIC_CONST_VAL_STRING(gpos_may,"may");
DEF_STATIC_CONST_VAL_STRING(gpos_would,"would");
DEF_STATIC_CONST_VAL_STRING(gpos_can,"can");
DEF_STATIC_CONST_VAL_STRING(gpos_could,"could");
DEF_STATIC_CONST_VAL_STRING(gpos_should,"should");
DEF_STATIC_CONST_VAL_STRING(gpos_must,"must");
DEF_STATIC_CONST_VAL_STRING(gpos_ought,"ought");
DEF_STATIC_CONST_VAL_STRING(gpos_might,"might");

static const cst_val * const gpos_md_list[] = {
    (cst_val *)&gpos_md,
    (cst_val *)&gpos_will,
    (cst_val *)&gpos_may,
    (cst_val *)&gpos_would,
    (cst_val *)&gpos_can,
    (cst_val *)&gpos_could,
    (cst_val *)&gpos_should,
    (cst_val *)&gpos_must,
    (cst_val *)&gpos_ought,
    (cst_val *)&gpos_might,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_cc,"cc");
DEF_STATIC_CONST_VAL_STRING(gpos_and,"and");
DEF_STATIC_CONST_VAL_STRING(gpos_but,"but");
DEF_STATIC_CONST_VAL_STRING(gpos_or,"or");
DEF_STATIC_CONST_VAL_STRING(gpos_plus,"plus");
DEF_STATIC_CONST_VAL_STRING(gpos_yet,"yet");
DEF_STATIC_CONST_VAL_STRING(gpos_nor,"nor");

static const cst_val * const gpos_cc_list[] = {
    (cst_val *)&gpos_cc,
    (cst_val *)&gpos_and,
    (cst_val *)&gpos_but,
    (cst_val *)&gpos_or,
    (cst_val *)&gpos_plus,
    (cst_val *)&gpos_yet,
    (cst_val *)&gpos_nor,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_wp,"wp");
DEF_STATIC_CONST_VAL_STRING(gpos_who,"who");
DEF_STATIC_CONST_VAL_STRING(gpos_what,"what");
DEF_STATIC_CONST_VAL_STRING(gpos_where,"where");
DEF_STATIC_CONST_VAL_STRING(gpos_how,"how");
DEF_STATIC_CONST_VAL_STRING(gpos_when,"when");

static const cst_val * const gpos_wp_list[] = {
    (cst_val *)&gpos_wp,
    (cst_val *)&gpos_who,
    (cst_val *)&gpos_what,
    (cst_val *)&gpos_where,
    (cst_val *)&gpos_how,
    (cst_val *)&gpos_when,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_pps,"pps");
DEF_STATIC_CONST_VAL_STRING(gpos_her,"her");
DEF_STATIC_CONST_VAL_STRING(gpos_his,"his");
DEF_STATIC_CONST_VAL_STRING(gpos_their,"their");
DEF_STATIC_CONST_VAL_STRING(gpos_its,"its");
DEF_STATIC_CONST_VAL_STRING(gpos_our,"our");
DEF_STATIC_CONST_VAL_STRING(gpos_mine,"mine");

static const cst_val * const gpos_pps_list[] = {
    (cst_val *)&gpos_pps,
    (cst_val *)&gpos_her,
    (cst_val *)&gpos_his,
    (cst_val *)&gpos_their,
    (cst_val *)&gpos_its,
    (cst_val *)&gpos_our,
    (cst_val *)&gpos_mine,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_aux,"aux");
DEF_STATIC_CONST_VAL_STRING(gpos_is,"is");
DEF_STATIC_CONST_VAL_STRING(gpos_am,"am");
DEF_STATIC_CONST_VAL_STRING(gpos_are,"are");
DEF_STATIC_CONST_VAL_STRING(gpos_was,"was");
DEF_STATIC_CONST_VAL_STRING(gpos_were,"were");
DEF_STATIC_CONST_VAL_STRING(gpos_has,"has");
DEF_STATIC_CONST_VAL_STRING(gpos_have,"have");
DEF_STATIC_CONST_VAL_STRING(gpos_had,"had");
DEF_STATIC_CONST_VAL_STRING(gpos_be,"be");

static const cst_val * const gpos_aux_list[] = {
    (cst_val *)&gpos_aux,
    (cst_val *)&gpos_is,
    (cst_val *)&gpos_am,
    (cst_val *)&gpos_are,
    (cst_val *)&gpos_was,
    (cst_val *)&gpos_were,
    (cst_val *)&gpos_has,
    (cst_val *)&gpos_have,
    (cst_val *)&gpos_had,
    (cst_val *)&gpos_be,
    0 };

DEF_STATIC_CONST_VAL_STRING(gpos_punc,"punc");
DEF_STATIC_CONST_VAL_STRING(gpos_dot,".");
DEF_STATIC_CONST_VAL_STRING(gpos_comma,",");
DEF_STATIC_CONST_VAL_STRING(gpos_colon,":");
DEF_STATIC_CONST_VAL_STRING(gpos_semicolon,";");
DEF_STATIC_CONST_VAL_STRING(gpos_dquote,"\"");
DEF_STATIC_CONST_VAL_STRING(gpos_squote,"'");
DEF_STATIC_CONST_VAL_STRING(gpos_leftparen,"(");
DEF_STATIC_CONST_VAL_STRING(gpos_qmark,"?");
DEF_STATIC_CONST_VAL_STRING(gpos_rightparen,")");
DEF_STATIC_CONST_VAL_STRING(gpos_emark,"!");

static const cst_val * const gpos_punc_list[] = {
    (cst_val *)&gpos_punc,
    (cst_val *)&gpos_dot,
    (cst_val *)&gpos_comma,
    (cst_val *)&gpos_colon,
    (cst_val *)&gpos_semicolon,
    (cst_val *)&gpos_dquote,
    (cst_val *)&gpos_squote,
    (cst_val *)&gpos_leftparen,
    (cst_val *)&gpos_qmark,
    (cst_val *)&gpos_rightparen,
    (cst_val *)&gpos_emark,
    0 };

const cst_val * const * const us_gpos[] = {
    gpos_in_list,
    gpos_to_list,
    gpos_det_list,
    gpos_md_list,
    gpos_cc_list,
    gpos_wp_list,
    gpos_pps_list,
    gpos_aux_list,
    gpos_punc_list,
    0 };