%%% -*- Mode: PROLOG; Package: PROLOG-USER -*-
%%% SAMPLE ARABIC LEXICON
%%%
%%% (c) 2001 Sandiway Fong, NEC Research Institute, Inc.
%%%

%%% NOTE(review): `no` is written as a prefix operator throughout this
%%% file; its declaration is not visible here and is presumably supplied
%%% by the host system.

no caseFoldInput.

default_features(n,[grid([],[]),case(_),theta(_)]).
default_features(v,[index(_)]).

block_default_features(nounFeatures,2).
block_default_features(verbToNounFeatures,2).

no superClass(_,_).
no blockContraction(_).

%%% EXPORT
%%%   default_features(C,Fs)          used in lexicon compilation
%%%   term(C)                         terminals
%%%   lexicon(Word,C,Fs)              Word has category label C
%%%                                   and feature list Fs
%%%   probeLexicon(Word)              Word is in lexicon
%%%   vMorphToFs(Base,Form,Features)  TNS/AGR features
%%%   inf(Verb,Type)
%%%   relevant(C)                     constraints imposed by markers
%%%                                   apply to C
%%% REFERENCES
%%%   (list processing) utilities

%%% Terminal category labels.
term(n).
term(a).
term(v).
term(p).
term(c).
term(mod).
term(neg).
term(mrkr).
term('$').

%%% Most lexical entries are stored directly as
%%%
%%%     lex(Word,Category,Features)
%%%
%%% Non-base forms require inference:
%%%   1. plural nouns          all features except agr(_) inherited
%%%                            from the sg. form
%%%   2. nominalized verbs     inherits verb features except morph(_,_)
%%%   3. non-base verb forms   all features except morph(_,_) inherited
%%%                            from the base form

%%% lexicon(Word,Cat,Features)
%%%   Enumerates every lex/3 entry for Word (non-deterministic).
lexicon(Word,Cat,Features) :-
    lex(Word,Cat,Features).

%%% probeLexicon(Word)
%%%   Deterministic membership test: succeeds at most once if any
%%%   lex/3 entry exists for Word.
probeLexicon(Word) :-
    lex(Word,_Cat,_Features),
    !.

%%% NEGATION

lex(laa,neg,[polarity(-),eng(not)]).
lex(maa,neg,[polarity(-),eng(not)]).

%%% PREPOSITIONS

lex('Qilaa',p,[grid([],[destination]),
               predicate(destination),
               adjR([goal(vpAllowExt(destination,VP),VP)]),
               eng(to)]).

%% Adverbs

% lex(how,adv,[adjoin(right),wh(+),predicate(manner)]).
% lex(when,adv,[adjoin(right),wh(+),predicate(time)]).
% lex(why,adv,[adjoin(right),wh(+),predicate(reason)]).
% lex(often,adv,[adjoin(left),predicate(manner)]).
% lex(completely,adv,[adjoin(_),predicate(manner)]).

%% Determiners as markers attaching to nouns on the right.
%% Each entry records its own surface form in prefix/1.

lex(al,mrkr,[right(n,[],[prefix(al),def(+)])]).
lex(d, mrkr,[right(n,[],[prefix(d), def(+)])]).
lex(l, mrkr,[right(n,[],[prefix(l), def(+)])]).
%% Determiner markers (continued).  Each marker records its own surface
%% form in prefix/1.

lex(n,  mrkr,[right(n,[],[prefix(n),  def(+)])]).
lex('S',mrkr,[right(n,[],[prefix('S'),def(+)])]).
lex(r,  mrkr,[right(n,[],[prefix(r),  def(+)])]).
% FIX: this entry previously carried prefix(r), copied from the entry
% above, so the marker `t` was recorded with the wrong surface prefix.
lex(t,  mrkr,[right(n,[],[prefix(t),  def(+)])]).

% lex('Qayy',det,[wh(+),eng(which)]).
% lex(kull,det,[op(+),eng(every)]).

%% Nouns
%% Obligatory features
%%   1. Anaphoric: a(+/-), Pronominal: p(+/-) for Binding Theory
%%   2. Agreement: agr([Per,Num,Gen])
%%   3. O-grid: grid(Ext,Int)
%% Optional features
%%   1. Morphologically realized Case: morphC(C)
%%   2. Quantifiers: op(+/-)
%%   3. Wh-nouns: wh(+)

% Numbers and Ordinals

% lex(one,n,[morphC(gen),count(_),agr([3,sg,[]]),nonarg(+)]).
% lex(first,n,[morphC(gen),count(+),agr([3,sg,[]]),nonarg(+)]).

% Pleonastic elements

% The entry only exists when optional(zero,X) and notAssigned(X) both
% succeed (both predicates are defined outside this file section).
lex(there,n,[theta(X),agr([3,[],[]]),nonarg(+),linkTo(np)]) :-
    optional(zero,X),
    notAssigned(X).

% Default is non-argument unless the theta slot is instantiated, in which
% case the argument status is chosen using the predicate chooseIt/3.
lex(it,n,[agr([3,sg,n]),
          goal(theta(R),lambda(X,chooseIt(R,V,X))),
          oneof(_,[[noCoindex,nonarg(V),linkTo(c2)],
                   [a(-),p(+)]])]).

% Proper nouns

lex('aHmad', n,[a(-),p(-),agr([3,sg,m])]).
lex('Ciisaa',n,[a(-),p(-),agr([3,sg,m])]).
lex(muusaa,  n,[a(-),p(-),agr([3,sg,m])]).
lex(zayd,    n,[a(-),p(-),agr([3,sg,m])]).

% Anaphors and pronominals

lex(nfas,n,[a(+),p(-),agr([]),eng(self)]).

% Pronominal clitics
% The tail of each feature list is supplied by subjClFeatures/1 or
% objCliticFeatures/1.
%
% NOTE(review): hum and hunna are marked a(+),p(-) while every other
% clitic here is a(-),p(+), and both are glossed eng(they) although they
% take objCliticFeatures/1.  Left unchanged; confirm this is intended.

lex(tu,pf(cl),[a(-),p(+),agr([1,sg,[m,f]]),eng(i)|Fs]) :-
    subjClFeatures(Fs).
lex(naa,pf(cl),[a(-),p(+),agr([1,pl,[m,f]]),eng(us)|Fs]) :-
    objCliticFeatures(Fs).
lex(ta,pf(cl),[a(-),p(+),agr([2,sg,[m,f]]),eng(you)|Fs]) :-
    subjClFeatures(Fs).
lex(hu,pf(cl),[a(-),p(+),agr([3,sg,m]),eng(him)|Fs]) :-
    objCliticFeatures(Fs).
lex(hum,pf(cl),[a(+),p(-),agr([3,pl,m]),eng(they)|Fs]) :-
    objCliticFeatures(Fs).
lex(hunna,pf(cl),[a(+),p(-),agr([3,pl,f]),eng(they)|Fs]) :-
    objCliticFeatures(Fs).

% Quantifier nouns

% lex(everyone,n,[a(-),p(-),agr([3,sg,[m,f]]),op(+)]).

% Wh-nouns

lex(man,n,[a(-),p(-),agr([3,sg,[m,f]]),wh(+),eng(who)]).
% Common nouns
%
% NOTE(review): makaatib carries plural agreement but a singular gloss,
% eng(office); left unchanged -- confirm whether the gloss should be
% plural like eng(children) / eng(women).

lex('Qabuu',    n,[a(-),p(-),agr([3,sg,m]),morphC(nom),eng(father)]).
lex('Qawlaad',  n,[a(-),p(-),agr([3,pl,[m,f]]),eng(children)]).
lex('Qustaad_', n,[a(-),p(-),agr([3,sg,[m,f]]),eng(teacher)]).
lex(baqarat,    n,[a(-),p(-),agr([3,sg,f]),eng(cow)]).
lex(bint,       n,[a(-),p(-),agr([3,sg,f]),eng(girl)]).
lex(daar,       n,[a(-),p(-),agr([3,sg,n]),eng(house)]).
lex(jaasuus,    n,[a(-),p(-),agr([3,sg,[m,f]]),eng(spy)]).
lex(makaatib,   n,[a(-),p(-),agr([3,pl,n]),eng(office)]).
lex(mudiir,     n,[a(-),p(-),agr([3,sg,[m,f]]),eng(director)]).
lex('nisaaQ',   n,[a(-),p(-),agr([3,pl,f]),eng(women)]).
lex(rajul,      n,[a(-),p(-),agr([3,sg,m]),eng(man)]).
% FIX: rijaal is glossed eng(men) but was marked agr([3,sg,m]), copied
% from rajul; number is now pl, as for the other plural entries.
lex(rijaal,     n,[a(-),p(-),agr([3,pl,m]),eng(men)]).
lex(risaalat,   n,[a(-),p(-),agr([3,sg,n]),eng(letter)]).
lex(talju,      n,[a(-),p(-),agr([3,sg,n]),eng(snow)]).

% Nouns with O-grids

lex(father,n,[grid([possessor],[]),a(-),p(-),count(+),agr([3,sg,m])]).
lex(mother,n,[grid([possessor],[]),a(-),p(-),count(+),agr([3,sg,f])]).

% Nominalized verbs

% lex(belief,n,[case(_),theta(_),count(+),agr([3,sg,n]),a(-),p(-)|F]) :-
%     verbToNounFeatures(believe,F).

%%% VERBS - BASE FORMS
%
% Obligatory Elements
%   1. grid(Ext,Int) denotes the O-grid
%   2. morph(Base,[]/ing/s/ed(1)/ed(2))
% Optional Features
%   1. ecm(opt/oblig) for ECM verbs
%   2. noCasemark(+) for technically intransitive verbs
%   3. subcat(C$Fs,Fs1) subcategorizes for an element of category C
%      with feature restriction list Fs.
%      Feature list Fs1 will be appended to the feature list of C
%   4. selR(Rs), selR(Type,R) restriction(s) Rs or R on direct object.
%
% NOTE(review): 'Sagata' is the only verb whose morph/2 second argument
% is [] rather than past(+)/past(-); left unchanged -- confirm.

lex('Sagata',v,[morph(fall,[]),grid([],[theme]),noCasemark(+)]).
lex('QaCtay',v,[morph(give,past(+)),grid([agent],[goal,theme]),
                idoCase(acc)]).
lex('Qaqbal',v,[morph(appear,past(+)),grid([theme],[])]).  % unaccusative
lex('Qarad', v,[morph(want,past(+)),grid([agent],[prop])]).
lex(ahtarim, v,[morph(respect,past(-)),grid([experiencer],[theme])]).
lex(araa,    v,[morph(see,past(-)),grid([experiencer],[theme])]).
% Verb base forms (continued): morph(Base,Tense) plus the O-grid.
%
% FIX: 'Saaka' and 'Saahad' previously used the inflected English forms
% `complained` and `saw` in the Base slot of morph/2.  Every other verb
% stores the uninflected base there (e.g. araa uses morph(see,...)), with
% tense carried by the second argument, so they now use `complain` and
% `see`.

lex('Darab', v,[morph(beat,past(+)),grid([agent],[patient])]).
lex('dabaH', v,[morph(cut_throat,past(+)),grid([agent],[patient])]).
lex(daxal,   v,[morph(enter,past(+)),grid([agent],[theme])]).
lex(daxala,  v,[morph(enter,past(+)),grid([agent],[theme])]).
lex('jaaQ',  v,[morph(come,past(+)),grid([theme],[])]).
lex('aQtii', v,[morph(come,past(-)),grid([theme],[])]).
lex(kataba,  v,[morph(write,past(+)),grid([agent],[theme])]).
lex(ntaqada, v,[morph(criticize,past(+)),grid([agent],[theme])]).
lex(intaqada,v,[morph(criticize,past(+)),grid([agent],[theme])]).
lex('Saaka', v,[morph(complain,past(+)),grid([agent],[theme])]).
lex('Saahad',v,[morph(see,past(+)),grid([experiencer],[theme])]).
lex(takallam,v,[morph(speak,past(+)),grid([theme],[])]).
lex('uHibb', v,[morph(like,past(-)),grid([agent],[theme])]).
lex(uriid,   v,[morph(want,past(-)),grid([agent],[prop])]).
lex(uqaabil, v,[morph(meet,past(-)),grid([agent],[theme])]).
lex(zaCam,   v,[morph(pretend,past(+)),grid([agent],[prop])]).

%% Auxiliaries

% lex(have,v,[morph(have,[]),aux,perf,subcat(vp$[morph(_,ed(2))],[perf])]).
% lex(do,v,[morph(do,[]),aux,subcat(vp$[morph(_,[])],[])]).
% lex(can,v,[morph(can,def(past(-))),aux,modal,
%            subcat(vp$[morph(_,[])],[])]).

%% Adjectives
%%

% Complementizers

lex('Qan',c,[wh(-),eng(that)]).

% Modals

lex('Qa',mod,[eng(that)]).

% Markers: morphological realizations of particular features

% Case (suffixes attaching to a noun on their left)

lex(u, mrkr,[left(n,[],[suffix(nom),morphC(nom)])]).
lex(un,mrkr,[left(n,[],[suffix(nom),morphC(nom)])]).
lex(a, mrkr,[left(n,[],[suffix(acc),morphC(acc)])]).
lex(an,mrkr,[left(n,[],[suffix(acc),morphC(acc)])]).

% Agreement (suffixes attaching to a verb on their left)

lex(at,mrkr,[left(v,[],[suffix(f),agr([[],[],f])])]).          % f
lex(na,mrkr,[left(v,[],[suffix('f.pl'),agr([[],pl,f])])]).     % f.pl
lex(a, mrkr,[left(v,[],[suffix(a),subj])]).                    % subjunctive

% Person prefix (attaches to a verb on its right)

lex(y,mrkr,[right(v,[],[prefix(y),agr([3,[],[]])])]).

% NOTE(review): `no` is used as a prefix operator; its declaration is
% not visible in this file section.
no lexFeature(_,_).

% relevant for marker constraints
relevant(n).
relevant(v).

no inf(_,_).
% CONTRACTIONS
%   1st parameter is class
%
% NOTE(review): `no`, `has_feature` and `specifier_of` are used as
% operators below; their declarations are not visible in this file
% section and are presumably supplied by the host system.

no contraction(_,_,_).
no contraction(_,_,_,_).

% MISCELLANEOUS

% tnsAGRFeatures(Verb,Features)            -- overrides defn in xbar.pl
%   Takes the second argument of Verb's morph/2 feature and the
%   agreement term from vAGRFeature/2, and passes the pair to mkFs/2 to
%   obtain Features.
tnsAGRFeatures(Verb,Features) :-
    Verb has_feature morph(_Base,Tense),
    vAGRFeature(Verb,Agr),
    mkFs([Tense,Agr],Features).

% vAGRFeature(Verb,AgrTerm)
%   AgrTerm is agr(A) when Verb has an agr(A) feature, otherwise agr([]).
vAGRFeature(Verb,AgrTerm) :-
    (   Verb has_feature agr(Value)
    ->  AgrTerm = agr(Value)
    ;   AgrTerm = agr([])
    ).

%%% PREDICATES FOR INDIVIDUAL LEXICAL ITEMS

% noMax(Item): the category of Item does not satisfy max/1.
noMax(Item) :-
    cat(Item,Cat),
    \+ max(Cat).

% overt(Item): Item does not satisfy ec/1.
overt(Item) :-
    \+ ec(Item).

% [CP Op[i] [C' ..t[i].. ]]
% Assume t[i] O-marked theme
%
% inheritRole(CP,Role)
%   Role is [theme] when CP has category c2 and its specifier satisfies
%   emptyOperator/1; in every other case Role is [].
inheritRole(CP,Role) :-
    (   cat(CP,c2),
        Spec specifier_of CP,
        emptyOperator(Spec)
    ->  Role = [theme]
    ;   Role = []
    ).

% Feature tails shared by the subject and object pronominal clitics.
subjClFeatures([adjoin(v,right),morphC(nom)]).
objCliticFeatures([adjoin(v,right),morphC(acc)]).