From 0ed42304c83174441220b096dde477cc95a023ec Mon Sep 17 00:00:00 2001 From: endorphant Date: Tue, 3 May 2016 13:14:53 -0400 Subject: [PATCH] added permalinks --- bin/core.py | 46 +- bin/inflect.py | 3130 ++++++++++++++++++++++++++++++++++++++++++++++++ bin/ttbp.py | 27 +- changelog.txt | 5 +- 4 files changed, 3195 insertions(+), 13 deletions(-) create mode 100644 bin/inflect.py diff --git a/bin/core.py b/bin/core.py index d6dbd44..d6685b0 100644 --- a/bin/core.py +++ b/bin/core.py @@ -46,10 +46,10 @@ def load_files(): global FILES FILES = [] - for file in os.listdir(DATA): - filename = os.path.join(DATA, file) + for filename in os.listdir(DATA): + filename = os.path.join(DATA, filename) if os.path.isfile(filename) and os.path.splitext(filename)[1] == ".txt": - FILES.append(file) + FILES.append(filename) FILES.sort() FILES.reverse() @@ -67,8 +67,9 @@ def write(outurl="default.html"): outfile.write("\n") - for file in FILES: - for line in write_entry(file): + for filename in FILES: + write_page(filename) + for line in write_entry(filename): outfile.write(line) outfile.write("\n") @@ -80,10 +81,35 @@ def write(outurl="default.html"): return os.path.join(LIVE+USER,os.path.basename(os.path.realpath(WWW)),outurl) -def write_entry(file): +def write_page(filename): + # makes a single permalink page + + outurl = os.path.join(WWW, "".join(parse_date(filename))+".html") + outfile = open(outurl, "w") + + outfile.write("\n\n") + + for line in HEADER: + outfile.write(line) + + outfile.write("\n") + + for line in write_entry(filename): + outfile.write(line) + + outfile.write("\n") + + for line in FOOTER: + outfile.write(line) + + outfile.close() + + return outurl + +def write_entry(filename): # dump given file into entry format, return as list of strings - date = parse_date(file) + date = parse_date(filename) entry = [ "\t\t
\n", @@ -93,7 +119,7 @@ def write_entry(file): ] raw = [] - rawfile = open(os.path.join(DATA, file), "r") + rawfile = open(os.path.join(DATA, filename), "r") for line in rawfile: raw.append(line) @@ -104,7 +130,9 @@ def write_entry(file): if line == "\n": entry.append("
\n\t\t\t
") - entry.append("
\n\t\t\n") + entry.append("
\n") + entry.append("\t\t\t
permalink
\n") + entry.append("\n\t\t\n") return entry diff --git a/bin/inflect.py b/bin/inflect.py new file mode 100644 index 0000000..64382a2 --- /dev/null +++ b/bin/inflect.py @@ -0,0 +1,3130 @@ +''' + inflect.py: correctly generate plurals, ordinals, indefinite articles; + convert numbers to words + Copyright (C) 2010 Paul Dyson + + Based upon the Perl module Lingua::EN::Inflect by Damian Conway. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + The original Perl module Lingua::EN::Inflect by Damian Conway is + available from http://search.cpan.org/~dconway/ + + This module can be downloaded at http://pypi.python.org/pypi/inflect + +methods: + classical inflect + plural plural_noun plural_verb plural_adj singular_noun no num a an + compare compare_nouns compare_verbs compare_adjs + present_participle + ordinal + number_to_words + join + defnoun defverb defadj defa defan + + INFLECTIONS: classical inflect + plural plural_noun plural_verb plural_adj singular_noun compare + no num a an present_participle + + PLURALS: classical inflect + plural plural_noun plural_verb plural_adj singular_noun no num + compare compare_nouns compare_verbs compare_adjs + + COMPARISONS: classical + compare compare_nouns compare_verbs compare_adjs + + ARTICLES: classical inflect num a an + + NUMERICAL: ordinal number_to_words + + USER_DEFINED: defnoun defverb defadj defa defan + +Exceptions: + UnknownClassicalModeError + BadNumValueError + BadChunkingOptionError + NumOutOfRangeError + BadUserDefinedPatternError + BadRcFileError + BadGenderError + +''' + +from re import match, search, subn, IGNORECASE, VERBOSE +from re import split as splitre +from re import error as reerror +from re import sub as resub + + +class UnknownClassicalModeError(Exception): + pass + + +class BadNumValueError(Exception): + pass + + +class BadChunkingOptionError(Exception): + pass + + +class NumOutOfRangeError(Exception): + pass + + +class BadUserDefinedPatternError(Exception): + pass + + +class BadRcFileError(Exception): + pass + + +class BadGenderError(Exception): + pass + +__ver_major__ = 0 +__ver_minor__ = 2 +__ver_patch__ = 4 +__ver_sub__ = "" +__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, + __ver_patch__, __ver_sub__) + + +STDOUT_ON = False + + +def print3(txt): + if STDOUT_ON: + print(txt) + + +def enclose(s): + return "(?:%s)" % s + + +def joinstem(cutpoint=0, words=''): + ''' + join stem of each word in words into a string for regex + each word is truncated at cutpoint + cutpoint is usually negative indicating the number of letters to remove + from the end of each word + + e.g. + joinstem(-2, ["ephemeris", "iris", ".*itis"]) returns + (?:ephemer|ir|.*it) + + ''' + return enclose('|'.join(w[:cutpoint] for w in words)) + + +def bysize(words): + ''' + take a list of words and return a dict of sets sorted by word length + e.g. 
+ ret[3]=set(['ant', 'cat', 'dog', 'pig']) + ret[4]=set(['frog', 'goat']) + ret[5]=set(['horse']) + ret[8]=set(['elephant']) + ''' + ret = {} + for w in words: + if len(w) not in ret: + ret[len(w)] = set() + ret[len(w)].add(w) + return ret + + +def make_pl_si_lists(lst, plending, siendingsize, dojoinstem=True): + ''' + given a list of singular words: lst + an ending to append to make the plural: plending + the number of characters to remove from the singular before appending plending: siendingsize + a flag whether to create a joinstem: dojoinstem + + return: + a list of pluralised words: si_list (called si because this is what you need to + look for to make the singular) + the pluralised words as a dict of sets sorted by word length: si_bysize + the singular words as a dict of sets sorted by word length: pl_bysize + if dojoinstem is True: a regular expression that matches any of the stems: stem + ''' + if siendingsize is not None: + siendingsize = -siendingsize + si_list = [w[:siendingsize] + plending for w in lst] + pl_bysize = bysize(lst) + si_bysize = bysize(si_list) + if dojoinstem: + stem = joinstem(siendingsize, lst) + return si_list, si_bysize, pl_bysize, stem + else: + return si_list, si_bysize, pl_bysize + + +# 1. PLURALS + +pl_sb_irregular_s = { + "corpus": "corpuses|corpora", + "opus": "opuses|opera", + "genus": "genera", + "mythos": "mythoi", + "penis": "penises|penes", + "testis": "testes", + "atlas": "atlases|atlantes", + "yes": "yeses", +} + +pl_sb_irregular = { + "child": "children", + "brother": "brothers|brethren", + "loaf": "loaves", + "hoof": "hoofs|hooves", + "beef": "beefs|beeves", + "thief": "thiefs|thieves", + "money": "monies", + "mongoose": "mongooses", + "ox": "oxen", + "cow": "cows|kine", + "graffito": "graffiti", + "octopus": "octopuses|octopodes", + "genie": "genies|genii", + "ganglion": "ganglions|ganglia", + "trilby": "trilbys", + "turf": "turfs|turves", + "numen": "numina", + "atman": "atmas", + "occiput": "occiputs|occipita", + "sabretooth": "sabretooths", + "sabertooth": "sabertooths", + "lowlife": "lowlifes", + "flatfoot": "flatfoots", + "tenderfoot": "tenderfoots", + "romany": "romanies", + "jerry": "jerries", + "mary": "maries", + "talouse": "talouses", + "blouse": "blouses", + "rom": "roma", + "carmen": "carmina", +} + +pl_sb_irregular.update(pl_sb_irregular_s) +# pl_sb_irregular_keys = enclose('|'.join(pl_sb_irregular.keys())) + +pl_sb_irregular_caps = { + 'Romany': 'Romanies', + 'Jerry': 'Jerrys', + 'Mary': 'Marys', + 'Rom': 'Roma', +} + +pl_sb_irregular_compound = { + "prima donna": "prima donnas|prime donne", +} + +si_sb_irregular = dict([(v, k) for (k, v) in pl_sb_irregular.items()]) +keys = list(si_sb_irregular.keys()) +for k in keys: + if '|' in k: + k1, k2 = k.split('|') + si_sb_irregular[k1] = si_sb_irregular[k2] = si_sb_irregular[k] + del si_sb_irregular[k] +si_sb_irregular_caps = dict([(v, k) for (k, v) in pl_sb_irregular_caps.items()]) +si_sb_irregular_compound = dict([(v, k) for (k, v) in pl_sb_irregular_compound.items()]) +keys = list(si_sb_irregular_compound.keys()) +for k in keys: + if '|' in k: + k1, k2 = k.split('|') + si_sb_irregular_compound[k1] = si_sb_irregular_compound[k2] = si_sb_irregular_compound[k] + del si_sb_irregular_compound[k] + +# si_sb_irregular_keys = enclose('|'.join(si_sb_irregular.keys())) + +# Z's that don't double + +pl_sb_z_zes_list = ( + "quartz", "topaz", +) +pl_sb_z_zes_bysize = bysize(pl_sb_z_zes_list) + +pl_sb_ze_zes_list = ('snooze',) +pl_sb_ze_zes_bysize = bysize(pl_sb_ze_zes_list) + + +# CLASSICAL 
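A quick sketch of what these helper functions produce, assuming the module is importable as inflect (the values follow directly from the definitions of bysize, joinstem and make_pl_si_lists above):

from inflect import bysize, joinstem, make_pl_si_lists

words = ("bacterium", "datum")

# bysize groups words into sets keyed by their length
bysize(words)              # {9: {'bacterium'}, 5: {'datum'}}

# joinstem truncates each word and joins the stems into one regex group
joinstem(-2, words)        # '(?:bacteri|dat)'

# make_pl_si_lists builds the plural forms plus size-indexed lookup tables
si_list, si_bysize, pl_bysize, stem = make_pl_si_lists(words, 'a', 2)
# si_list -> ['bacteria', 'data']   (drop 2 characters, append 'a')
# stem    -> '(?:bacteri|dat)'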
"..is" -> "..ides" + +pl_sb_C_is_ides_complete = [ + # GENERAL WORDS... + "ephemeris", "iris", "clitoris", + "chrysalis", "epididymis", +] + +pl_sb_C_is_ides_endings = [ + # INFLAMATIONS... + "itis", +] + +pl_sb_C_is_ides = joinstem(-2, pl_sb_C_is_ides_complete + ['.*%s' % w for w in pl_sb_C_is_ides_endings]) + +pl_sb_C_is_ides_list = pl_sb_C_is_ides_complete + pl_sb_C_is_ides_endings + +(si_sb_C_is_ides_list, si_sb_C_is_ides_bysize, + pl_sb_C_is_ides_bysize) = make_pl_si_lists(pl_sb_C_is_ides_list, 'ides', 2, dojoinstem=False) + + +# CLASSICAL "..a" -> "..ata" + +pl_sb_C_a_ata_list = ( + "anathema", "bema", "carcinoma", "charisma", "diploma", + "dogma", "drama", "edema", "enema", "enigma", "lemma", + "lymphoma", "magma", "melisma", "miasma", "oedema", + "sarcoma", "schema", "soma", "stigma", "stoma", "trauma", + "gumma", "pragma", +) + +(si_sb_C_a_ata_list, si_sb_C_a_ata_bysize, + pl_sb_C_a_ata_bysize, pl_sb_C_a_ata) = make_pl_si_lists(pl_sb_C_a_ata_list, 'ata', 1) + +# UNCONDITIONAL "..a" -> "..ae" + +pl_sb_U_a_ae_list = ( + "alumna", "alga", "vertebra", "persona" +) +(si_sb_U_a_ae_list, si_sb_U_a_ae_bysize, + pl_sb_U_a_ae_bysize, pl_sb_U_a_ae) = make_pl_si_lists(pl_sb_U_a_ae_list, 'e', None) + +# CLASSICAL "..a" -> "..ae" + +pl_sb_C_a_ae_list = ( + "amoeba", "antenna", "formula", "hyperbola", + "medusa", "nebula", "parabola", "abscissa", + "hydra", "nova", "lacuna", "aurora", "umbra", + "flora", "fauna", +) +(si_sb_C_a_ae_list, si_sb_C_a_ae_bysize, + pl_sb_C_a_ae_bysize, pl_sb_C_a_ae) = make_pl_si_lists(pl_sb_C_a_ae_list, 'e', None) + + +# CLASSICAL "..en" -> "..ina" + +pl_sb_C_en_ina_list = ( + "stamen", "foramen", "lumen", +) + +(si_sb_C_en_ina_list, si_sb_C_en_ina_bysize, + pl_sb_C_en_ina_bysize, pl_sb_C_en_ina) = make_pl_si_lists(pl_sb_C_en_ina_list, 'ina', 2) + + +# UNCONDITIONAL "..um" -> "..a" + +pl_sb_U_um_a_list = ( + "bacterium", "agendum", "desideratum", "erratum", + "stratum", "datum", "ovum", "extremum", + "candelabrum", +) +(si_sb_U_um_a_list, si_sb_U_um_a_bysize, + pl_sb_U_um_a_bysize, pl_sb_U_um_a) = make_pl_si_lists(pl_sb_U_um_a_list, 'a', 2) + +# CLASSICAL "..um" -> "..a" + +pl_sb_C_um_a_list = ( + "maximum", "minimum", "momentum", "optimum", + "quantum", "cranium", "curriculum", "dictum", + "phylum", "aquarium", "compendium", "emporium", + "enconium", "gymnasium", "honorarium", "interregnum", + "lustrum", "memorandum", "millennium", "rostrum", + "spectrum", "speculum", "stadium", "trapezium", + "ultimatum", "medium", "vacuum", "velum", + "consortium", "arboretum", +) + +(si_sb_C_um_a_list, si_sb_C_um_a_bysize, + pl_sb_C_um_a_bysize, pl_sb_C_um_a) = make_pl_si_lists(pl_sb_C_um_a_list, 'a', 2) + + +# UNCONDITIONAL "..us" -> "i" + +pl_sb_U_us_i_list = ( + "alumnus", "alveolus", "bacillus", "bronchus", + "locus", "nucleus", "stimulus", "meniscus", + "sarcophagus", +) +(si_sb_U_us_i_list, si_sb_U_us_i_bysize, + pl_sb_U_us_i_bysize, pl_sb_U_us_i) = make_pl_si_lists(pl_sb_U_us_i_list, 'i', 2) + +# CLASSICAL "..us" -> "..i" + +pl_sb_C_us_i_list = ( + "focus", "radius", "genius", + "incubus", "succubus", "nimbus", + "fungus", "nucleolus", "stylus", + "torus", "umbilicus", "uterus", + "hippopotamus", "cactus", +) + +(si_sb_C_us_i_list, si_sb_C_us_i_bysize, + pl_sb_C_us_i_bysize, pl_sb_C_us_i) = make_pl_si_lists(pl_sb_C_us_i_list, 'i', 2) + + +# CLASSICAL "..us" -> "..us" (ASSIMILATED 4TH DECLENSION LATIN NOUNS) + +pl_sb_C_us_us = ( + "status", "apparatus", "prospectus", "sinus", + "hiatus", "impetus", "plexus", +) +pl_sb_C_us_us_bysize = bysize(pl_sb_C_us_us) + +# UNCONDITIONAL 
"..on" -> "a" + +pl_sb_U_on_a_list = ( + "criterion", "perihelion", "aphelion", + "phenomenon", "prolegomenon", "noumenon", + "organon", "asyndeton", "hyperbaton", +) +(si_sb_U_on_a_list, si_sb_U_on_a_bysize, + pl_sb_U_on_a_bysize, pl_sb_U_on_a) = make_pl_si_lists(pl_sb_U_on_a_list, 'a', 2) + +# CLASSICAL "..on" -> "..a" + +pl_sb_C_on_a_list = ( + "oxymoron", +) + +(si_sb_C_on_a_list, si_sb_C_on_a_bysize, + pl_sb_C_on_a_bysize, pl_sb_C_on_a) = make_pl_si_lists(pl_sb_C_on_a_list, 'a', 2) + + +# CLASSICAL "..o" -> "..i" (BUT NORMALLY -> "..os") + +pl_sb_C_o_i = [ + "solo", "soprano", "basso", "alto", + "contralto", "tempo", "piano", "virtuoso", +] # list not tuple so can concat for pl_sb_U_o_os + +pl_sb_C_o_i_bysize = bysize(pl_sb_C_o_i) +si_sb_C_o_i_bysize = bysize(['%si' % w[:-1] for w in pl_sb_C_o_i]) + +pl_sb_C_o_i_stems = joinstem(-1, pl_sb_C_o_i) + +# ALWAYS "..o" -> "..os" + +pl_sb_U_o_os_complete = set(( + "ado", "ISO", "NATO", "NCO", "NGO", "oto", +)) +si_sb_U_o_os_complete = set('%ss' % w for w in pl_sb_U_o_os_complete) + + +pl_sb_U_o_os_endings = [ + "aficionado", "aggro", + "albino", "allegro", "ammo", + "Antananarivo", "archipelago", "armadillo", + "auto", "avocado", "Bamako", + "Barquisimeto", "bimbo", "bingo", + "Biro", "bolero", "Bolzano", + "bongo", "Boto", "burro", + "Cairo", "canto", "cappuccino", + "casino", "cello", "Chicago", + "Chimango", "cilantro", "cochito", + "coco", "Colombo", "Colorado", + "commando", "concertino", "contango", + "credo", "crescendo", "cyano", + "demo", "ditto", "Draco", + "dynamo", "embryo", "Esperanto", + "espresso", "euro", "falsetto", + "Faro", "fiasco", "Filipino", + "flamenco", "furioso", "generalissimo", + "Gestapo", "ghetto", "gigolo", + "gizmo", "Greensboro", "gringo", + "Guaiabero", "guano", "gumbo", + "gyro", "hairdo", "hippo", + "Idaho", "impetigo", "inferno", + "info", "intermezzo", "intertrigo", + "Iquico", "jumbo", + "junto", "Kakapo", "kilo", + "Kinkimavo", "Kokako", "Kosovo", + "Lesotho", "libero", "libido", + "libretto", "lido", "Lilo", + "limbo", "limo", "lineno", + "lingo", "lino", "livedo", + "loco", "logo", "lumbago", + "macho", "macro", "mafioso", + "magneto", "magnifico", "Majuro", + "Malabo", "manifesto", "Maputo", + "Maracaibo", "medico", "memo", + "metro", "Mexico", "micro", + "Milano", "Monaco", "mono", + "Montenegro", "Morocco", "Muqdisho", + "myo", + "neutrino", "Ningbo", + "octavo", "oregano", "Orinoco", + "Orlando", "Oslo", + "panto", "Paramaribo", "Pardusco", + "pedalo", "photo", "pimento", + "pinto", "pleco", "Pluto", + "pogo", "polo", "poncho", + "Porto-Novo", "Porto", "pro", + "psycho", "pueblo", "quarto", + "Quito", "rhino", "risotto", + "rococo", "rondo", "Sacramento", + "saddo", "sago", "salvo", + "Santiago", "Sapporo", "Sarajevo", + "scherzando", "scherzo", "silo", + "sirocco", "sombrero", "staccato", + "sterno", "stucco", "stylo", + "sumo", "Taiko", "techno", + "terrazzo", "testudo", "timpano", + "tiro", "tobacco", "Togo", + "Tokyo", "torero", "Torino", + "Toronto", "torso", "tremolo", + "typo", "tyro", "ufo", + "UNESCO", "vaquero", "vermicello", + "verso", "vibrato", "violoncello", + "Virgo", "weirdo", "WHO", + "WTO", "Yamoussoukro", "yo-yo", + "zero", "Zibo", +] + pl_sb_C_o_i + +pl_sb_U_o_os_bysize = bysize(pl_sb_U_o_os_endings) +si_sb_U_o_os_bysize = bysize(['%ss' % w for w in pl_sb_U_o_os_endings]) + + +# UNCONDITIONAL "..ch" -> "..chs" + +pl_sb_U_ch_chs_list = ( + "czech", "eunuch", "stomach" +) + +(si_sb_U_ch_chs_list, si_sb_U_ch_chs_bysize, + pl_sb_U_ch_chs_bysize, pl_sb_U_ch_chs) = 
make_pl_si_lists(pl_sb_U_ch_chs_list, 's', None) + + +# UNCONDITIONAL "..[ei]x" -> "..ices" + +pl_sb_U_ex_ices_list = ( + "codex", "murex", "silex", +) +(si_sb_U_ex_ices_list, si_sb_U_ex_ices_bysize, + pl_sb_U_ex_ices_bysize, pl_sb_U_ex_ices) = make_pl_si_lists(pl_sb_U_ex_ices_list, 'ices', 2) + +pl_sb_U_ix_ices_list = ( + "radix", "helix", +) +(si_sb_U_ix_ices_list, si_sb_U_ix_ices_bysize, + pl_sb_U_ix_ices_bysize, pl_sb_U_ix_ices) = make_pl_si_lists(pl_sb_U_ix_ices_list, 'ices', 2) + +# CLASSICAL "..[ei]x" -> "..ices" + +pl_sb_C_ex_ices_list = ( + "vortex", "vertex", "cortex", "latex", + "pontifex", "apex", "index", "simplex", +) + +(si_sb_C_ex_ices_list, si_sb_C_ex_ices_bysize, + pl_sb_C_ex_ices_bysize, pl_sb_C_ex_ices) = make_pl_si_lists(pl_sb_C_ex_ices_list, 'ices', 2) + + +pl_sb_C_ix_ices_list = ( + "appendix", +) + +(si_sb_C_ix_ices_list, si_sb_C_ix_ices_bysize, + pl_sb_C_ix_ices_bysize, pl_sb_C_ix_ices) = make_pl_si_lists(pl_sb_C_ix_ices_list, 'ices', 2) + + +# ARABIC: ".." -> "..i" + +pl_sb_C_i_list = ( + "afrit", "afreet", "efreet", +) + +(si_sb_C_i_list, si_sb_C_i_bysize, + pl_sb_C_i_bysize, pl_sb_C_i) = make_pl_si_lists(pl_sb_C_i_list, 'i', None) + + +# HEBREW: ".." -> "..im" + +pl_sb_C_im_list = ( + "goy", "seraph", "cherub", +) + +(si_sb_C_im_list, si_sb_C_im_bysize, + pl_sb_C_im_bysize, pl_sb_C_im) = make_pl_si_lists(pl_sb_C_im_list, 'im', None) + + +# UNCONDITIONAL "..man" -> "..mans" + +pl_sb_U_man_mans_list = """ + ataman caiman cayman ceriman + desman dolman farman harman hetman + human leman ottoman shaman talisman +""".split() +pl_sb_U_man_mans_caps_list = """ + Alabaman Bahaman Burman German + Hiroshiman Liman Nakayaman Norman Oklahoman + Panaman Roman Selman Sonaman Tacoman Yakiman + Yokohaman Yuman +""".split() + +(si_sb_U_man_mans_list, si_sb_U_man_mans_bysize, + pl_sb_U_man_mans_bysize) = make_pl_si_lists(pl_sb_U_man_mans_list, 's', None, dojoinstem=False) +(si_sb_U_man_mans_caps_list, si_sb_U_man_mans_caps_bysize, + pl_sb_U_man_mans_caps_bysize) = make_pl_si_lists(pl_sb_U_man_mans_caps_list, 's', None, dojoinstem=False) + + +pl_sb_uninflected_s_complete = [ + # PAIRS OR GROUPS SUBSUMED TO A SINGULAR... + "breeches", "britches", "pajamas", "pyjamas", "clippers", "gallows", + "hijinks", "headquarters", "pliers", "scissors", "testes", "herpes", + "pincers", "shears", "proceedings", "trousers", + + # UNASSIMILATED LATIN 4th DECLENSION + + "cantus", "coitus", "nexus", + + # RECENT IMPORTS... + "contretemps", "corps", "debris", + "siemens", + + # DISEASES + "mumps", + + # MISCELLANEOUS OTHERS... + "diabetes", "jackanapes", "series", "species", "subspecies", "rabies", + "chassis", "innings", "news", "mews", "haggis", +] + +pl_sb_uninflected_s_endings = [ + # RECENT IMPORTS... 
+ "ois", + + # DISEASES + "measles", +] + +pl_sb_uninflected_s = pl_sb_uninflected_s_complete + ['.*%s' % w for w in pl_sb_uninflected_s_endings] + +pl_sb_uninflected_herd = ( + # DON'T INFLECT IN CLASSICAL MODE, OTHERWISE NORMAL INFLECTION + "wildebeest", "swine", "eland", "bison", "buffalo", + "elk", "rhinoceros", 'zucchini', + 'caribou', 'dace', 'grouse', 'guinea fowl', 'guinea-fowl', + 'haddock', 'hake', 'halibut', 'herring', 'mackerel', + 'pickerel', 'pike', 'roe', 'seed', 'shad', + 'snipe', 'teal', 'turbot', 'water fowl', 'water-fowl', +) + +pl_sb_uninflected_complete = [ + # SOME FISH AND HERD ANIMALS + "tuna", "salmon", "mackerel", "trout", + "bream", "sea-bass", "sea bass", "carp", "cod", "flounder", "whiting", + "moose", + + # OTHER ODDITIES + "graffiti", "djinn", 'samuri', + 'offspring', 'pence', 'quid', 'hertz', +] + pl_sb_uninflected_s_complete +# SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE) + +pl_sb_uninflected_caps = [ + # ALL NATIONALS ENDING IN -ese + "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese", + "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese", + "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese", + "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese", + "Shavese", "Vermontese", "Wenchowese", "Yengeese", +] + + +pl_sb_uninflected_endings = [ + # SOME FISH AND HERD ANIMALS + "fish", + + "deer", "sheep", + + # ALL NATIONALS ENDING IN -ese + "nese", "rese", "lese", "mese", + + # DISEASES + "pox", + + + # OTHER ODDITIES + 'craft', +] + pl_sb_uninflected_s_endings +# SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE) + + +pl_sb_uninflected_bysize = bysize(pl_sb_uninflected_endings) + + +# SINGULAR WORDS ENDING IN ...s (ALL INFLECT WITH ...es) + +pl_sb_singular_s_complete = [ + "acropolis", "aegis", "alias", "asbestos", "bathos", "bias", + "bronchitis", "bursitis", "caddis", "cannabis", + "canvas", "chaos", "cosmos", "dais", "digitalis", + "epidermis", "ethos", "eyas", "gas", "glottis", + "hubris", "ibis", "lens", "mantis", "marquis", "metropolis", + "pathos", "pelvis", "polis", "rhinoceros", + "sassafras", "trellis", +] + pl_sb_C_is_ides_complete + + +pl_sb_singular_s_endings = [ + "ss", "us", +] + pl_sb_C_is_ides_endings + +pl_sb_singular_s_bysize = bysize(pl_sb_singular_s_endings) + +si_sb_singular_s_complete = ['%ses' % w for w in pl_sb_singular_s_complete] +si_sb_singular_s_endings = ['%ses' % w for w in pl_sb_singular_s_endings] +si_sb_singular_s_bysize = bysize(si_sb_singular_s_endings) + +pl_sb_singular_s_es = [ + "[A-Z].*es", +] + +pl_sb_singular_s = enclose('|'.join(pl_sb_singular_s_complete + + ['.*%s' % w for w in pl_sb_singular_s_endings] + + pl_sb_singular_s_es)) + + +# PLURALS ENDING IN uses -> use + + +si_sb_ois_oi_case = ( + 'Bolshois', 'Hanois' +) + +si_sb_uses_use_case = ( + 'Betelgeuses', 'Duses', 'Meuses', 'Syracuses', 'Toulouses', +) + +si_sb_uses_use = ( + 'abuses', 'applauses', 'blouses', + 'carouses', 'causes', 'chartreuses', 'clauses', + 'contuses', 'douses', 'excuses', 'fuses', + 'grouses', 'hypotenuses', 'masseuses', + 'menopauses', 'misuses', 'muses', 'overuses', 'pauses', + 'peruses', 'profuses', 'recluses', 'reuses', + 'ruses', 'souses', 'spouses', 'suffuses', 'transfuses', 'uses', +) + +si_sb_ies_ie_case = ( + 'Addies', 'Aggies', 'Allies', 'Amies', 'Angies', 'Annies', + 'Annmaries', 'Archies', 'Arties', 'Aussies', 'Barbies', + 'Barries', 'Basies', 'Bennies', 'Bernies', 'Berties', 'Bessies', + 'Betties', 'Billies', 'Blondies', 'Bobbies', 'Bonnies', + 'Bowies', 'Brandies', 
'Bries', 'Brownies', 'Callies', + 'Carnegies', 'Carries', 'Cassies', 'Charlies', 'Cheries', + 'Christies', 'Connies', 'Curies', 'Dannies', 'Debbies', 'Dixies', + 'Dollies', 'Donnies', 'Drambuies', 'Eddies', 'Effies', 'Ellies', + 'Elsies', 'Eries', 'Ernies', 'Essies', 'Eugenies', 'Fannies', + 'Flossies', 'Frankies', 'Freddies', 'Gillespies', 'Goldies', + 'Gracies', 'Guthries', 'Hallies', 'Hatties', 'Hetties', + 'Hollies', 'Jackies', 'Jamies', 'Janies', 'Jannies', 'Jeanies', + 'Jeannies', 'Jennies', 'Jessies', 'Jimmies', 'Jodies', 'Johnies', + 'Johnnies', 'Josies', 'Julies', 'Kalgoorlies', 'Kathies', 'Katies', + 'Kellies', 'Kewpies', 'Kristies', 'Laramies', 'Lassies', 'Lauries', + 'Leslies', 'Lessies', 'Lillies', 'Lizzies', 'Lonnies', 'Lories', + 'Lorries', 'Lotties', 'Louies', 'Mackenzies', 'Maggies', 'Maisies', + 'Mamies', 'Marcies', 'Margies', 'Maries', 'Marjories', 'Matties', + 'McKenzies', 'Melanies', 'Mickies', 'Millies', 'Minnies', 'Mollies', + 'Mounties', 'Nannies', 'Natalies', 'Nellies', 'Netties', 'Ollies', + 'Ozzies', 'Pearlies', 'Pottawatomies', 'Reggies', 'Richies', 'Rickies', + 'Robbies', 'Ronnies', 'Rosalies', 'Rosemaries', 'Rosies', 'Roxies', + 'Rushdies', 'Ruthies', 'Sadies', 'Sallies', 'Sammies', 'Scotties', + 'Selassies', 'Sherries', 'Sophies', 'Stacies', 'Stefanies', 'Stephanies', + 'Stevies', 'Susies', 'Sylvies', 'Tammies', 'Terries', 'Tessies', + 'Tommies', 'Tracies', 'Trekkies', 'Valaries', 'Valeries', 'Valkyries', + 'Vickies', 'Virgies', 'Willies', 'Winnies', 'Wylies', 'Yorkies', +) + +si_sb_ies_ie = ( + 'aeries', 'baggies', 'belies', 'biggies', 'birdies', 'bogies', + 'bonnies', 'boogies', 'bookies', 'bourgeoisies', 'brownies', + 'budgies', 'caddies', 'calories', 'camaraderies', 'cockamamies', + 'collies', 'cookies', 'coolies', 'cooties', 'coteries', 'crappies', + 'curies', 'cutesies', 'dogies', 'eyrie', 'floozies', 'footsies', + 'freebies', 'genies', 'goalies', 'groupies', + 'hies', 'jalousies', 'junkies', + 'kiddies', 'laddies', 'lassies', 'lies', + 'lingeries', 'magpies', 'menageries', 'mommies', 'movies', 'neckties', + 'newbies', 'nighties', 'oldies', 'organdies', 'overlies', + 'pies', 'pinkies', 'pixies', 'potpies', 'prairies', + 'quickies', 'reveries', 'rookies', 'rotisseries', 'softies', 'sorties', + 'species', 'stymies', 'sweeties', 'ties', 'underlies', 'unties', + 'veggies', 'vies', 'yuppies', 'zombies', +) + + +si_sb_oes_oe_case = ( + 'Chloes', 'Crusoes', 'Defoes', 'Faeroes', 'Ivanhoes', 'Joes', + 'McEnroes', 'Moes', 'Monroes', 'Noes', 'Poes', 'Roscoes', + 'Tahoes', 'Tippecanoes', 'Zoes', +) + +si_sb_oes_oe = ( + 'aloes', 'backhoes', 'canoes', + 'does', 'floes', 'foes', 'hoes', 'mistletoes', + 'oboes', 'pekoes', 'roes', 'sloes', + 'throes', 'tiptoes', 'toes', 'woes', +) + +si_sb_z_zes = ( + "quartzes", "topazes", +) + +si_sb_zzes_zz = ( + 'buzzes', 'fizzes', 'frizzes', 'razzes' +) + +si_sb_ches_che_case = ( + 'Andromaches', 'Apaches', 'Blanches', 'Comanches', + 'Nietzsches', 'Porsches', 'Roches', +) + +si_sb_ches_che = ( + 'aches', 'avalanches', 'backaches', 'bellyaches', 'caches', + 'cloches', 'creches', 'douches', 'earaches', 'fiches', + 'headaches', 'heartaches', 'microfiches', + 'niches', 'pastiches', 'psyches', 'quiches', + 'stomachaches', 'toothaches', +) + +si_sb_xes_xe = ( + 'annexes', 'axes', 'deluxes', 'pickaxes', +) + +si_sb_sses_sse_case = ( + 'Hesses', 'Jesses', 'Larousses', 'Matisses', +) +si_sb_sses_sse = ( + 'bouillabaisses', 'crevasses', 'demitasses', 'impasses', + 'mousses', 'posses', +) + +si_sb_ves_ve_case = ( + # *[nwl]ives -> [nwl]live 
+ 'Clives', 'Palmolives', +) +si_sb_ves_ve = ( + # *[^d]eaves -> eave + 'interweaves', 'weaves', + + # *[nwl]ives -> [nwl]live + 'olives', + + # *[eoa]lves -> [eoa]lve + 'bivalves', 'dissolves', 'resolves', 'salves', 'twelves', 'valves', +) + + +plverb_special_s = enclose('|'.join( + [pl_sb_singular_s] + + pl_sb_uninflected_s + + list(pl_sb_irregular_s.keys()) + [ + '(.*[csx])is', + '(.*)ceps', + '[A-Z].*s', + ] +)) + +pl_sb_postfix_adj = { + 'general': ['(?!major|lieutenant|brigadier|adjutant|.*star)\S+'], + 'martial': ['court'], +} + +for k in list(pl_sb_postfix_adj.keys()): + pl_sb_postfix_adj[k] = enclose( + enclose('|'.join(pl_sb_postfix_adj[k])) + + "(?=(?:-|\\s+)%s)" % k) + +pl_sb_postfix_adj_stems = '(' + '|'.join(list(pl_sb_postfix_adj.values())) + ')(.*)' + + +# PLURAL WORDS ENDING IS es GO TO SINGULAR is + +si_sb_es_is = ( + 'amanuenses', 'amniocenteses', 'analyses', 'antitheses', + 'apotheoses', 'arterioscleroses', 'atheroscleroses', 'axes', + # 'bases', # bases -> basis + 'catalyses', 'catharses', 'chasses', 'cirrhoses', + 'cocces', 'crises', 'diagnoses', 'dialyses', 'diereses', + 'electrolyses', 'emphases', 'exegeses', 'geneses', + 'halitoses', 'hydrolyses', 'hypnoses', 'hypotheses', 'hystereses', + 'metamorphoses', 'metastases', 'misdiagnoses', 'mitoses', + 'mononucleoses', 'narcoses', 'necroses', 'nemeses', 'neuroses', + 'oases', 'osmoses', 'osteoporoses', 'paralyses', 'parentheses', + 'parthenogeneses', 'periphrases', 'photosyntheses', 'probosces', + 'prognoses', 'prophylaxes', 'prostheses', 'preces', 'psoriases', + 'psychoanalyses', 'psychokineses', 'psychoses', 'scleroses', + 'scolioses', 'sepses', 'silicoses', 'symbioses', 'synopses', + 'syntheses', 'taxes', 'telekineses', 'theses', 'thromboses', + 'tuberculoses', 'urinalyses', +) + +pl_prep_list = """ + about above across after among around at athwart before behind + below beneath beside besides between betwixt beyond but by + during except for from in into near of off on onto out over + since till to under until unto upon with""".split() + +pl_prep_list_da = pl_prep_list + ['de', 'du', 'da'] + +pl_prep_bysize = bysize(pl_prep_list_da) + +pl_prep = enclose('|'.join(pl_prep_list_da)) + +pl_sb_prep_dual_compound = r'(.*?)((?:-|\s+)(?:' + pl_prep + r')(?:-|\s+))a(?:-|\s+)(.*)' + + +singular_pronoun_genders = set(['neuter', + 'feminine', + 'masculine', + 'gender-neutral', + 'feminine or masculine', + 'masculine or feminine']) + +pl_pron_nom = { + # NOMINATIVE REFLEXIVE + "i": "we", "myself": "ourselves", + "you": "you", "yourself": "yourselves", + "she": "they", "herself": "themselves", + "he": "they", "himself": "themselves", + "it": "they", "itself": "themselves", + "they": "they", "themself": "themselves", + + # POSSESSIVE + "mine": "ours", + "yours": "yours", + "hers": "theirs", + "his": "theirs", + "its": "theirs", + "theirs": "theirs", +} + +si_pron = {} +si_pron['nom'] = dict([(v, k) for (k, v) in pl_pron_nom.items()]) +si_pron['nom']['we'] = 'I' + + +pl_pron_acc = { + # ACCUSATIVE REFLEXIVE + "me": "us", "myself": "ourselves", + "you": "you", "yourself": "yourselves", + "her": "them", "herself": "themselves", + "him": "them", "himself": "themselves", + "it": "them", "itself": "themselves", + "them": "them", "themself": "themselves", +} + +pl_pron_acc_keys = enclose('|'.join(list(pl_pron_acc.keys()))) +pl_pron_acc_keys_bysize = bysize(list(pl_pron_acc.keys())) + +si_pron['acc'] = dict([(v, k) for (k, v) in pl_pron_acc.items()]) + +for thecase, plur, gend, sing in ( + ('nom', 'they', 'neuter', 'it'), + ('nom', 
'they', 'feminine', 'she'), + ('nom', 'they', 'masculine', 'he'), + ('nom', 'they', 'gender-neutral', 'they'), + ('nom', 'they', 'feminine or masculine', 'she or he'), + ('nom', 'they', 'masculine or feminine', 'he or she'), + ('nom', 'themselves', 'neuter', 'itself'), + ('nom', 'themselves', 'feminine', 'herself'), + ('nom', 'themselves', 'masculine', 'himself'), + ('nom', 'themselves', 'gender-neutral', 'themself'), + ('nom', 'themselves', 'feminine or masculine', 'herself or himself'), + ('nom', 'themselves', 'masculine or feminine', 'himself or herself'), + ('nom', 'theirs', 'neuter', 'its'), + ('nom', 'theirs', 'feminine', 'hers'), + ('nom', 'theirs', 'masculine', 'his'), + ('nom', 'theirs', 'gender-neutral', 'theirs'), + ('nom', 'theirs', 'feminine or masculine', 'hers or his'), + ('nom', 'theirs', 'masculine or feminine', 'his or hers'), + ('acc', 'them', 'neuter', 'it'), + ('acc', 'them', 'feminine', 'her'), + ('acc', 'them', 'masculine', 'him'), + ('acc', 'them', 'gender-neutral', 'them'), + ('acc', 'them', 'feminine or masculine', 'her or him'), + ('acc', 'them', 'masculine or feminine', 'him or her'), + ('acc', 'themselves', 'neuter', 'itself'), + ('acc', 'themselves', 'feminine', 'herself'), + ('acc', 'themselves', 'masculine', 'himself'), + ('acc', 'themselves', 'gender-neutral', 'themself'), + ('acc', 'themselves', 'feminine or masculine', 'herself or himself'), + ('acc', 'themselves', 'masculine or feminine', 'himself or herself'), +): + try: + si_pron[thecase][plur][gend] = sing + except TypeError: + si_pron[thecase][plur] = {} + si_pron[thecase][plur][gend] = sing + + +si_pron_acc_keys = enclose('|'.join(list(si_pron['acc'].keys()))) +si_pron_acc_keys_bysize = bysize(list(si_pron['acc'].keys())) + + +def get_si_pron(thecase, word, gender): + try: + sing = si_pron[thecase][word] + except KeyError: + raise # not a pronoun + try: + return sing[gender] # has several types due to gender + except TypeError: + return sing # answer independent of gender + +plverb_irregular_pres = { + # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR + # 3RD PERS. (INDET.) + "am": "are", "are": "are", "is": "are", + "was": "were", "were": "were", "was": "were", + "have": "have", "have": "have", "has": "have", + "do": "do", "do": "do", "does": "do", +} + +plverb_ambiguous_pres = { + # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR + # 3RD PERS. (INDET.) 
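The nested si_pron table and its accessor are easier to read with a few concrete lookups (values taken straight from the dictionaries above):

from inflect import get_si_pron, pl_pron_acc

pl_pron_acc["herself"]                               # 'themselves'
get_si_pron('nom', 'they', 'feminine')               # 'she'
get_si_pron('nom', 'theirs', 'neuter')               # 'its'
get_si_pron('acc', 'them', 'masculine or feminine')  # 'him or her'
get_si_pron('acc', 'us', 'neuter')                   # 'me' -- entry is a plain string, so gender is ignored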
+ "act": "act", "act": "act", "acts": "act", + "blame": "blame", "blame": "blame", "blames": "blame", + "can": "can", "can": "can", "can": "can", + "must": "must", "must": "must", "must": "must", + "fly": "fly", "fly": "fly", "flies": "fly", + "copy": "copy", "copy": "copy", "copies": "copy", + "drink": "drink", "drink": "drink", "drinks": "drink", + "fight": "fight", "fight": "fight", "fights": "fight", + "fire": "fire", "fire": "fire", "fires": "fire", + "like": "like", "like": "like", "likes": "like", + "look": "look", "look": "look", "looks": "look", + "make": "make", "make": "make", "makes": "make", + "reach": "reach", "reach": "reach", "reaches": "reach", + "run": "run", "run": "run", "runs": "run", + "sink": "sink", "sink": "sink", "sinks": "sink", + "sleep": "sleep", "sleep": "sleep", "sleeps": "sleep", + "view": "view", "view": "view", "views": "view", +} + +plverb_ambiguous_pres_keys = enclose('|'.join(list(plverb_ambiguous_pres.keys()))) + + +plverb_irregular_non_pres = ( + "did", "had", "ate", "made", "put", + "spent", "fought", "sank", "gave", "sought", + "shall", "could", "ought", "should", +) + +plverb_ambiguous_non_pres = enclose('|'.join(( + "thought", "saw", "bent", "will", "might", "cut", +))) + +# "..oes" -> "..oe" (the rest are "..oes" -> "o") + +pl_v_oes_oe = ('canoes', 'floes', 'oboes', 'roes', 'throes', 'woes') +pl_v_oes_oe_endings_size4 = ('hoes', 'toes') +pl_v_oes_oe_endings_size5 = ('shoes') + + +pl_count_zero = ( + "0", "no", "zero", "nil" +) + + +pl_count_one = ( + "1", "a", "an", "one", "each", "every", "this", "that", +) + +pl_adj_special = { + "a": "some", "an": "some", + "this": "these", "that": "those", +} + +pl_adj_special_keys = enclose('|'.join(list(pl_adj_special.keys()))) + +pl_adj_poss = { + "my": "our", + "your": "your", + "its": "their", + "her": "their", + "his": "their", + "their": "their", +} + +pl_adj_poss_keys = enclose('|'.join(list(pl_adj_poss.keys()))) + + +# 2. INDEFINITE ARTICLES + +# THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND" +# CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY +# TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!) + +A_abbrev = r""" +(?! FJO | [HLMNS]Y. | RY[EO] | SQU + | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU]) +[FHLMNRSX][A-Z] +""" + +# THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINING WITH A +# 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE +# IMPLIES AN ABBREVIATION. 
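These patterns back the engine's a()/an() methods listed in the module docstring; a sketch of the expected results, inferred from A_explicit_an and A_abbrev rather than taken from the patch itself:

import inflect

p = inflect.engine()
p.a("hour")         # 'an hour'       -- explicit "an" list
p.a("honest man")   # 'an honest man'
p.a("mpeg layer")   # 'an mpeg layer'
p.a("FBI agent")    # 'an FBI agent'  -- A_abbrev: all-caps starting with a vowel-sound consonant
p.a("horse")        # 'a horse'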
+ +A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)' + +# EXCEPTIONS TO EXCEPTIONS + +A_explicit_a = enclose('|'.join(( + "unabomber", "unanimous", "US", +))) + +A_explicit_an = enclose('|'.join(( + "euler", + "hour(?!i)", "heir", "honest", "hono[ur]", + "mpeg", +))) + +A_ordinal_an = enclose('|'.join(( + "[aefhilmnorsx]-?th", +))) + +A_ordinal_a = enclose('|'.join(( + "[bcdgjkpqtuvwyz]-?th", +))) + + +# NUMERICAL INFLECTIONS + +nth = { + 0: 'th', + 1: 'st', + 2: 'nd', + 3: 'rd', + 4: 'th', + 5: 'th', + 6: 'th', + 7: 'th', + 8: 'th', + 9: 'th', + 11: 'th', + 12: 'th', + 13: 'th', +} + +ordinal = dict(ty='tieth', + one='first', + two='second', + three='third', + five='fifth', + eight='eighth', + nine='ninth', + twelve='twelfth') + +ordinal_suff = '|'.join(list(ordinal.keys())) + + +# NUMBERS + +unit = ['', 'one', 'two', 'three', 'four', 'five', + 'six', 'seven', 'eight', 'nine'] +teen = ['ten', 'eleven', 'twelve', 'thirteen', 'fourteen', + 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen'] +ten = ['', '', 'twenty', 'thirty', 'forty', + 'fifty', 'sixty', 'seventy', 'eighty', 'ninety'] +mill = [' ', ' thousand', ' million', ' billion', ' trillion', ' quadrillion', + ' quintillion', ' sextillion', ' septillion', ' octillion', + ' nonillion', ' decillion'] + + +# SUPPORT CLASSICAL PLURALIZATIONS + +def_classical = dict( + all=False, + zero=False, + herd=False, + names=True, + persons=False, + ancient=False, +) + +all_classical = dict((k, True) for k in list(def_classical.keys())) +no_classical = dict((k, False) for k in list(def_classical.keys())) + + +# TODO: .inflectrc file does not work +# can't just execute methods from another file like this + +# for rcfile in (pathjoin(dirname(__file__), '.inflectrc'), +# expanduser(pathjoin(('~'), '.inflectrc'))): +# if isfile(rcfile): +# try: +# execfile(rcfile) +# except: +# print3("\nBad .inflectrc file (%s):\n" % rcfile) +# raise BadRcFileError + + +class engine: + + def __init__(self): + + self.classical_dict = def_classical.copy() + self.persistent_count = None + self.mill_count = 0 + self.pl_sb_user_defined = [] + self.pl_v_user_defined = [] + self.pl_adj_user_defined = [] + self.si_sb_user_defined = [] + self.A_a_user_defined = [] + self.thegender = 'neuter' + + deprecated_methods = dict(pl='plural', + plnoun='plural_noun', + plverb='plural_verb', + pladj='plural_adj', + sinoun='single_noun', + prespart='present_participle', + numwords='number_to_words', + plequal='compare', + plnounequal='compare_nouns', + plverbequal='compare_verbs', + pladjequal='compare_adjs', + wordlist='join', + ) + + def __getattr__(self, meth): + if meth in self.deprecated_methods: + print3('%s() deprecated, use %s()' % (meth, self.deprecated_methods[meth])) + raise DeprecationWarning + raise AttributeError + + def defnoun(self, singular, plural): + ''' + Set the noun plural of singular to plural. + + ''' + self.checkpat(singular) + self.checkpatplural(plural) + self.pl_sb_user_defined.extend((singular, plural)) + self.si_sb_user_defined.extend((plural, singular)) + return 1 + + def defverb(self, s1, p1, s2, p2, s3, p3): + ''' + Set the verb plurals for s1, s2 and s3 to p1, p2 and p3 respectively. + + Where 1, 2 and 3 represent the 1st, 2nd and 3rd person forms of the verb. 
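For illustration, a sketch of the user-defined hooks in use; 'wug'/'wuggen' is a made-up pair, purely hypothetical, and user definitions are consulted by ud_match before any built-in rule:

import inflect

p = inflect.engine()

# a literal user-defined plural
p.defnoun("wug", "wuggen")    # hypothetical noun, for illustration only
p.plural_noun("wug")          # 'wuggen'

# patterns also work; $1 refers to the first captured group
p.defnoun("(.*)x", "$1xen")
p.plural_noun("fax")          # 'faxen'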
+ + ''' + self.checkpat(s1) + self.checkpat(s2) + self.checkpat(s3) + self.checkpatplural(p1) + self.checkpatplural(p2) + self.checkpatplural(p3) + self.pl_v_user_defined.extend((s1, p1, s2, p2, s3, p3)) + return 1 + + def defadj(self, singular, plural): + ''' + Set the adjective plural of singular to plural. + + ''' + self.checkpat(singular) + self.checkpatplural(plural) + self.pl_adj_user_defined.extend((singular, plural)) + return 1 + + def defa(self, pattern): + ''' + Define the indefinate article as 'a' for words matching pattern. + + ''' + self.checkpat(pattern) + self.A_a_user_defined.extend((pattern, 'a')) + return 1 + + def defan(self, pattern): + ''' + Define the indefinate article as 'an' for words matching pattern. + + ''' + self.checkpat(pattern) + self.A_a_user_defined.extend((pattern, 'an')) + return 1 + + def checkpat(self, pattern): + ''' + check for errors in a regex pattern + ''' + if pattern is None: + return + try: + match(pattern, '') + except reerror: + print3("\nBad user-defined singular pattern:\n\t%s\n" % pattern) + raise BadUserDefinedPatternError + + def checkpatplural(self, pattern): + ''' + check for errors in a regex replace pattern + ''' + return + # can't find a pattern that doesn't pass the following test: + # if pattern is None: + # return + # try: + # resub('', pattern, '') + # except reerror: + # print3("\nBad user-defined plural pattern:\n\t%s\n" % pattern) + # raise BadUserDefinedPatternError + + def ud_match(self, word, wordlist): + for i in range(len(wordlist) - 2, -2, -2): # backwards through even elements + mo = search(r'^%s$' % wordlist[i], word, IGNORECASE) + if mo: + if wordlist[i + 1] is None: + return None + pl = resub(r'\$(\d+)', r'\\1', wordlist[i + 1]) # change $n to \n for expand + return mo.expand(pl) + return None + + def classical(self, **kwargs): + """ + turn classical mode on and off for various categories + + turn on all classical modes: + classical() + classical(all=True) + + turn on or off specific claassical modes: + e.g. + classical(herd=True) + classical(names=False) + + By default all classical modes are off except names. + + unknown value in args or key in kwargs rasies exception: UnknownClasicalModeError + + """ + classical_mode = list(def_classical.keys()) + if not kwargs: + self.classical_dict = all_classical.copy() + return + if 'all' in kwargs: + if kwargs['all']: + self.classical_dict = all_classical.copy() + else: + self.classical_dict = no_classical.copy() + + for k, v in list(kwargs.items()): + if k in classical_mode: + self.classical_dict[k] = v + else: + raise UnknownClassicalModeError + + def num(self, count=None, show=None): # (;$count,$show) + ''' + Set the number to be used in other method calls. + + Returns count. + + Set show to False to return '' instead. 
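A sketch of how the two mode-setting calls combine, with behaviour inferred from classical(), num() and the count handling further down:

import inflect

p = inflect.engine()
p.classical()             # same as classical(all=True)
p.plural_noun("focus")    # 'foci'
p.classical(all=False)
p.plural_noun("focus")    # 'focuses'

p.num(1)                  # returns '1' and sets a persistent count
p.plural("error")         # 'error'   -- a count of one suppresses inflection
p.num()                   # clears the persistent count, returns ''
p.plural("error")         # 'errors'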
+ + ''' + if count is not None: + try: + self.persistent_count = int(count) + except ValueError: + raise BadNumValueError + if (show is None) or show: + return str(count) + else: + self.persistent_count = None + return '' + + def gender(self, gender): + ''' + set the gender for the singular of plural pronouns + + can be one of: + 'neuter' ('they' -> 'it') + 'feminine' ('they' -> 'she') + 'masculine' ('they' -> 'he') + 'gender-neutral' ('they' -> 'they') + 'feminine or masculine' ('they' -> 'she or he') + 'masculine or feminine' ('they' -> 'he or she') + ''' + if gender in singular_pronoun_genders: + self.thegender = gender + else: + raise BadGenderError + + def nummo(self, matchobject): + ''' + num but take a matchobject + use groups 1 and 2 in matchobject + ''' + return self.num(matchobject.group(1), matchobject.group(2)) + + def plmo(self, matchobject): + ''' + plural but take a matchobject + use groups 1 and 3 in matchobject + ''' + return self.plural(matchobject.group(1), matchobject.group(3)) + + def plnounmo(self, matchobject): + ''' + plural_noun but take a matchobject + use groups 1 and 3 in matchobject + ''' + return self.plural_noun(matchobject.group(1), matchobject.group(3)) + + def plverbmo(self, matchobject): + ''' + plural_verb but take a matchobject + use groups 1 and 3 in matchobject + ''' + return self.plural_verb(matchobject.group(1), matchobject.group(3)) + + def pladjmo(self, matchobject): + ''' + plural_adj but take a matchobject + use groups 1 and 3 in matchobject + ''' + return self.plural_adj(matchobject.group(1), matchobject.group(3)) + + def sinounmo(self, matchobject): + ''' + singular_noun but take a matchobject + use groups 1 and 3 in matchobject + ''' + return self.singular_noun(matchobject.group(1), matchobject.group(3)) + + def amo(self, matchobject): + ''' + A but take a matchobject + use groups 1 and 3 in matchobject + ''' + if matchobject.group(3) is None: + return self.a(matchobject.group(1)) + return self.a(matchobject.group(1), matchobject.group(3)) + + def nomo(self, matchobject): + ''' + NO but take a matchobject + use groups 1 and 3 in matchobject + ''' + return self.no(matchobject.group(1), matchobject.group(3)) + + def ordinalmo(self, matchobject): + ''' + ordinal but take a matchobject + use group 1 + ''' + return self.ordinal(matchobject.group(1)) + + def numwordsmo(self, matchobject): + ''' + number_to_words but take a matchobject + use group 1 + ''' + return self.number_to_words(matchobject.group(1)) + + def prespartmo(self, matchobject): + ''' + prespart but take a matchobject + use group 1 + ''' + return self.present_participle(matchobject.group(1)) + +# 0. PERFORM GENERAL INFLECTIONS IN A STRING + + def inflect(self, text): + ''' + Perform inflections in a string. + + e.g. inflect('The plural of cat is plural(cat)') returns + 'The plural of cat is cats' + + can use plural, plural_noun, plural_verb, plural_adj, singular_noun, a, an, no, ordinal, + number_to_words and prespart + + ''' + save_persistent_count = self.persistent_count + sections = splitre(r"(num\([^)]*\))", text) + inflection = [] + + for section in sections: + (section, count) = subn(r"num\(\s*?(?:([^),]*)(?:,([^)]*))?)?\)", self.nummo, section) + if not count: + total = -1 + while total: + (section, total) = subn( + r"(?x)\bplural \( ([^),]*) (, ([^)]*) )? \) ", + self.plmo, section) + (section, count) = subn( + r"(?x)\bplural_noun \( ([^),]*) (, ([^)]*) )? 
\) ", + self.plnounmo, section) + total += count + (section, count) = subn( + r"(?x)\bplural_verb \( ([^),]*) (, ([^)]*) )? \) ", + self.plverbmo, section) + total += count + (section, count) = subn( + r"(?x)\bplural_adj \( ([^),]*) (, ([^)]*) )? \) ", + self.pladjmo, section) + total += count + (section, count) = subn( + r"(?x)\bsingular_noun \( ([^),]*) (, ([^)]*) )? \) ", + self.sinounmo, section) + total += count + (section, count) = subn( + r"(?x)\ban? \( ([^),]*) (, ([^)]*) )? \) ", + self.amo, section) + total += count + (section, count) = subn( + r"(?x)\bno \( ([^),]*) (, ([^)]*) )? \) ", + self.nomo, section) + total += count + (section, count) = subn( + r"(?x)\bordinal \( ([^)]*) \) ", + self.ordinalmo, section) + total += count + (section, count) = subn( + r"(?x)\bnumber_to_words \( ([^)]*) \) ", + self.numwordsmo, section) + total += count + (section, count) = subn( + r"(?x)\bpresent_participle \( ([^)]*) \) ", + self.prespartmo, section) + total += count + + inflection.append(section) + + self.persistent_count = save_persistent_count + return "".join(inflection) + +# ## PLURAL SUBROUTINES + + def postprocess(self, orig, inflected): + """ + FIX PEDANTRY AND CAPITALIZATION :-) + """ + if '|' in inflected: + inflected = inflected.split('|')[self.classical_dict['all']] + if orig == "I": + return inflected + if orig == orig.upper(): + return inflected.upper() + if orig[0] == orig[0].upper(): + return '%s%s' % (inflected[0].upper(), + inflected[1:]) + return inflected + + def partition_word(self, text): + mo = search(r'\A(\s*)(.+?)(\s*)\Z', text) + try: + return mo.group(1), mo.group(2), mo.group(3) + except AttributeError: # empty string + return '', '', '' + +# def pl(self, *args, **kwds): +# print 'pl() deprecated, use plural()' +# raise DeprecationWarning +# return self.plural(*args, **kwds) +# +# def plnoun(self, *args, **kwds): +# print 'plnoun() deprecated, use plural_noun()' +# raise DeprecationWarning +# return self.plural_noun(*args, **kwds) +# +# def plverb(self, *args, **kwds): +# print 'plverb() deprecated, use plural_verb()' +# raise DeprecationWarning +# return self.plural_verb(*args, **kwds) +# +# def pladj(self, *args, **kwds): +# print 'pladj() deprecated, use plural_adj()' +# raise DeprecationWarning +# return self.plural_adj(*args, **kwds) +# +# def sinoun(self, *args, **kwds): +# print 'sinoun() deprecated, use singular_noun()' +# raise DeprecationWarning +# return self.singular_noun(*args, **kwds) +# +# def prespart(self, *args, **kwds): +# print 'prespart() deprecated, use present_participle()' +# raise DeprecationWarning +# return self.present_participle(*args, **kwds) +# +# def numwords(self, *args, **kwds): +# print 'numwords() deprecated, use number_to_words()' +# raise DeprecationWarning +# return self.number_to_words(*args, **kwds) + + def plural(self, text, count=None): + ''' + Return the plural of text. + + If count supplied, then return text if count is one of: + 1, a, an, one, each, every, this, that + otherwise return the plural. + + Whitespace at the start and end is preserved. + + ''' + pre, word, post = self.partition_word(text) + if not word: + return text + plural = self.postprocess( + word, + self._pl_special_adjective(word, count) or + self._pl_special_verb(word, count) or + self._plnoun(word, count)) + return "%s%s%s" % (pre, plural, post) + + def plural_noun(self, text, count=None): + ''' + Return the plural of text, where text is a noun. 
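The first call below is the example from the inflect() docstring above; the second just adds a num() directive (a sketch of the expected output):

import inflect

p = inflect.engine()
p.inflect("The plural of cat is plural(cat)")
# -> 'The plural of cat is cats'

p.inflect("num(3) plural(cat) and num(1) plural(dog)")
# -> '3 cats and 1 dog'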
+ + If count supplied, then return text if count is one of: + 1, a, an, one, each, every, this, that + otherwise return the plural. + + Whitespace at the start and end is preserved. + + ''' + pre, word, post = self.partition_word(text) + if not word: + return text + plural = self.postprocess(word, self._plnoun(word, count)) + return "%s%s%s" % (pre, plural, post) + + def plural_verb(self, text, count=None): + ''' + Return the plural of text, where text is a verb. + + If count supplied, then return text if count is one of: + 1, a, an, one, each, every, this, that + otherwise return the plural. + + Whitespace at the start and end is preserved. + + ''' + pre, word, post = self.partition_word(text) + if not word: + return text + plural = self.postprocess(word, self._pl_special_verb(word, count) or + self._pl_general_verb(word, count)) + return "%s%s%s" % (pre, plural, post) + + def plural_adj(self, text, count=None): + ''' + Return the plural of text, where text is an adjective. + + If count supplied, then return text if count is one of: + 1, a, an, one, each, every, this, that + otherwise return the plural. + + Whitespace at the start and end is preserved. + + ''' + pre, word, post = self.partition_word(text) + if not word: + return text + plural = self.postprocess(word, self._pl_special_adjective(word, count) or word) + return "%s%s%s" % (pre, plural, post) + + def compare(self, word1, word2): + ''' + compare word1 and word2 for equality regardless of plurality + + return values: + eq - the strings are equal + p:s - word1 is the plural of word2 + s:p - word2 is the plural of word1 + p:p - word1 and word2 are two different plural forms of the one word + False - otherwise + + ''' + return ( + self._plequal(word1, word2, self.plural_noun) or + self._plequal(word1, word2, self.plural_verb) or + self._plequal(word1, word2, self.plural_adj)) + + def compare_nouns(self, word1, word2): + ''' + compare word1 and word2 for equality regardless of plurality + word1 and word2 are to be treated as nouns + + return values: + eq - the strings are equal + p:s - word1 is the plural of word2 + s:p - word2 is the plural of word1 + p:p - word1 and word2 are two different plural forms of the one word + False - otherwise + + ''' + return self._plequal(word1, word2, self.plural_noun) + + def compare_verbs(self, word1, word2): + ''' + compare word1 and word2 for equality regardless of plurality + word1 and word2 are to be treated as verbs + + return values: + eq - the strings are equal + p:s - word1 is the plural of word2 + s:p - word2 is the plural of word1 + p:p - word1 and word2 are two different plural forms of the one word + False - otherwise + + ''' + return self._plequal(word1, word2, self.plural_verb) + + def compare_adjs(self, word1, word2): + ''' + compare word1 and word2 for equality regardless of plurality + word1 and word2 are to be treated as adjectives + + return values: + eq - the strings are equal + p:s - word1 is the plural of word2 + s:p - word2 is the plural of word1 + p:p - word1 and word2 are two different plural forms of the one word + False - otherwise + + ''' + return self._plequal(word1, word2, self.plural_adj) + + def singular_noun(self, text, count=None, gender=None): + ''' + Return the singular of text, where text is a plural noun. + + If count supplied, then return the singular if count is one of: + 1, a, an, one, each, every, this, that or if count is None + otherwise return text unchanged. + + Whitespace at the start and end is preserved. 
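A few concrete return values (a sketch; the 'caches' and 'knives' cases lean on the si_sb_ches_che and ...ves exception tables defined earlier):

import inflect

p = inflect.engine()
p.compare("index", "indices")     # 's:p'  -- word2 is the plural of word1
p.compare("indexes", "indices")   # 'p:p'  -- two plural forms of the same word
p.compare("cat", "dog")           # False

p.singular_noun("geese")          # 'goose'
p.singular_noun("caches")         # 'cache', not 'cach'
p.singular_noun("knives")         # 'knife'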
+ + ''' + pre, word, post = self.partition_word(text) + if not word: + return text + sing = self._sinoun(word, count=count, gender=gender) + if sing is not False: + plural = self.postprocess(word, self._sinoun(word, count=count, gender=gender)) + return "%s%s%s" % (pre, plural, post) + return False + + def _plequal(self, word1, word2, pl): + classval = self.classical_dict.copy() + self.classical_dict = all_classical.copy() + if word1 == word2: + return "eq" + if word1 == pl(word2): + return "p:s" + if pl(word1) == word2: + return "s:p" + self.classical_dict = no_classical.copy() + if word1 == pl(word2): + return "p:s" + if pl(word1) == word2: + return "s:p" + self.classical_dict = classval.copy() + + if pl == self.plural or pl == self.plural_noun: + if self._pl_check_plurals_N(word1, word2): + return "p:p" + if self._pl_check_plurals_N(word2, word1): + return "p:p" + if pl == self.plural or pl == self.plural_adj: + if self._pl_check_plurals_adj(word1, word2): + return "p:p" + return False + + def _pl_reg_plurals(self, pair, stems, end1, end2): + if search(r"(%s)(%s\|\1%s|%s\|\1%s)" % (stems, end1, end2, end2, end1), pair): + return True + return False + + def _pl_check_plurals_N(self, word1, word2): + pair = "%s|%s" % (word1, word2) + if pair in list(pl_sb_irregular_s.values()): + return True + if pair in list(pl_sb_irregular.values()): + return True + if pair in list(pl_sb_irregular_caps.values()): + return True + + for (stems, end1, end2) in ( + (pl_sb_C_a_ata, "as", "ata"), + (pl_sb_C_is_ides, "is", "ides"), + (pl_sb_C_a_ae, "s", "e"), + (pl_sb_C_en_ina, "ens", "ina"), + (pl_sb_C_um_a, "ums", "a"), + (pl_sb_C_us_i, "uses", "i"), + (pl_sb_C_on_a, "ons", "a"), + (pl_sb_C_o_i_stems, "os", "i"), + (pl_sb_C_ex_ices, "exes", "ices"), + (pl_sb_C_ix_ices, "ixes", "ices"), + (pl_sb_C_i, "s", "i"), + (pl_sb_C_im, "s", "im"), + ('.*eau', "s", "x"), + ('.*ieu', "s", "x"), + ('.*tri', "xes", "ces"), + ('.{2,}[yia]n', "xes", "ges") + ): + if self._pl_reg_plurals(pair, stems, end1, end2): + return True + return False + + def _pl_check_plurals_adj(self, word1, word2): +# VERSION: tuple in endswith requires python 2.5 + word1a = word1[:word1.rfind("'")] if word1.endswith(("'s", "'")) else '' + word2a = word2[:word2.rfind("'")] if word2.endswith(("'s", "'")) else '' + # TODO: BUG? report upstream. I don't think you should chop off the s' + # word1b = word1[:-2] if word1.endswith("s'") else '' + # word2b = word2[:-2] if word2.endswith("s'") else '' + + # TODO: dresses', dresses's -> dresses, dresses when chop off letters + # then they return False because they are the same. Need to fix this. 
+ + if word1a: + if word2a and (self._pl_check_plurals_N(word1a, word2a) + or self._pl_check_plurals_N(word2a, word1a)): + return True + # if word2b and ( self._pl_check_plurals_N(word1a, word2b) + # or self._pl_check_plurals_N(word2b, word1a) ): + # return True + + # if word1b: + # if word2a and ( self._pl_check_plurals_N(word1b, word2a) + # or self._pl_check_plurals_N(word2a, word1b) ): + # return True + # if word2b and ( self._pl_check_plurals_N(word1b, word2b) + # or self._pl_check_plurals_N(word2b, word1b) ): + # return True + + return False + + def get_count(self, count=None): + if count is None and self.persistent_count is not None: + count = self.persistent_count + + if count is not None: + count = 1 if ((str(count) in pl_count_one) or + (self.classical_dict['zero'] and str(count).lower() in pl_count_zero)) else 2 + else: + count = '' + return count + + # @profile + def _plnoun(self, word, count=None): + count = self.get_count(count) + +# DEFAULT TO PLURAL + + if count == 1: + return word + +# HANDLE USER-DEFINED NOUNS + + value = self.ud_match(word, self.pl_sb_user_defined) + if value is not None: + return value + +# HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS + + if word == '': + return word + + lowerword = word.lower() + + if lowerword in pl_sb_uninflected_complete: + return word + + if word in pl_sb_uninflected_caps: + return word + + for k, v in pl_sb_uninflected_bysize.items(): + if lowerword[-k:] in v: + return word + + if (self.classical_dict['herd'] and lowerword in pl_sb_uninflected_herd): + return word + +# HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.) + + mo = search(r"^(?:%s)$" % pl_sb_postfix_adj_stems, word, IGNORECASE) + if mo and mo.group(2) != '': + return "%s%s" % (self._plnoun(mo.group(1), 2), mo.group(2)) + + if ' a ' in lowerword or '-a-' in lowerword: + mo = search(r"^(?:%s)$" % pl_sb_prep_dual_compound, word, IGNORECASE) + if mo and mo.group(2) != '' and mo.group(3) != '': + return "%s%s%s" % (self._plnoun(mo.group(1), 2), + mo.group(2), + self._plnoun(mo.group(3))) + + lowersplit = lowerword.split(' ') + if len(lowersplit) >= 3: + for numword in range(1, len(lowersplit) - 1): + if lowersplit[numword] in pl_prep_list_da: + return ' '.join( + lowersplit[:numword - 1] + + [self._plnoun(lowersplit[numword - 1], 2)] + lowersplit[numword:]) + + lowersplit = lowerword.split('-') + if len(lowersplit) >= 3: + for numword in range(1, len(lowersplit) - 1): + if lowersplit[numword] in pl_prep_list_da: + return ' '.join( + lowersplit[:numword - 1] + + [self._plnoun(lowersplit[numword - 1], 2) + + '-' + lowersplit[numword] + '-']) + ' '.join(lowersplit[(numword + 1):]) + +# HANDLE PRONOUNS + + for k, v in pl_pron_acc_keys_bysize.items(): + if lowerword[-k:] in v: # ends with accusivate pronoun + for pk, pv in pl_prep_bysize.items(): + if lowerword[:pk] in pv: # starts with a prep + if lowerword.split() == [lowerword[:pk], lowerword[-k:]]: # only whitespace in between + return lowerword[:-k] + pl_pron_acc[lowerword[-k:]] + + try: + return pl_pron_nom[word.lower()] + except KeyError: + pass + + try: + return pl_pron_acc[word.lower()] + except KeyError: + pass + +# HANDLE ISOLATED IRREGULAR PLURALS + + wordsplit = word.split() + wordlast = wordsplit[-1] + lowerwordlast = wordlast.lower() + + if wordlast in list(pl_sb_irregular_caps.keys()): + llen = len(wordlast) + return '%s%s' % (word[:-llen], + pl_sb_irregular_caps[wordlast]) + + if lowerwordlast in list(pl_sb_irregular.keys()): + llen = len(lowerwordlast) + return '%s%s' % 
(word[:-llen], + pl_sb_irregular[lowerwordlast]) + + if (' '.join(wordsplit[-2:])).lower() in list(pl_sb_irregular_compound.keys()): + llen = len(' '.join(wordsplit[-2:])) # TODO: what if 2 spaces between these words? + return '%s%s' % (word[:-llen], + pl_sb_irregular_compound[(' '.join(wordsplit[-2:])).lower()]) + + if lowerword[-3:] == 'quy': + return word[:-1] + 'ies' + + if lowerword[-6:] == 'person': + if self.classical_dict['persons']: + return word + 's' + else: + return word[:-4] + 'ople' + +# HANDLE FAMILIES OF IRREGULAR PLURALS + + if lowerword[-3:] == 'man': + for k, v in pl_sb_U_man_mans_bysize.items(): + if lowerword[-k:] in v: + return word + 's' + for k, v in pl_sb_U_man_mans_caps_bysize.items(): + if word[-k:] in v: + return word + 's' + return word[:-3] + 'men' + if lowerword[-5:] == 'mouse': + return word[:-5] + 'mice' + if lowerword[-5:] == 'louse': + return word[:-5] + 'lice' + if lowerword[-5:] == 'goose': + return word[:-5] + 'geese' + if lowerword[-5:] == 'tooth': + return word[:-5] + 'teeth' + if lowerword[-4:] == 'foot': + return word[:-4] + 'feet' + + if lowerword == 'die': + return 'dice' + +# HANDLE UNASSIMILATED IMPORTS + + if lowerword[-4:] == 'ceps': + return word + if lowerword[-4:] == 'zoon': + return word[:-2] + 'a' + if lowerword[-3:] in ('cis', 'sis', 'xis'): + return word[:-2] + 'es' + + for lastlet, d, numend, post in ( + ('h', pl_sb_U_ch_chs_bysize, None, 's'), + ('x', pl_sb_U_ex_ices_bysize, -2, 'ices'), + ('x', pl_sb_U_ix_ices_bysize, -2, 'ices'), + ('m', pl_sb_U_um_a_bysize, -2, 'a'), + ('s', pl_sb_U_us_i_bysize, -2, 'i'), + ('n', pl_sb_U_on_a_bysize, -2, 'a'), + ('a', pl_sb_U_a_ae_bysize, None, 'e'), + ): + if lowerword[-1] == lastlet: # this test to add speed + for k, v in d.items(): + if lowerword[-k:] in v: + return word[:numend] + post + +# HANDLE INCOMPLETELY ASSIMILATED IMPORTS + + if (self.classical_dict['ancient']): + if lowerword[-4:] == 'trix': + return word[:-1] + 'ces' + if lowerword[-3:] in ('eau', 'ieu'): + return word + 'x' + if lowerword[-3:] in ('ynx', 'inx', 'anx') and len(word) > 4: + return word[:-1] + 'ges' + + for lastlet, d, numend, post in ( + ('n', pl_sb_C_en_ina_bysize, -2, 'ina'), + ('x', pl_sb_C_ex_ices_bysize, -2, 'ices'), + ('x', pl_sb_C_ix_ices_bysize, -2, 'ices'), + ('m', pl_sb_C_um_a_bysize, -2, 'a'), + ('s', pl_sb_C_us_i_bysize, -2, 'i'), + ('s', pl_sb_C_us_us_bysize, None, ''), + ('a', pl_sb_C_a_ae_bysize, None, 'e'), + ('a', pl_sb_C_a_ata_bysize, None, 'ta'), + ('s', pl_sb_C_is_ides_bysize, -1, 'des'), + ('o', pl_sb_C_o_i_bysize, -1, 'i'), + ('n', pl_sb_C_on_a_bysize, -2, 'a'), + ): + if lowerword[-1] == lastlet: # this test to add speed + for k, v in d.items(): + if lowerword[-k:] in v: + return word[:numend] + post + + for d, numend, post in ( + (pl_sb_C_i_bysize, None, 'i'), + (pl_sb_C_im_bysize, None, 'im'), + ): + for k, v in d.items(): + if lowerword[-k:] in v: + return word[:numend] + post + +# HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SILIBANTS + + if lowerword in pl_sb_singular_s_complete: + return word + 'es' + + for k, v in pl_sb_singular_s_bysize.items(): + if lowerword[-k:] in v: + return word + 'es' + + if lowerword[-2:] == 'es' and word[0] == word[0].upper(): + return word + 'es' + +# Wouldn't special words +# ending with 's' always have been caught, regardless of them starting +# with a capital letter (i.e. being names) +# It makes sense below to do this for words ending in 'y' so that +# Sally -> Sallys. But not sure it makes sense here. 
Where is the case +# of a word ending in s that is caught here and would otherwise have been +# caught below? +# +# removing it as I can't find a case that executes it +# TODO: check this again +# +# if (self.classical_dict['names']): +# mo = search(r"([A-Z].*s)$", word) +# if mo: +# return "%ses" % mo.group(1) + + if lowerword[-1] == 'z': + for k, v in pl_sb_z_zes_bysize.items(): + if lowerword[-k:] in v: + return word + 'es' + + if lowerword[-2:-1] != 'z': + return word + 'zes' + + if lowerword[-2:] == 'ze': + for k, v in pl_sb_ze_zes_bysize.items(): + if lowerword[-k:] in v: + return word + 's' + + if lowerword[-2:] in ('ch', 'sh', 'zz', 'ss') or lowerword[-1] == 'x': + return word + 'es' + +# ## (r"(.*)(us)$", "%s%ses"), TODO: why is this commented? + +# HANDLE ...f -> ...ves + + if lowerword[-3:] in ('elf', 'alf', 'olf'): + return word[:-1] + 'ves' + if lowerword[-3:] == 'eaf' and lowerword[-4:-3] != 'd': + return word[:-1] + 'ves' + if lowerword[-4:] in ('nife', 'life', 'wife'): + return word[:-2] + 'ves' + if lowerword[-3:] == 'arf': + return word[:-1] + 'ves' + +# HANDLE ...y + + if lowerword[-1] == 'y': + if lowerword[-2:-1] in 'aeiou' or len(word) == 1: + return word + 's' + + if (self.classical_dict['names']): + if lowerword[-1] == 'y' and word[0] == word[0].upper(): + return word + 's' + + return word[:-1] + 'ies' + +# HANDLE ...o + + if lowerword in pl_sb_U_o_os_complete: + return word + 's' + + for k, v in pl_sb_U_o_os_bysize.items(): + if lowerword[-k:] in v: + return word + 's' + + if lowerword[-2:] in ('ao', 'eo', 'io', 'oo', 'uo'): + return word + 's' + + if lowerword[-1] == 'o': + return word + 'es' + +# OTHERWISE JUST ADD ...s + + return "%ss" % word + + def _pl_special_verb(self, word, count=None): + if (self.classical_dict['zero'] and + str(count).lower() in pl_count_zero): + return False + count = self.get_count(count) + + if count == 1: + return word + +# HANDLE USER-DEFINED VERBS + + value = self.ud_match(word, self.pl_v_user_defined) + if value is not None: + return value + +# HANDLE IRREGULAR PRESENT TENSE (SIMPLE AND COMPOUND) + + lowerword = word.lower() + try: + firstword = lowerword.split()[0] + except IndexError: + return False # word is '' + + if firstword in list(plverb_irregular_pres.keys()): + return "%s%s" % (plverb_irregular_pres[firstword], word[len(firstword):]) + +# HANDLE IRREGULAR FUTURE, PRETERITE AND PERFECT TENSES + + if firstword in plverb_irregular_non_pres: + return word + +# HANDLE PRESENT NEGATIONS (SIMPLE AND COMPOUND) + + if firstword.endswith("n't") and firstword[:-3] in list(plverb_irregular_pres.keys()): + return "%sn't%s" % (plverb_irregular_pres[firstword[:-3]], word[len(firstword):]) + + if firstword.endswith("n't"): + return word + +# HANDLE SPECIAL CASES + + mo = search(r"^(%s)$" % plverb_special_s, word) + if mo: + return False + if search(r"\s", word): + return False + if lowerword == 'quizzes': + return 'quiz' + +# HANDLE STANDARD 3RD PERSON (CHOP THE ...(e)s OFF SINGLE WORDS) + + if lowerword[-4:] in ('ches', 'shes', 'zzes', 'sses') or \ + lowerword[-3:] == 'xes': + return word[:-2] + +# # mo = search(r"^(.*)([cs]h|[x]|zz|ss)es$", +# # word, IGNORECASE) +# # if mo: +# # return "%s%s" % (mo.group(1), mo.group(2)) + + if lowerword[-3:] == 'ies' and len(word) > 3: + return lowerword[:-3] + 'y' + + if (lowerword in pl_v_oes_oe or + lowerword[-4:] in pl_v_oes_oe_endings_size4 or + lowerword[-5:] in pl_v_oes_oe_endings_size5): + return word[:-1] + + if lowerword.endswith('oes') and len(word) > 3: + return lowerword[:-2] + + mo = 
search(r"^(.*[^s])s$", word, IGNORECASE) + if mo: + return mo.group(1) + +# OTHERWISE, A REGULAR VERB (HANDLE ELSEWHERE) + + return False + + def _pl_general_verb(self, word, count=None): + count = self.get_count(count) + + if count == 1: + return word + +# HANDLE AMBIGUOUS PRESENT TENSES (SIMPLE AND COMPOUND) + + mo = search(r"^(%s)((\s.*)?)$" % plverb_ambiguous_pres_keys, word, IGNORECASE) + if mo: + return "%s%s" % (plverb_ambiguous_pres[mo.group(1).lower()], mo.group(2)) + +# HANDLE AMBIGUOUS PRETERITE AND PERFECT TENSES + + mo = search(r"^(%s)((\s.*)?)$" % plverb_ambiguous_non_pres, word, IGNORECASE) + if mo: + return word + +# OTHERWISE, 1st OR 2ND PERSON IS UNINFLECTED + + return word + + def _pl_special_adjective(self, word, count=None): + count = self.get_count(count) + + if count == 1: + return word + +# HANDLE USER-DEFINED ADJECTIVES + + value = self.ud_match(word, self.pl_adj_user_defined) + if value is not None: + return value + +# HANDLE KNOWN CASES + + mo = search(r"^(%s)$" % pl_adj_special_keys, + word, IGNORECASE) + if mo: + return "%s" % (pl_adj_special[mo.group(1).lower()]) + +# HANDLE POSSESSIVES + + mo = search(r"^(%s)$" % pl_adj_poss_keys, + word, IGNORECASE) + if mo: + return "%s" % (pl_adj_poss[mo.group(1).lower()]) + + mo = search(r"^(.*)'s?$", + word) + if mo: + pl = self.plural_noun(mo.group(1)) + trailing_s = "" if pl[-1] == 's' else "s" + return "%s'%s" % (pl, trailing_s) + +# OTHERWISE, NO IDEA + + return False + + # @profile + def _sinoun(self, word, count=None, gender=None): + count = self.get_count(count) + +# DEFAULT TO PLURAL + + if count == 2: + return word + +# SET THE GENDER + + try: + if gender is None: + gender = self.thegender + elif gender not in singular_pronoun_genders: + raise BadGenderError + except (TypeError, IndexError): + raise BadGenderError + +# HANDLE USER-DEFINED NOUNS + + value = self.ud_match(word, self.si_sb_user_defined) + if value is not None: + return value + +# HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS + + if word == '': + return word + + lowerword = word.lower() + + if word in si_sb_ois_oi_case: + return word[:-1] + + if lowerword in pl_sb_uninflected_complete: + return word + + if word in pl_sb_uninflected_caps: + return word + + for k, v in pl_sb_uninflected_bysize.items(): + if lowerword[-k:] in v: + return word + + if (self.classical_dict['herd'] and lowerword in pl_sb_uninflected_herd): + return word + +# HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.) + + mo = search(r"^(?:%s)$" % pl_sb_postfix_adj_stems, word, IGNORECASE) + if mo and mo.group(2) != '': + return "%s%s" % (self._sinoun(mo.group(1), 1, gender=gender), mo.group(2)) + + # how to reverse this one? 
+ # mo = search(r"^(?:%s)$" % pl_sb_prep_dual_compound, word, IGNORECASE) + # if mo and mo.group(2) != '' and mo.group(3) != '': + # return "%s%s%s" % (self._sinoun(mo.group(1), 1), + # mo.group(2), + # self._sinoun(mo.group(3), 1)) + + lowersplit = lowerword.split(' ') + if len(lowersplit) >= 3: + for numword in range(1, len(lowersplit) - 1): + if lowersplit[numword] in pl_prep_list_da: + return ' '.join(lowersplit[:numword - 1] + + [self._sinoun(lowersplit[numword - 1], 1, gender=gender)] + + lowersplit[numword:]) + + lowersplit = lowerword.split('-') + if len(lowersplit) >= 3: + for numword in range(1, len(lowersplit) - 1): + if lowersplit[numword] in pl_prep_list_da: + return ' '.join( + lowersplit[:numword - 1] + + [self._sinoun(lowersplit[numword - 1], 1, gender=gender) + + '-' + lowersplit[numword] + '-']) + ' '.join(lowersplit[(numword + 1):]) + +# HANDLE PRONOUNS + + for k, v in si_pron_acc_keys_bysize.items(): + if lowerword[-k:] in v: # ends with accusivate pronoun + for pk, pv in pl_prep_bysize.items(): + if lowerword[:pk] in pv: # starts with a prep + if lowerword.split() == [lowerword[:pk], lowerword[-k:]]: # only whitespace in between + return lowerword[:-k] + get_si_pron('acc', lowerword[-k:], gender) + + try: + return get_si_pron('nom', word.lower(), gender) + except KeyError: + pass + + try: + return get_si_pron('acc', word.lower(), gender) + except KeyError: + pass + +# HANDLE ISOLATED IRREGULAR PLURALS + + wordsplit = word.split() + wordlast = wordsplit[-1] + lowerwordlast = wordlast.lower() + + if wordlast in list(si_sb_irregular_caps.keys()): + llen = len(wordlast) + return '%s%s' % (word[:-llen], + si_sb_irregular_caps[wordlast]) + + if lowerwordlast in list(si_sb_irregular.keys()): + llen = len(lowerwordlast) + return '%s%s' % (word[:-llen], + si_sb_irregular[lowerwordlast]) + + if (' '.join(wordsplit[-2:])).lower() in list(si_sb_irregular_compound.keys()): + llen = len(' '.join(wordsplit[-2:])) # TODO: what if 2 spaces between these words? 
+ return '%s%s' % (word[:-llen], + si_sb_irregular_compound[(' '.join(wordsplit[-2:])).lower()]) + + if lowerword[-5:] == 'quies': + return word[:-3] + 'y' + + if lowerword[-7:] == 'persons': + return word[:-1] + if lowerword[-6:] == 'people': + return word[:-4] + 'rson' + +# HANDLE FAMILIES OF IRREGULAR PLURALS + + if lowerword[-4:] == 'mans': + for k, v in si_sb_U_man_mans_bysize.items(): + if lowerword[-k:] in v: + return word[:-1] + for k, v in si_sb_U_man_mans_caps_bysize.items(): + if word[-k:] in v: + return word[:-1] + if lowerword[-3:] == 'men': + return word[:-3] + 'man' + if lowerword[-4:] == 'mice': + return word[:-4] + 'mouse' + if lowerword[-4:] == 'lice': + return word[:-4] + 'louse' + if lowerword[-5:] == 'geese': + return word[:-5] + 'goose' + if lowerword[-5:] == 'teeth': + return word[:-5] + 'tooth' + if lowerword[-4:] == 'feet': + return word[:-4] + 'foot' + + if lowerword == 'dice': + return 'die' + +# HANDLE UNASSIMILATED IMPORTS + + if lowerword[-4:] == 'ceps': + return word + if lowerword[-3:] == 'zoa': + return word[:-1] + 'on' + + for lastlet, d, numend, post in ( + ('s', si_sb_U_ch_chs_bysize, -1, ''), + ('s', si_sb_U_ex_ices_bysize, -4, 'ex'), + ('s', si_sb_U_ix_ices_bysize, -4, 'ix'), + ('a', si_sb_U_um_a_bysize, -1, 'um'), + ('i', si_sb_U_us_i_bysize, -1, 'us'), + ('a', si_sb_U_on_a_bysize, -1, 'on'), + ('e', si_sb_U_a_ae_bysize, -1, ''), + ): + if lowerword[-1] == lastlet: # this test to add speed + for k, v in d.items(): + if lowerword[-k:] in v: + return word[:numend] + post + +# HANDLE INCOMPLETELY ASSIMILATED IMPORTS + + if (self.classical_dict['ancient']): + + if lowerword[-6:] == 'trices': + return word[:-3] + 'x' + if lowerword[-4:] in ('eaux', 'ieux'): + return word[:-1] + if lowerword[-5:] in ('ynges', 'inges', 'anges') and len(word) > 6: + return word[:-3] + 'x' + + for lastlet, d, numend, post in ( + ('a', si_sb_C_en_ina_bysize, -3, 'en'), + ('s', si_sb_C_ex_ices_bysize, -4, 'ex'), + ('s', si_sb_C_ix_ices_bysize, -4, 'ix'), + ('a', si_sb_C_um_a_bysize, -1, 'um'), + ('i', si_sb_C_us_i_bysize, -1, 'us'), + ('s', pl_sb_C_us_us_bysize, None, ''), + ('e', si_sb_C_a_ae_bysize, -1, ''), + ('a', si_sb_C_a_ata_bysize, -2, ''), + ('s', si_sb_C_is_ides_bysize, -3, 's'), + ('i', si_sb_C_o_i_bysize, -1, 'o'), + ('a', si_sb_C_on_a_bysize, -1, 'on'), + ('m', si_sb_C_im_bysize, -2, ''), + ('i', si_sb_C_i_bysize, -1, ''), + ): + if lowerword[-1] == lastlet: # this test to add speed + for k, v in d.items(): + if lowerword[-k:] in v: + return word[:numend] + post + +# HANDLE PLURLS ENDING IN uses -> use + + if (lowerword[-6:] == 'houses' or + word in si_sb_uses_use_case or + lowerword in si_sb_uses_use): + return word[:-1] + +# HANDLE PLURLS ENDING IN ies -> ie + + if word in si_sb_ies_ie_case or lowerword in si_sb_ies_ie: + return word[:-1] + +# HANDLE PLURLS ENDING IN oes -> oe + + if (lowerword[-5:] == 'shoes' or + word in si_sb_oes_oe_case or + lowerword in si_sb_oes_oe): + return word[:-1] + +# HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SILIBANTS + + if (word in si_sb_sses_sse_case or + lowerword in si_sb_sses_sse): + return word[:-1] + + if lowerword in si_sb_singular_s_complete: + return word[:-2] + + for k, v in si_sb_singular_s_bysize.items(): + if lowerword[-k:] in v: + return word[:-2] + + if lowerword[-4:] == 'eses' and word[0] == word[0].upper(): + return word[:-2] + +# Wouldn't special words +# ending with 's' always have been caught, regardless of them starting +# with a capital letter (i.e. 
being names) +# It makes sense below to do this for words ending in 'y' so that +# Sally -> Sallys. But not sure it makes sense here. Where is the case +# of a word ending in s that is caught here and would otherwise have been +# caught below? +# +# removing it as I can't find a case that executes it +# TODO: check this again +# +# if (self.classical_dict['names']): +# mo = search(r"([A-Z].*ses)$", word) +# if mo: +# return "%s" % mo.group(1) + + if lowerword in si_sb_z_zes: + return word[:-2] + + if lowerword in si_sb_zzes_zz: + return word[:-2] + + if lowerword[-4:] == 'zzes': + return word[:-3] + + if (word in si_sb_ches_che_case or + lowerword in si_sb_ches_che): + return word[:-1] + + if lowerword[-4:] in ('ches', 'shes'): + return word[:-2] + + if lowerword in si_sb_xes_xe: + return word[:-1] + + if lowerword[-3:] == 'xes': + return word[:-2] +# (r"(.*)(us)es$", "%s%s"), TODO: why is this commented? + +# HANDLE ...f -> ...ves + + if (word in si_sb_ves_ve_case or + lowerword in si_sb_ves_ve): + return word[:-1] + + if lowerword[-3:] == 'ves': + if lowerword[-5:-3] in ('el', 'al', 'ol'): + return word[:-3] + 'f' + if lowerword[-5:-3] == 'ea' and word[-6:-5] != 'd': + return word[:-3] + 'f' + if lowerword[-5:-3] in ('ni', 'li', 'wi'): + return word[:-3] + 'fe' + if lowerword[-5:-3] == 'ar': + return word[:-3] + 'f' + +# HANDLE ...y + + if lowerword[-2:] == 'ys': + if len(lowerword) > 2 and lowerword[-3] in 'aeiou': + return word[:-1] + + if (self.classical_dict['names']): + if lowerword[-2:] == 'ys' and word[0] == word[0].upper(): + return word[:-1] + + if lowerword[-3:] == 'ies': + return word[:-3] + 'y' + +# HANDLE ...o + + if lowerword[-2:] == 'os': + + if lowerword in si_sb_U_o_os_complete: + return word[:-1] + + for k, v in si_sb_U_o_os_bysize.items(): + if lowerword[-k:] in v: + return word[:-1] + + if lowerword[-3:] in ('aos', 'eos', 'ios', 'oos', 'uos'): + return word[:-1] + + if lowerword[-3:] == 'oes': + return word[:-2] + +# UNASSIMILATED IMPORTS FINAL RULE + + if word in si_sb_es_is: + return word[:-2] + 'is' + +# OTHERWISE JUST REMOVE ...s + + if lowerword[-1] == 's': + return word[:-1] + +# COULD NOT FIND SINGULAR + + return False + +# ADJECTIVES + + def a(self, text, count=1): + ''' + Return the appropriate indefinite article followed by text. + + The indefinite article is either 'a' or 'an'. + + If count is not one, then return count followed by text + instead of 'a' or 'an'. + + Whitespace at the start and end is preserved. 
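+
+ e.g. a('ant') returns 'an ant'
+ a('cat') returns 'a cat'
+ an('ewe') returns 'a ewe'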
+ + ''' + mo = search(r"\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z", + text, IGNORECASE) + if mo: + word = mo.group(2) + if not word: + return text + pre = mo.group(1) + post = mo.group(3) + result = self._indef_article(word, count) + return "%s%s%s" % (pre, result, post) + return '' + + an = a + + def _indef_article(self, word, count): + mycount = self.get_count(count) + + if mycount != 1: + return "%s %s" % (count, word) + +# HANDLE USER-DEFINED VARIANTS + + value = self.ud_match(word, self.A_a_user_defined) + if value is not None: + return "%s %s" % (value, word) + +# HANDLE ORDINAL FORMS + + for a in ( + (r"^(%s)" % A_ordinal_a, "a"), + (r"^(%s)" % A_ordinal_an, "an"), + ): + mo = search(a[0], word, IGNORECASE) + if mo: + return "%s %s" % (a[1], word) + +# HANDLE SPECIAL CASES + + for a in ( + (r"^(%s)" % A_explicit_an, "an"), + (r"^[aefhilmnorsx]$", "an"), + (r"^[bcdgjkpqtuvwyz]$", "a"), + ): + mo = search(a[0], word, IGNORECASE) + if mo: + return "%s %s" % (a[1], word) + +# HANDLE ABBREVIATIONS + + for a in ( + (r"(%s)" % A_abbrev, "an", VERBOSE), + (r"^[aefhilmnorsx][.-]", "an", IGNORECASE), + (r"^[a-z][.-]", "a", IGNORECASE), + ): + mo = search(a[0], word, a[2]) + if mo: + return "%s %s" % (a[1], word) + +# HANDLE CONSONANTS + + mo = search(r"^[^aeiouy]", word, IGNORECASE) + if mo: + return "a %s" % word + +# HANDLE SPECIAL VOWEL-FORMS + + for a in ( + (r"^e[uw]", "a"), + (r"^onc?e\b", "a"), + (r"^onetime\b", "a"), + (r"^uni([^nmd]|mo)", "a"), + (r"^u[bcfghjkqrst][aeiou]", "a"), + (r"^ukr", "a"), + (r"^(%s)" % A_explicit_a, "a"), + ): + mo = search(a[0], word, IGNORECASE) + if mo: + return "%s %s" % (a[1], word) + +# HANDLE SPECIAL CAPITALS + + mo = search(r"^U[NK][AIEO]?", word) + if mo: + return "a %s" % word + +# HANDLE VOWELS + + mo = search(r"^[aeiou]", word, IGNORECASE) + if mo: + return "an %s" % word + +# HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND) + + mo = search(r"^(%s)" % A_y_cons, word, IGNORECASE) + if mo: + return "an %s" % word + +# OTHERWISE, GUESS "a" + return "a %s" % word + +# 2. TRANSLATE ZERO-QUANTIFIED $word TO "no plural($word)" + + def no(self, text, count=None): + ''' + If count is 0, no, zero or nil, return 'no' followed by the plural + of text. + + If count is one of: + 1, a, an, one, each, every, this, that + return count followed by text. + + Otherwise return count follow by the plural of text. + + In the return value count is always followed by a space. + + Whitespace at the start and end is preserved. + + ''' + if count is None and self.persistent_count is not None: + count = self.persistent_count + + if count is None: + count = 0 + mo = search(r"\A(\s*)(.+?)(\s*)\Z", text) + pre = mo.group(1) + word = mo.group(2) + post = mo.group(3) + + if str(count).lower() in pl_count_zero: + return "%sno %s%s" % (pre, self.plural(word, 0), post) + else: + return "%s%s %s%s" % (pre, count, self.plural(word, count), post) + +# PARTICIPLES + + def present_participle(self, word): + ''' + Return the present participle for word. + + word is the 3rd person singular verb. + + ''' + plv = self.plural_verb(word, 2) + + for pat, repl in ( + (r"ie$", r"y"), + (r"ue$", r"u"), # TODO: isn't ue$ -> u encompassed in the following rule? 
+ (r"([auy])e$", r"\g<1>"), + (r"ski$", r"ski"), + (r"[^b]i$", r""), + (r"^(are|were)$", r"be"), + (r"^(had)$", r"hav"), + (r"^(hoe)$", r"\g<1>"), + (r"([^e])e$", r"\g<1>"), + (r"er$", r"er"), + (r"([^aeiou][aeiouy]([bdgmnprst]))$", "\g<1>\g<2>"), + ): + (ans, num) = subn(pat, repl, plv) + if num: + return "%sing" % ans + return "%sing" % ans + +# NUMERICAL INFLECTIONS + + def ordinal(self, num): + ''' + Return the ordinal of num. + + num can be an integer or text + + e.g. ordinal(1) returns '1st' + ordinal('one') returns 'first' + + ''' + if match(r"\d", str(num)): + try: + num % 2 + n = num + except TypeError: + if '.' in str(num): + try: + n = int(num[-1]) # numbers after decimal, so only need last one for ordinal + except ValueError: # ends with '.', so need to use whole string + n = int(num[:-1]) + else: + n = int(num) + try: + post = nth[n % 100] + except KeyError: + post = nth[n % 10] + return "%s%s" % (num, post) + else: + mo = search(r"(%s)\Z" % ordinal_suff, num) + try: + post = ordinal[mo.group(1)] + return resub(r"(%s)\Z" % ordinal_suff, post, num) + except AttributeError: + return "%sth" % num + + def millfn(self, ind=0): + if ind > len(mill) - 1: + print3("number out of range") + raise NumOutOfRangeError + return mill[ind] + + def unitfn(self, units, mindex=0): + return "%s%s" % (unit[units], self.millfn(mindex)) + + def tenfn(self, tens, units, mindex=0): + if tens != 1: + return "%s%s%s%s" % (ten[tens], + '-' if tens and units else '', + unit[units], + self.millfn(mindex)) + return "%s%s" % (teen[units], mill[mindex]) + + def hundfn(self, hundreds, tens, units, mindex): + if hundreds: + return "%s hundred%s%s%s, " % (unit[hundreds], # use unit not unitfn as simpler + " %s " % self.number_args['andword'] if tens or units else '', + self.tenfn(tens, units), + self.millfn(mindex)) + if tens or units: + return "%s%s, " % (self.tenfn(tens, units), self.millfn(mindex)) + return '' + + def group1sub(self, mo): + units = int(mo.group(1)) + if units == 1: + return " %s, " % self.number_args['one'] + elif units: + # TODO: bug one and zero are padded with a space but other numbers aren't. check this in perl + return "%s, " % unit[units] + else: + return " %s, " % self.number_args['zero'] + + def group1bsub(self, mo): + units = int(mo.group(1)) + if units: + # TODO: bug one and zero are padded with a space but other numbers aren't. check this in perl + return "%s, " % unit[units] + else: + return " %s, " % self.number_args['zero'] + + def group2sub(self, mo): + tens = int(mo.group(1)) + units = int(mo.group(2)) + if tens: + return "%s, " % self.tenfn(tens, units) + if units: + return " %s %s, " % (self.number_args['zero'], unit[units]) + return " %s %s, " % (self.number_args['zero'], self.number_args['zero']) + + def group3sub(self, mo): + hundreds = int(mo.group(1)) + tens = int(mo.group(2)) + units = int(mo.group(3)) + if hundreds == 1: + hunword = " %s" % self.number_args['one'] + elif hundreds: + hunword = "%s" % unit[hundreds] + # TODO: bug one and zero are padded with a space but other numbers aren't. 
check this in perl + else: + hunword = " %s" % self.number_args['zero'] + if tens: + tenword = self.tenfn(tens, units) + elif units: + tenword = " %s %s" % (self.number_args['zero'], unit[units]) + else: + tenword = " %s %s" % (self.number_args['zero'], self.number_args['zero']) + return "%s %s, " % (hunword, tenword) + + def hundsub(self, mo): + ret = self.hundfn(int(mo.group(1)), int(mo.group(2)), int(mo.group(3)), self.mill_count) + self.mill_count += 1 + return ret + + def tensub(self, mo): + return "%s, " % self.tenfn(int(mo.group(1)), int(mo.group(2)), self.mill_count) + + def unitsub(self, mo): + return "%s, " % self.unitfn(int(mo.group(1)), self.mill_count) + + def enword(self, num, group): + # import pdb + # pdb.set_trace() + + if group == 1: + num = resub(r"(\d)", self.group1sub, num) + elif group == 2: + num = resub(r"(\d)(\d)", self.group2sub, num) + num = resub(r"(\d)", self.group1bsub, num, 1) + # group1bsub same as + # group1sub except it doesn't use the default word for one. + # Is this required? i.e. is the default word not to beused when + # grouping in pairs? + # + # No. This is a bug. Fixed. TODO: report upstream. + elif group == 3: + num = resub(r"(\d)(\d)(\d)", self.group3sub, num) + num = resub(r"(\d)(\d)", self.group2sub, num, 1) + num = resub(r"(\d)", self.group1sub, num, 1) + elif int(num) == 0: + num = self.number_args['zero'] + elif int(num) == 1: + num = self.number_args['one'] + else: + num = num.lstrip().lstrip('0') + self.mill_count = 0 + # surely there's a better way to do the next bit + mo = search(r"(\d)(\d)(\d)(?=\D*\Z)", num) + while mo: + num = resub(r"(\d)(\d)(\d)(?=\D*\Z)", self.hundsub, num, 1) + mo = search(r"(\d)(\d)(\d)(?=\D*\Z)", num) + num = resub(r"(\d)(\d)(?=\D*\Z)", self.tensub, num, 1) + num = resub(r"(\d)(?=\D*\Z)", self.unitsub, num, 1) + return num + + def blankfn(self, mo): + ''' do a global blank replace + TODO: surely this can be done with an option to resub + rather than this fn + ''' + return '' + + def commafn(self, mo): + ''' do a global ',' replace + TODO: surely this can be done with an option to resub + rather than this fn + ''' + return ',' + + def spacefn(self, mo): + ''' do a global ' ' replace + TODO: surely this can be done with an option to resub + rather than this fn + ''' + return ' ' + + def number_to_words(self, num, wantlist=False, + group=0, comma=',', andword='and', + zero='zero', one='one', decimal='point', + threshold=None): + ''' + Return a number in words. + + group = 1, 2 or 3 to group numbers before turning into words + comma: define comma + andword: word for 'and'. Can be set to ''. + e.g. "one hundred and one" vs "one hundred one" + zero: word for '0' + one: word for '1' + decimal: word for decimal point + threshold: numbers above threshold not turned into words + + parameters not remembered from last call. Departure from Perl version. + ''' + self.number_args = dict(andword=andword, zero=zero, one=one) + num = '%s' % num + + # Handle "stylistic" conversions (up to a given threshold)... 
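+ # Illustrative example (comment added in this copy): with threshold=100,
+ # number_to_words(1234567.89, threshold=100) is not spelled out in words;
+ # the block below just re-inserts grouping commas and returns '1,234,567.89'.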
+ if (threshold is not None and float(num) > threshold): + spnum = num.split('.', 1) + while (comma): + (spnum[0], n) = subn(r"(\d)(\d{3}(?:,|\Z))", r"\1,\2", spnum[0]) + if n == 0: + break + try: + return "%s.%s" % (spnum[0], spnum[1]) + except IndexError: + return "%s" % spnum[0] + + if group < 0 or group > 3: + raise BadChunkingOptionError + nowhite = num.lstrip() + if nowhite[0] == '+': + sign = "plus" + elif nowhite[0] == '-': + sign = "minus" + else: + sign = "" + + myord = (num[-2:] in ('st', 'nd', 'rd', 'th')) + if myord: + num = num[:-2] + finalpoint = False + if decimal: + if group != 0: + chunks = num.split('.') + else: + chunks = num.split('.', 1) + if chunks[-1] == '': # remove blank string if nothing after decimal + chunks = chunks[:-1] + finalpoint = True # add 'point' to end of output + else: + chunks = [num] + + first = 1 + loopstart = 0 + + if chunks[0] == '': + first = 0 + if len(chunks) > 1: + loopstart = 1 + + for i in range(loopstart, len(chunks)): + chunk = chunks[i] + # remove all non numeric \D + chunk = resub(r"\D", self.blankfn, chunk) + if chunk == "": + chunk = "0" + + if group == 0 and (first == 0 or first == ''): + chunk = self.enword(chunk, 1) + else: + chunk = self.enword(chunk, group) + + if chunk[-2:] == ', ': + chunk = chunk[:-2] + chunk = resub(r"\s+,", self.commafn, chunk) + + if group == 0 and first: + chunk = resub(r", (\S+)\s+\Z", " %s \\1" % andword, chunk) + chunk = resub(r"\s+", self.spacefn, chunk) + # chunk = resub(r"(\A\s|\s\Z)", self.blankfn, chunk) + chunk = chunk.strip() + if first: + first = '' + chunks[i] = chunk + + numchunks = [] + if first != 0: + numchunks = chunks[0].split("%s " % comma) + + if myord and numchunks: + # TODO: can this be just one re as it is in perl? + mo = search(r"(%s)\Z" % ordinal_suff, numchunks[-1]) + if mo: + numchunks[-1] = resub(r"(%s)\Z" % ordinal_suff, ordinal[mo.group(1)], + numchunks[-1]) + else: + numchunks[-1] += 'th' + + for chunk in chunks[1:]: + numchunks.append(decimal) + numchunks.extend(chunk.split("%s " % comma)) + + if finalpoint: + numchunks.append(decimal) + + # wantlist: Perl list context. can explictly specify in Python + if wantlist: + if sign: + numchunks = [sign] + numchunks + return numchunks + elif group: + signout = "%s " % sign if sign else '' + return "%s%s" % (signout, ", ".join(numchunks)) + else: + signout = "%s " % sign if sign else '' + num = "%s%s" % (signout, numchunks.pop(0)) + if decimal is None: + first = True + else: + first = not num.endswith(decimal) + for nc in numchunks: + if nc == decimal: + num += " %s" % nc + first = 0 + elif first: + num += "%s %s" % (comma, nc) + else: + num += " %s" % nc + return num + +# Join words with commas and a trailing 'and' (when appropriate)... + + def join(self, words, sep=None, sep_spaced=True, + final_sep=None, conj='and', conj_spaced=True): + ''' + Join words into a list. + + e.g. join(['ant', 'bee', 'fly']) returns 'ant, bee, and fly' + + options: + conj: replacement for 'and' + sep: separator. default ',', unless ',' is in the list then ';' + final_sep: final separator. default ',', unless ',' is in the list then ';' + conj_spaced: boolean. 
Should conj have spaces around it + + ''' + if not words: + return "" + if len(words) == 1: + return words[0] + + if conj_spaced: + if conj == '': + conj = ' ' + else: + conj = ' %s ' % conj + + if len(words) == 2: + return "%s%s%s" % (words[0], conj, words[1]) + + if sep is None: + if ',' in ''.join(words): + sep = ';' + else: + sep = ',' + if final_sep is None: + final_sep = sep + + final_sep = "%s%s" % (final_sep, conj) + + if sep_spaced: + sep += ' ' + + return "%s%s%s" % (sep.join(words[0:-1]), final_sep, words[-1]) diff --git a/bin/ttbp.py b/bin/ttbp.py index 6f415dd..b0c8567 100644 --- a/bin/ttbp.py +++ b/bin/ttbp.py @@ -9,12 +9,14 @@ import json import core import chatter +import inflect ## system globals SOURCE = os.path.join("/home", "endorphant", "projects", "ttbp", "bin") LIVE = "http://tilde.town/~" FEEDBACK = os.path.join("/home", "endorphant", "ttbp-mail") USERFILE = os.path.join("/home", "endorphant", "projects", "ttbp", "users.txt") +p = inflect.engine() ## user globals USER = os.path.basename(os.path.expanduser("~")) @@ -126,7 +128,7 @@ def gen_header(): header.append("\n\t") header.append("\n\t") header.append("\n\t\t
") - header.append("\n\t\t\t

~"+USER+"@TTBP

") + header.append("\n\t\t\t

~"+USER+"@TTBP

") header.append("\n\t\t
\n") header.append("\n\t\t\n\n\n\n") header.append("\n\t\t\n") @@ -243,6 +245,9 @@ def main_menu(): feedback_menu() elif choice == '4': redraw(DUST) + elif choice == 'secret': + redraw("here are your recorded feelings, listed by date:\n\n") + view_entries() elif choice == "none": return stop() else: @@ -257,7 +262,7 @@ def feedback_menu(): choice = raw_input("\npick a category for your feedback: ") cat = "" - if choice in ['0', '1', '2']: + if choice in ['0', '1', '2', '3']: cat = SUBJECTS[int(choice)] raw_input("\ncomposing a "+cat+" to ~endorphant.\n\npress to open an external text editor. mail will be sent once you save and quit.\n") redraw(send_feedback(cat)) @@ -296,6 +301,7 @@ def send_feedback(subject="none", mailbox=os.path.join(FEEDBACK, USER+"-"+time.s return "mail sent. thanks for writing! i'll try to respond to you soon." def view_neighbors(): + # TODO: rewrite this so you don't have to traverse a second list?? users = [] @@ -306,12 +312,27 @@ def view_neighbors(): for user in users: userRC = json.load(open(os.path.join("/home", user, ".ttbp", "config", "ttbprc"))) url = LIVE+user+"/"+userRC["publish dir"] - print("\t~"+user+"\t at "+url) + count = 0 + for filename in os.listdir(os.path.join("/home", user, ".ttbp", "entries")): + if os.path.splitext(filename)[1] == ".txt" and len(os.path.splitext(filename)[0]) == 8: + count += 1 + user = "~"+user + if len(user) < 8: + user += "\t" + print("\t"+user+"\t at "+url+"\t("+p.no("entry", count)+")") raw_input("\n\npress to go back home.\n\n") redraw() return + +def view_entries(): + + entries = [] + + raw_input("\n\npress to go back home.\n\n") + redraw() + return ##### start() diff --git a/changelog.txt b/changelog.txt index b9217b2..cee8c3a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,7 +1,10 @@ TO-DO: (goals for stable release) - -make individual permalink pages -add credits page + -browse own entries + -show most recent global entries + -filename validator (only process entries if they're + . ttbp/entries/YYMMDD.txt") ------