# wiki-mad-libs/test.py

import json
import requests
import wikipediaapi

from nltk import pos_tag
from nltk import sent_tokenize, word_tokenize

# Maps Penn Treebank part-of-speech tags (the tag set emitted by nltk's
# default pos_tag) to human-readable labels. Only adjective, noun, adverb and
# verb tags are listed.
# Info about the default pos_tag tags
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
# NOTE(review): this table is not referenced in the visible part of this file;
# presumably the labels are the prompts shown to a mad-libs player -- confirm
# against the code that consumes it.
adlib_tags = {
    # Adjectives: base, comparative (JJR), superlative (JJS).
    "JJ": "Adjective",
    "JJR": "Adjective ending in 'er'",
    "JJS": "Adjective ending in 'est'",
    # Nouns: singular/mass, plural, proper, plural proper.
    "NN": "Noun",
    "NNS": "Plural Noun",
    "NNP": "Proper Noun",
    "NNPS": "Plural Proper Noun",
    # Adverbs: base, comparative (RBR), superlative (RBS).
    "RB": "Adverb",
    "RBR": "Adverb ending in 'er'",
    "RBS": "Adverb ending in 'est'",
    # Verbs: base form, past tense, gerund/present participle, past
    # participle, non-3rd-person present, 3rd-person singular present.
    "VB": "Verb",
    "VBD": "Past Tense Verb",
    "VBG": "Verb ending in 'ing'",
    # VBN is the past participle in the Penn tag set; it deliberately or
    # accidentally shares VBD's label here -- TODO confirm that is intended.
    "VBN": "Past Tense Verb",
    "VBP": "Present Tense Verb",
    "VBZ": "Present Tense Verb ending in 's'",
}


def get_random_wikipedia_title(timeout=10):
    """Return the title of a randomly selected English Wikipedia page.

    Calls the ``page/random/title`` REST endpoint on en.wikipedia.org and
    extracts the title of the first entry in the JSON response.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            server.

    Returns:
        The ``title`` field of the first element of the response's ``items``
        list.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(
        'https://en.wikipedia.org/api/rest_v1/page/random/title',
        timeout=timeout,
    )
    # Surface HTTP errors directly rather than as an opaque JSON decode error
    # or KeyError when the body is not the expected payload.
    response.raise_for_status()
    return response.json()['items'][0]['title']

# Look up a randomly chosen article through the English-language client.
wikipedia = wikipediaapi.Wikipedia('en')
wiki_page = wikipedia.page(get_random_wikipedia_title())

# Print the page's identifying metadata, one value per line. Each attribute is
# read immediately before it is printed, in this order.
for attr_name in ("title", "displaytitle", "canonicalurl"):
    print(getattr(wiki_page, attr_name))

# Split the page summary into sentences, then tokenize and POS-tag each
# sentence separately. The result holds one list of (token, tag) pairs per
# sentence.
summary = wiki_page.summary
sentences = sent_tokenize(summary)
tagged_sentences = [
    pos_tag(word_tokenize(sentence_text)) for sentence_text in sentences
]


# Flatten the per-sentence (token, tag) pairs into one sequence and give every
# token a running id that starts at 0 and keeps counting across sentences.
flat_pairs = (pair for tagged in tagged_sentences for pair in tagged)
output_tokens = [
    {"id": position, "token": word, "tag": pos}
    for position, (word, pos) in enumerate(flat_pairs)
]

# Emit the numbered tokens as a single JSON array on stdout.
print(json.dumps(output_tokens))