From 54d18d7eebc23077d16a14c8766af9ac37e4630d Mon Sep 17 00:00:00 2001 From: gamerdonkey Date: Sun, 26 Jun 2022 18:40:12 +0000 Subject: [PATCH] Got the basic proof-of-concept working. --- test.py | 39 ++++++++++++++++++++++++++++-------- web/index.html | 16 +++++++++++++++ web/script.js | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 web/index.html create mode 100644 web/script.js diff --git a/test.py b/test.py index 614205b..030e7ef 100644 --- a/test.py +++ b/test.py @@ -1,9 +1,11 @@ import json +import random import requests -import wikipediaapi +import time -from nltk import pos_tag -from nltk import sent_tokenize, word_tokenize +from nltk import pos_tag, sent_tokenize, word_tokenize +from nltk.corpus import stopwords +from wikipediaapi import Wikipedia, WikipediaPage # Info about the default pos_tag tags # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html @@ -26,13 +28,24 @@ adlib_tags = { "VBZ": "Present Tense Verb ending in 's'", } +stop_words = set(stopwords.words("english")) -def get_random_wikipedia_title(): +def get_random_wikipedia_title() -> str: random_result = json.loads(requests.get('https://en.wikipedia.org/api/rest_v1/page/random/title').text) return random_result['items'][0]['title'] -wikipedia = wikipediaapi.Wikipedia('en') -wiki_page = wikipedia.page(get_random_wikipedia_title()) +def get_random_wikipedia_page(wikipedia: Wikipedia, min_length: int = None) -> WikipediaPage: + page = None + while(page is None): + page = wikipedia.page(get_random_wikipedia_title()) + if(min_length and len(page.summary) < min_length): + print(f"{page.displaytitle} is too short. Retrying...") + page = None + time.sleep(1) + return page + +wikipedia = Wikipedia('en') +wiki_page = get_random_wikipedia_page(wikipedia, 500) print(wiki_page.title) print(wiki_page.displaytitle) @@ -45,11 +58,21 @@ for sentence in sentences: tagged_sentences.append(pos_tag(word_tokenize(sentence))) -i = 0 +i = adlib_word_counter = 0 +min_words = 4 output_tokens = [] for sentence in tagged_sentences: for token, tag in sentence: output_tokens.append({"id": i, "token": token, "tag": tag}) + adlib_tag = adlib_tags.get(tag) + if adlib_tag is not None: + if random.randint(0, adlib_word_counter) > min_words and token not in stop_words: + output_tokens[-1]["adlib_tag"] = adlib_tag + adlib_word_counter = 0 + else: + adlib_word_counter += 1 + i += 1 -print(json.dumps(output_tokens)) +with open("article.js", "w") as json_file: + json_file.write(f"article = {json.dumps(output_tokens)}") diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..a4348db --- /dev/null +++ b/web/index.html @@ -0,0 +1,16 @@ + + + + + WAD-LIBS + + +

WAD-LIBS: Wikipedia Ad-Libbed

+
+

+

+ + + + + diff --git a/web/script.js b/web/script.js new file mode 100644 index 0000000..7c9259a --- /dev/null +++ b/web/script.js @@ -0,0 +1,54 @@ +function parseArticleJSON_old() { + fetch('tokens.json') + .then((article) => article.json()) + .then((article_json) => { + let output = '' + article_json.forEach(function(token) { + output += `${token.token} ` + }) + document.getElementById('article').innerHTML = output + }) + .catch((error) => { + console.log(`Error fetching article: ${error}`) + document.getELementById('article').innerHTML = 'Error' + }) +} + +function createInputs(article) { + inputs = '' + article.forEach(function(token) { + if(token.adlib_tag) { + inputs += ` +

+ + +

+ ` + } + }) + document.getElementById('inputs').innerHTML = inputs +} + +function showArticle(article) { + let output = '' + article.forEach(function(token) { + let adlib_input = document.getElementById(`token_${token.id}`); + if(adlib_input && adlib_input.value) { + output += `${adlib_input.value} ` + } + else { + output += `${token.token} ` + } + }) + document.getElementById('article').innerHTML = output +} + +createInputs(article) + +document.addEventListener('click', function (event) { + if (event.target.matches('#show-article')) { + event.preventDefault() + showArticle(article) + } +}, false); +