Got the basic proof-of-concept working.
This commit is contained in:
		
							parent
							
								
									79f25bf5f9
								
							
						
					
					
						commit
						54d18d7eeb
					
				
							
								
								
									
										39
									
								
								test.py
									
									
									
									
									
								
							
							
						
						
									
										39
									
								
								test.py
									
									
									
									
									
								
							@ -1,9 +1,11 @@
 | 
			
		||||
import json
 | 
			
		||||
import random
 | 
			
		||||
import requests
 | 
			
		||||
import wikipediaapi
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from nltk import pos_tag
 | 
			
		||||
from nltk import sent_tokenize, word_tokenize
 | 
			
		||||
from nltk import pos_tag, sent_tokenize, word_tokenize
 | 
			
		||||
from nltk.corpus import stopwords
 | 
			
		||||
from wikipediaapi import Wikipedia, WikipediaPage
 | 
			
		||||
 | 
			
		||||
# Info about the default pos_tag tags
 | 
			
		||||
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
 | 
			
		||||
@ -26,13 +28,24 @@ adlib_tags = {
 | 
			
		||||
    "VBZ": "Present Tense Verb ending in 's'",
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
stop_words = set(stopwords.words("english"))
 | 
			
		||||
 | 
			
		||||
def get_random_wikipedia_title() -> str:
    """Return the title of a random English Wikipedia article.

    Uses the REST v1 ``page/random/title`` endpoint.

    Returns:
        The article title string.

    Raises:
        requests.RequestException: on network failure or timeout.
        KeyError / json.JSONDecodeError: if the payload is not the expected shape.
    """
    # FIX: requests has no default timeout — without one a stalled connection
    # hangs the script forever.
    response = requests.get(
        'https://en.wikipedia.org/api/rest_v1/page/random/title',
        timeout=10,
    )
    random_result = json.loads(response.text)
    return random_result['items'][0]['title']
 | 
			
		||||
 | 
			
		||||
def get_random_wikipedia_page(wikipedia: Wikipedia, min_length: "int | None" = None) -> WikipediaPage:
    """Fetch random pages until one has a long-enough summary.

    Args:
        wikipedia: configured Wikipedia API client used to resolve pages.
        min_length: minimum acceptable summary length in characters;
            ``None`` accepts the first page returned.

    Returns:
        The first WikipediaPage whose summary meets the length requirement.
    """
    # FIX: annotation was the deprecated implicit-Optional `min_length: int = None`
    # (PEP 484); the quoted union form documents the contract without requiring
    # a typing import. Redundant parens around while/if removed.
    page = None
    while page is None:
        page = wikipedia.page(get_random_wikipedia_title())
        if min_length is not None and len(page.summary) < min_length:
            print(f"{page.displaytitle} is too short. Retrying...")
            page = None
            time.sleep(1)  # be polite to the API between retries
    return page
 | 
			
		||||
 | 
			
		||||
# Build an English-language client and pull a random article with a summary
# of at least 500 characters.
wikipedia = Wikipedia('en')
wiki_page = get_random_wikipedia_page(wikipedia, 500)

print(wiki_page.title)
print(wiki_page.displaytitle)
 | 
			
		||||
@ -45,11 +58,21 @@ for sentence in sentences:
 | 
			
		||||
    tagged_sentences.append(pos_tag(word_tokenize(sentence)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Sequential token id, and a counter of words emitted since the last blank.
i = adlib_word_counter = 0
min_words = 4  # minimum run of kept words between consecutive ad-lib blanks
output_tokens = []
for sentence in tagged_sentences:
    for token, tag in sentence:
        output_tokens.append({"id": i, "token": token, "tag": tag})
        adlib_tag = adlib_tags.get(tag)
        if adlib_tag is not None:
            # Randomly promote this token to a fill-in blank: randint's upper
            # bound grows with the distance from the previous blank, so the
            # chance of exceeding min_words rises the longer we go without one.
            # Stop words are never blanked.
            if random.randint(0, adlib_word_counter) > min_words and token not in stop_words:
                output_tokens[-1]["adlib_tag"] = adlib_tag
                adlib_word_counter = 0
            else:
                adlib_word_counter += 1

        i += 1

print(json.dumps(output_tokens))
# Written as a JS assignment (not plain JSON) so the static page can load it
# with a <script src="article.js"> tag, avoiding a fetch/CORS step.
with open("article.js", "w") as json_file:
    json_file.write(f"article = {json.dumps(output_tokens)}")
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										16
									
								
								web/index.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								web/index.html
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,16 @@
 | 
			
		||||
<!DOCTYPE html>
 | 
			
		||||
<html lang="en">
 | 
			
		||||
<head>
 | 
			
		||||
  <meta charset="UTF-8" />
 | 
			
		||||
  <title>WAD-LIBS</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body>
 | 
			
		||||
  <h1>WAD-LIBS: Wikipedia Ad-Libbed</h1>
 | 
			
		||||
  <div id="inputs"></div>
 | 
			
		||||
  <p><button id="show-article">WAD-LIB!</button></p>
 | 
			
		||||
  <div id="article"></div>
 | 
			
		||||
  <script src="article.js"></script>
 | 
			
		||||
  <script src="script.js"></script>
 | 
			
		||||
</body>
 | 
			
		||||
</html>
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										54
									
								
								web/script.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								web/script.js
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,54 @@
 | 
			
		||||
// Legacy loader kept for reference: fetches the pre-tokenized article JSON
// and renders the raw token stream into the #article element.
function parseArticleJSON_old() {
  fetch('tokens.json')
  .then((article) => article.json())
  .then((article_json) => {
    let output = ''
    article_json.forEach(function(token) {
      output += `${token.token} `
    })
    document.getElementById('article').innerHTML = output
  })
  .catch((error) => {
    console.log(`Error fetching article: ${error}`)
    // FIX: was document.getELementById (capital L) — a TypeError here would
    // mask the original fetch error and never render the 'Error' message.
    document.getElementById('article').innerHTML = 'Error'
  })
}
 | 
			
		||||
 | 
			
		||||
// Build one labeled text <input> per token that carries an adlib_tag, and
// inject the whole form into the #inputs container.
function createInputs(article) {
  // FIX: was assigned without a declaration, leaking `inputs` as an implicit
  // global (and throwing a ReferenceError under strict mode / in modules).
  let inputs = ''
  article.forEach(function(token) {
    if(token.adlib_tag) {
      inputs += `
      <p>
      <label for="token_${token.id}">${token.adlib_tag}</label>
      <input type="text" id="token_${token.id}" name="token_${token.id}">
      </p>
      `
    }
  })
  document.getElementById('inputs').innerHTML = inputs
}
 | 
			
		||||
 | 
			
		||||
// Render the article into #article, substituting the user's typed word
// (bolded) wherever the matching input field was filled in; every other
// token is emitted verbatim.
function showArticle(article) {
  let output = ''
  for (const token of article) {
    const field = document.getElementById(`token_${token.id}`)
    output += (field && field.value)
      ? `<strong>${field.value}</strong> `
      : `${token.token} `
  }
  document.getElementById('article').innerHTML = output
}
 | 
			
		||||
 | 
			
		||||
// `article` is the global defined by article.js, which the Python script
// generates; build the input form for it on load.
createInputs(article)

// Delegated click handler: listening on document (instead of the button)
// keeps working even if #article/#inputs markup is rewritten.
document.addEventListener('click', function (event) {
  if (event.target.matches('#show-article')) {
    event.preventDefault()
    showArticle(article)
  }
}, false);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user