Got the basic proof-of-concept working.
parent 79f25bf5f9
commit 54d18d7eeb

test.py (39 changed lines)
@@ -1,9 +1,11 @@
 import json
+import random
 import requests
-import wikipediaapi
+import time
 
-from nltk import pos_tag
-from nltk import sent_tokenize, word_tokenize
+from nltk import pos_tag, sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from wikipediaapi import Wikipedia, WikipediaPage
 
 # Info about the default pos_tag tags
 # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
@@ -26,13 +28,24 @@ adlib_tags = {
     "VBZ": "Present Tense Verb ending in 's'",
 }
 
+stop_words = set(stopwords.words("english"))
 
-def get_random_wikipedia_title():
+def get_random_wikipedia_title() -> str:
     random_result = json.loads(requests.get('https://en.wikipedia.org/api/rest_v1/page/random/title').text)
     return random_result['items'][0]['title']
 
-wikipedia = wikipediaapi.Wikipedia('en')
-wiki_page = wikipedia.page(get_random_wikipedia_title())
+def get_random_wikipedia_page(wikipedia: Wikipedia, min_length: int = None) -> WikipediaPage:
+    page = None
+    while(page is None):
+        page = wikipedia.page(get_random_wikipedia_title())
+        if(min_length and len(page.summary) < min_length):
+            print(f"{page.displaytitle} is too short. Retrying...")
+            page = None
+            time.sleep(1)
+    return page
+
+wikipedia = Wikipedia('en')
+wiki_page = get_random_wikipedia_page(wikipedia, 500)
 
 print(wiki_page.title)
 print(wiki_page.displaytitle)
@@ -45,11 +58,21 @@ for sentence in sentences:
     tagged_sentences.append(pos_tag(word_tokenize(sentence)))
 
 
-i = 0
+i = adlib_word_counter = 0
+min_words = 4
 output_tokens = []
 for sentence in tagged_sentences:
     for token, tag in sentence:
         output_tokens.append({"id": i, "token": token, "tag": tag})
+        adlib_tag = adlib_tags.get(tag)
+        if adlib_tag is not None:
+            if random.randint(0, adlib_word_counter) > min_words and token not in stop_words:
+                output_tokens[-1]["adlib_tag"] = adlib_tag
+                adlib_word_counter = 0
+            else:
+                adlib_word_counter += 1
+
        i += 1
 
-print(json.dumps(output_tokens))
+with open("article.js", "w") as json_file:
+    json_file.write(f"article = {json.dumps(output_tokens)}")
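A note on what this change produces: a token only becomes an ad-lib slot when random.randint(0, adlib_word_counter) exceeds min_words and the token is not a stop word, so slots are spaced out and become more likely the longer it has been since the last one. The tagged tokens are no longer printed as raw JSON; they are written to article.js as a single `article = [...]` assignment that the page below can load with a plain script tag. As a rough illustration only (the tokens here are invented, and the real file is one long json.dumps line, reflowed for readability), article.js might look like:

article = [
    {"id": 0, "token": "The", "tag": "DT"},
    {"id": 1, "token": "tower", "tag": "NN"},
    {"id": 2, "token": "stands", "tag": "VBZ", "adlib_tag": "Present Tense Verb ending in 's'"},
    {"id": 3, "token": "alone", "tag": "RB"}
]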

@@ -0,0 +1,16 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <title>WAD-LIBS</title>
+</head>
+<body>
+    <h1>WAD-LIBS: Wikipedia Ad-Libbed</h1>
+    <div id="inputs"></div>
+    <p><button id="show-article">WAD-LIB!</button>
+    <div id="article"></div>
+    <script src="article.js"></script>
+    <script src="script.js"></script>
+</body>
+</html>
+

@@ -0,0 +1,54 @@
+function parseArticleJSON_old() {
+    fetch('tokens.json')
+        .then((article) => article.json())
+        .then((article_json) => {
+            let output = ''
+            article_json.forEach(function(token) {
+                output += `${token.token} `
+            })
+            document.getElementById('article').innerHTML = output
+        })
+        .catch((error) => {
+            console.log(`Error fetching article: ${error}`)
+            document.getElementById('article').innerHTML = 'Error'
+        })
+}
+
+function createInputs(article) {
+    inputs = ''
+    article.forEach(function(token) {
+        if(token.adlib_tag) {
+            inputs += `
+                <p>
+                <label for="token_${token.id}">${token.adlib_tag}</label>
+                <input type="text" id="token_${token.id}" name="token_${token.id}">
+                </p>
+            `
+        }
+    })
+    document.getElementById('inputs').innerHTML = inputs
+}
+
+function showArticle(article) {
+    let output = ''
+    article.forEach(function(token) {
+        let adlib_input = document.getElementById(`token_${token.id}`);
+        if(adlib_input && adlib_input.value) {
+            output += `<strong>${adlib_input.value}</strong> `
+        }
+        else {
+            output += `${token.token} `
+        }
+    })
+    document.getElementById('article').innerHTML = output
+}
+
+createInputs(article)
+
+document.addEventListener('click', function (event) {
+    if (event.target.matches('#show-article')) {
+        event.preventDefault()
+        showArticle(article)
+    }
+}, false);
+