Got the basic proof-of-concept working.
This commit is contained in:
		
							parent
							
								
									79f25bf5f9
								
							
						
					
					
						commit
						54d18d7eeb
					
				
							
								
								
									
										39
									
								
								test.py
									
									
									
									
									
								
							
							
						
						
									
										39
									
								
								test.py
									
									
									
									
									
								
							@ -1,9 +1,11 @@
 | 
			
		||||
import json
 | 
			
		||||
import random
 | 
			
		||||
import requests
 | 
			
		||||
import wikipediaapi
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from nltk import pos_tag
 | 
			
		||||
from nltk import sent_tokenize, word_tokenize
 | 
			
		||||
from nltk import pos_tag, sent_tokenize, word_tokenize
 | 
			
		||||
from nltk.corpus import stopwords
 | 
			
		||||
from wikipediaapi import Wikipedia, WikipediaPage
 | 
			
		||||
 | 
			
		||||
# Info about the default pos_tag tags
 | 
			
		||||
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
 | 
			
		||||
@ -26,13 +28,24 @@ adlib_tags = {
 | 
			
		||||
    "VBZ": "Present Tense Verb ending in 's'",
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
stop_words = set(stopwords.words("english"))
 | 
			
		||||
 | 
			
		||||
def get_random_wikipedia_title() -> str:
    """Return the title of a random English Wikipedia article.

    Uses the REST v1 ``page/random/title`` endpoint.

    Returns:
        The article title string.

    Raises:
        requests.RequestException: on network failure or timeout.
        KeyError / json.JSONDecodeError: if the payload is not the expected shape.
    """
    # FIX: requests has no default timeout — without one a stalled connection
    # hangs the script forever.
    response = requests.get(
        'https://en.wikipedia.org/api/rest_v1/page/random/title',
        timeout=10,
    )
    random_result = json.loads(response.text)
    return random_result['items'][0]['title']
 | 
			
		||||
 | 
			
		||||
def get_random_wikipedia_page(wikipedia: Wikipedia, min_length: "int | None" = None) -> WikipediaPage:
    """Fetch random pages until one has a long-enough summary.

    Args:
        wikipedia: configured Wikipedia API client used to resolve pages.
        min_length: minimum acceptable summary length in characters;
            ``None`` accepts the first page returned.

    Returns:
        The first WikipediaPage whose summary meets the length requirement.
    """
    # FIX: annotation was the deprecated implicit-Optional `min_length: int = None`
    # (PEP 484); the quoted union form documents the contract without requiring
    # a typing import. Redundant parens around while/if removed.
    page = None
    while page is None:
        page = wikipedia.page(get_random_wikipedia_title())
        if min_length is not None and len(page.summary) < min_length:
            print(f"{page.displaytitle} is too short. Retrying...")
            page = None
            time.sleep(1)  # be polite to the API between retries
    return page
 | 
			
		||||
 | 
			
		||||
# Build an English-language client and pull a random article with a summary
# of at least 500 characters.
wikipedia = Wikipedia('en')
wiki_page = get_random_wikipedia_page(wikipedia, 500)

print(wiki_page.title)
print(wiki_page.displaytitle)
 | 
			
		||||
@ -45,11 +58,21 @@ for sentence in sentences:
 | 
			
		||||
    tagged_sentences.append(pos_tag(word_tokenize(sentence)))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Sequential token id, and a counter of words emitted since the last blank.
i = adlib_word_counter = 0
min_words = 4  # minimum run of kept words between consecutive ad-lib blanks
output_tokens = []
for sentence in tagged_sentences:
    for token, tag in sentence:
        output_tokens.append({"id": i, "token": token, "tag": tag})
        adlib_tag = adlib_tags.get(tag)
        if adlib_tag is not None:
            # Randomly promote this token to a fill-in blank: randint's upper
            # bound grows with the distance from the previous blank, so the
            # chance of exceeding min_words rises the longer we go without one.
            # Stop words are never blanked.
            if random.randint(0, adlib_word_counter) > min_words and token not in stop_words:
                output_tokens[-1]["adlib_tag"] = adlib_tag
                adlib_word_counter = 0
            else:
                adlib_word_counter += 1

        i += 1

print(json.dumps(output_tokens))
# Written as a JS assignment (not plain JSON) so the static page can load it
# with a <script src="article.js"> tag, avoiding a fetch/CORS step.
with open("article.js", "w") as json_file:
    json_file.write(f"article = {json.dumps(output_tokens)}")
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										16
									
								
								web/index.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								web/index.html
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,16 @@
 | 
			
		||||
<!DOCTYPE html>
 | 
			
		||||
<html lang="en">
 | 
			
		||||
<head>
 | 
			
		||||
  <meta charset="UTF-8" />
 | 
			
		||||
  <title>WAD-LIBS</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body>
 | 
			
		||||
  <h1>WAD-LIBS: Wikipedia Ad-Libbed</h1>
 | 
			
		||||
  <div id="inputs"></div>
 | 
			
		||||
  <p><button id="show-article">WAD-LIB!</button></p>
 | 
			
		||||
  <div id="article"></div>
 | 
			
		||||
  <script src="article.js"></script>
 | 
			
		||||
  <script src="script.js"></script>
 | 
			
		||||
</body>
 | 
			
		||||
</html>
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										54
									
								
								web/script.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								web/script.js
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,54 @@
 | 
			
		||||
// Legacy loader kept for reference: fetches the pre-tokenized article JSON
// and renders the raw token stream into the #article element.
function parseArticleJSON_old() {
  fetch('tokens.json')
  .then((article) => article.json())
  .then((article_json) => {
    let output = ''
    article_json.forEach(function(token) {
      output += `${token.token} `
    })
    document.getElementById('article').innerHTML = output
  })
  .catch((error) => {
    console.log(`Error fetching article: ${error}`)
    // FIX: was document.getELementById (capital L) — a TypeError here would
    // mask the original fetch error and never render the 'Error' message.
    document.getElementById('article').innerHTML = 'Error'
  })
}
 | 
			
		||||
 | 
			
		||||
// Build one labeled text <input> per token that carries an adlib_tag, and
// inject the whole form into the #inputs container.
function createInputs(article) {
  // FIX: was assigned without a declaration, leaking `inputs` as an implicit
  // global (and throwing a ReferenceError under strict mode / in modules).
  let inputs = ''
  article.forEach(function(token) {
    if(token.adlib_tag) {
      inputs += `
      <p>
      <label for="token_${token.id}">${token.adlib_tag}</label>
      <input type="text" id="token_${token.id}" name="token_${token.id}">
      </p>
      `
    }
  })
  document.getElementById('inputs').innerHTML = inputs
}
 | 
			
		||||
 | 
			
		||||
// Render the article into #article, substituting the user's typed word
// (bolded) wherever the matching input field was filled in; every other
// token is emitted verbatim.
function showArticle(article) {
  let output = ''
  for (const token of article) {
    const field = document.getElementById(`token_${token.id}`)
    output += (field && field.value)
      ? `<strong>${field.value}</strong> `
      : `${token.token} `
  }
  document.getElementById('article').innerHTML = output
}
 | 
			
		||||
 | 
			
		||||
// `article` is the global defined by article.js, which the Python script
// generates; build the input form for it on load.
createInputs(article)

// Delegated click handler: listening on document (instead of the button)
// keeps working even if #article/#inputs markup is rewritten.
document.addEventListener('click', function (event) {
  if (event.target.matches('#show-article')) {
    event.preventDefault()
    showArticle(article)
  }
}, false);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user