Got the basic proof-of-concept working.
parent 79f25bf5f9
commit 54d18d7eeb

test.py (39 changed lines)
@@ -1,9 +1,11 @@
 import json
+import random
 import requests
-import wikipediaapi
+import time
 
-from nltk import pos_tag
-from nltk import sent_tokenize, word_tokenize
+from nltk import pos_tag, sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from wikipediaapi import Wikipedia, WikipediaPage
 
 # Info about the default pos_tag tags
 # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
@@ -26,13 +28,24 @@ adlib_tags = {
     "VBZ": "Present Tense Verb ending in 's'",
 }
 
+stop_words = set(stopwords.words("english"))
 
-def get_random_wikipedia_title():
+def get_random_wikipedia_title() -> str:
     random_result = json.loads(requests.get('https://en.wikipedia.org/api/rest_v1/page/random/title').text)
     return random_result['items'][0]['title']
 
-wikipedia = wikipediaapi.Wikipedia('en')
-wiki_page = wikipedia.page(get_random_wikipedia_title())
+def get_random_wikipedia_page(wikipedia: Wikipedia, min_length: int = None) -> WikipediaPage:
+    page = None
+    while(page is None):
+        page = wikipedia.page(get_random_wikipedia_title())
+        if(min_length and len(page.summary) < min_length):
+            print(f"{page.displaytitle} is too short. Retrying...")
+            page = None
+            time.sleep(1)
+    return page
+
+wikipedia = Wikipedia('en')
+wiki_page = get_random_wikipedia_page(wikipedia, 500)
 
 print(wiki_page.title)
 print(wiki_page.displaytitle)
@@ -45,11 +58,21 @@ for sentence in sentences:
     tagged_sentences.append(pos_tag(word_tokenize(sentence)))
 
 
-i = 0
+i = adlib_word_counter = 0
+min_words = 4
 output_tokens = []
 for sentence in tagged_sentences:
     for token, tag in sentence:
         output_tokens.append({"id": i, "token": token, "tag": tag})
+        adlib_tag = adlib_tags.get(tag)
+        if adlib_tag is not None:
+            if random.randint(0, adlib_word_counter) > min_words and token not in stop_words:
+                output_tokens[-1]["adlib_tag"] = adlib_tag
+                adlib_word_counter = 0
+            else:
+                adlib_word_counter += 1
+
        i += 1
 
-print(json.dumps(output_tokens))
+with open("article.js", "w") as json_file:
+    json_file.write(f"article = {json.dumps(output_tokens)}")
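A note on what this change produces: a token only becomes an ad-lib slot when random.randint(0, adlib_word_counter) exceeds min_words and the token is not a stop word, so slots are spaced out and become more likely the longer it has been since the last one. The tagged tokens are no longer printed as raw JSON; they are written to article.js as a single `article = [...]` assignment that the page below can load with a plain script tag. As a rough illustration only (the tokens here are invented, and the real file is one long json.dumps line, reflowed for readability), article.js might look like:

article = [
    {"id": 0, "token": "The", "tag": "DT"},
    {"id": 1, "token": "tower", "tag": "NN"},
    {"id": 2, "token": "stands", "tag": "VBZ", "adlib_tag": "Present Tense Verb ending in 's'"},
    {"id": 3, "token": "alone", "tag": "RB"}
]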

@@ -0,0 +1,16 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <title>WAD-LIBS</title>
+</head>
+<body>
+    <h1>WAD-LIBS: Wikipedia Ad-Libbed</h1>
+    <div id="inputs"></div>
+    <p><button id="show-article">WAD-LIB!</button>
+    <div id="article"></div>
+    <script src="article.js"></script>
+    <script src="script.js"></script>
+</body>
+</html>
+

@@ -0,0 +1,54 @@
+function parseArticleJSON_old() {
+    fetch('tokens.json')
+        .then((article) => article.json())
+        .then((article_json) => {
+            let output = ''
+            article_json.forEach(function(token) {
+                output += `${token.token} `
+            })
+            document.getElementById('article').innerHTML = output
+        })
+        .catch((error) => {
+            console.log(`Error fetching article: ${error}`)
+            document.getElementById('article').innerHTML = 'Error'
+        })
+}
+
+function createInputs(article) {
+    inputs = ''
+    article.forEach(function(token) {
+        if(token.adlib_tag) {
+            inputs += `
+                <p>
+                <label for="token_${token.id}">${token.adlib_tag}</label>
+                <input type="text" id="token_${token.id}" name="token_${token.id}">
+                </p>
+            `
+        }
+    })
+    document.getElementById('inputs').innerHTML = inputs
+}
+
+function showArticle(article) {
+    let output = ''
+    article.forEach(function(token) {
+        let adlib_input = document.getElementById(`token_${token.id}`);
+        if(adlib_input && adlib_input.value) {
+            output += `<strong>${adlib_input.value}</strong> `
+        }
+        else {
+            output += `${token.token} `
+        }
+    })
+    document.getElementById('article').innerHTML = output
+}
+
+createInputs(article)
+
+document.addEventListener('click', function (event) {
+    if (event.target.matches('#show-article')) {
+        event.preventDefault()
+        showArticle(article)
+    }
+}, false);
+