30 lines
826 B
Python
30 lines
826 B
Python
|
import json
|
||
|
import requests
|
||
|
import wikipediaapi
|
||
|
|
||
|
from nltk import pos_tag, map_tag
|
||
|
from nltk import word_tokenize
|
||
|
|
||
|
|
||
|
def get_random_wikipedia_title():
    """Return the title of a random English Wikipedia page.

    Requests the Wikipedia REST endpoint ``page/random/title``, parses the
    JSON body, and returns the ``title`` value of the first entry in the
    response's ``items`` list.

    Raises:
        requests.RequestException: if the request fails, exceeds the
            timeout, or the server answers with an HTTP error status.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    response = requests.get(
        'https://en.wikipedia.org/api/rest_v1/page/random/title',
        # Without a timeout, requests may block forever on a stalled connection.
        timeout=10,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error body as a
    # normal result and failing later with an unrelated KeyError.
    response.raise_for_status()
    random_result = json.loads(response.text)
    return random_result['items'][0]['title']
|
||
|
|
||
|
# Fixed sample sentence used to demonstrate tokenizing and POS tagging.
data = "The quick brown fox jumps over the lazy dog."

# Tokenize the sentence, then tag the resulting tokens.
data_pos_tagged = pos_tag(word_tokenize(data))

# Print each tagged item on its own line.
for tagged_word in data_pos_tagged:
    print(tagged_word)
|
||
|
|
||
|
# Wikipedia client constructed with 'en'.
# NOTE(review): some wikipedia-api releases may expect a user agent string as
# the first argument — confirm against the installed version.
wikipedia = wikipediaapi.Wikipedia('en')

# Look up the page whose title was picked at random.
random_page = wikipedia.page(get_random_wikipedia_title())

print(random_page.title)

# Tokenize and POS-tag the page summary.
random_page_summary_tagged = pos_tag(word_tokenize(random_page.summary))

# Convert each tag from the 'en-ptb' tagset to the 'universal' tagset,
# keeping the word alongside its mapped tag.
simple_tags = [
    (word, map_tag('en-ptb', 'universal', tag))
    for word, tag in random_page_summary_tagged
]

# Show the original tagging first, then the simplified tagging.
print(random_page_summary_tagged)
print(simple_tags)
|