fix news parsing
parent
c75dcafe23
commit
bf4356eb29
|
@ -134,34 +134,27 @@ def parse_news(news_path):
|
||||||
metadata_keys = ['title', 'pubdate']
|
metadata_keys = ['title', 'pubdate']
|
||||||
in_meta = True
|
in_meta = True
|
||||||
in_content = False
|
in_content = False
|
||||||
current_entry = {}
|
current_entry = {'content':''}
|
||||||
entries = []
|
entries = []
|
||||||
with open(news_path, 'r') as f:
|
with open(news_path, 'r') as f:
|
||||||
line = f.readline().rstrip().lstrip()
|
line = 'not null'
|
||||||
while line:
|
while line:
|
||||||
|
line = f.readline()
|
||||||
if blank_line_re.match(line) or line.startswith('#'):
|
if blank_line_re.match(line) or line.startswith('#'):
|
||||||
line = f.readline().rstrip().lstrip()
|
|
||||||
continue
|
continue
|
||||||
|
elif line == '--\n':
|
||||||
if in_meta:
|
entries.append(current_entry)
|
||||||
|
current_entry = {'content':''}
|
||||||
|
in_meta = True
|
||||||
|
in_content = False
|
||||||
|
elif in_meta:
|
||||||
key, value = line.split(':', 1)
|
key, value = line.split(':', 1)
|
||||||
current_entry[key] = value
|
current_entry[key] = value.rstrip().lstrip()
|
||||||
if set(current_entry.keys()) == set(metadata_keys):
|
if set(current_entry.keys()) == set(metadata_keys):
|
||||||
in_content = True
|
in_content = True
|
||||||
in_meta = False
|
in_meta = False
|
||||||
|
elif in_content:
|
||||||
if in_content:
|
current_entry['content'] += "\n{}".format(line.lstrip().rstrip())
|
||||||
if 'content' not in current_entry:
|
|
||||||
current_entry['content'] = ''
|
|
||||||
current_entry['content'] += "\n{}".format(line)
|
|
||||||
|
|
||||||
if line == '--\n':
|
|
||||||
entries.append(current_entry)
|
|
||||||
current_entry = {}
|
|
||||||
in_meta = True
|
|
||||||
in_content = False
|
|
||||||
|
|
||||||
line = f.readline().rstrip().lstrip()
|
|
||||||
|
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
Reference in New Issue