improve articles parser
This commit is contained in:
parent
63eb9cb991
commit
5a2b32487c
|
@ -44,6 +44,15 @@ class Article(object):
|
|||
self.content = content
|
||||
self.publication_date = publication_date
|
||||
|
||||
try:
|
||||
self.title = content.splitlines()[0]
|
||||
except IndexError:
|
||||
self.title = "{Title unknown}"
|
||||
|
||||
def __str__(self):
    """Render the article as ``<title> (<publication date>)``."""
    # Pull both fields out first so the format call reads as a plain template.
    title, published = self.title, self.publication_date
    return '{} ({})'.format(title, published)
|
||||
|
||||
|
||||
def get_date_from_article_filename(article_filename):
|
||||
"""Get the date from a file name.
|
||||
|
@ -62,7 +71,7 @@ def get_date_from_article_filename(article_filename):
|
|||
try:
|
||||
date = datetime.date(int(values[0]), int(values[1]), int(values[2]))
|
||||
except (ValueError, TypeError):
|
||||
logger.warn('Ignoring file %s', article_filename)
|
||||
logger.warn('Ignoring file - no publication date found %s', article_filename)
|
||||
|
||||
return date
|
||||
|
||||
|
@ -90,16 +99,23 @@ class Site(object):
|
|||
Args:
|
||||
root_path (str): the path where the file is stored.
|
||||
filename (str): the filename of the file.
|
||||
|
||||
Returns:
|
||||
None if no publication date can be built.
|
||||
A `grnx.models.Article` instance otherwise.
|
||||
"""
|
||||
publication_date = get_date_from_article_filename(filename)
|
||||
if not publication_date:
|
||||
return
|
||||
|
||||
with open(filepath, 'r') as f:
|
||||
article = None
|
||||
with open(os.path.join(filepath, filename), encoding="utf8") as f:
|
||||
content = f.read()
|
||||
|
||||
article = Article(filepath, content, publication_date)
|
||||
|
||||
logger.warn('article found in %s: %s', article.category, article)
|
||||
|
||||
return article
|
||||
|
||||
def to_json(self):
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
from grnx.models import Site
|
||||
|
||||
if __name__ == "__main__":
|
||||
site = Site()
|
|
@ -3,6 +3,7 @@
|
|||
from datetime import date
|
||||
|
||||
from grnx.models import get_date_from_article_filename
|
||||
from grnx.models import Article
|
||||
|
||||
|
||||
def test_article_match_date():
|
||||
|
@ -11,3 +12,8 @@ def test_article_match_date():
|
|||
assert get_date_from_article_filename('2018-09-01-test.md') == date(2018, 9, 1)
|
||||
assert get_date_from_article_filename('2017-02-30-divinity-origin-sin.md') == None
|
||||
assert get_date_from_article_filename('lynis.md') == None
|
||||
|
||||
def test_article_category():
    """Asserts that the category of an article is found, based on the filepath."""
    article = Article('articles/home', "", date(2018, 1, 1))

    # Check the category attribute rather than the Article object itself:
    # comparing the whole instance to a string does not test the category.
    assert article.category == "home"
|
||||
|
|
Loading…
Reference in New Issue