improve articles parser

This commit is contained in:
Frederick Pauchet 2018-08-24 15:59:55 +02:00
parent 63eb9cb991
commit 5a2b32487c
3 changed files with 28 additions and 2 deletions

View File

@ -44,6 +44,15 @@ class Article(object):
self.content = content
self.publication_date = publication_date
try:
self.title = content.splitlines()[0]
except IndexError:
self.title = "{Title unknown}"
def __str__(self):
    """Render the article as ``<title> (<publication date>)``."""
    template = '{} ({})'
    return template.format(self.title, self.publication_date)
def get_date_from_article_filename(article_filename):
"""Get the date from a file name.
@ -62,7 +71,7 @@ def get_date_from_article_filename(article_filename):
try:
date = datetime.date(int(values[0]), int(values[1]), int(values[2]))
except (ValueError, TypeError):
logger.warn('Ignoring file %s', article_filename)
logger.warn('Ignoring file - no publication date found %s', article_filename)
return date
@ -90,16 +99,23 @@ class Site(object):
Args:
root_path (str): the path where the file is stored.
filename (str): the filename of the file.
Returns:
None if no publication date can be built.
A `grnx.models.Article` instance otherwise.
"""
publication_date = get_date_from_article_filename(filename)
if not publication_date:
return
with open(filepath, 'r') as f:
article = None
with open(os.path.join(filepath, filename), encoding="utf8") as f:
content = f.read()
article = Article(filepath, content, publication_date)
logger.warn('article found in %s: %s', article.category, article)
return article
def to_json(self):

4
tests/integration.py Normal file
View File

@ -0,0 +1,4 @@
from grnx.models import Site
# Manual entry point: when this file is executed as a script, instantiate
# Site with no arguments. Nothing is asserted on the resulting object here.
if __name__ == "__main__":
site = Site()

View File

@ -3,6 +3,7 @@
from datetime import date
from grnx.models import get_date_from_article_filename
from grnx.models import Article
def test_article_match_date():
@ -11,3 +12,8 @@ def test_article_match_date():
assert get_date_from_article_filename('2018-09-01-test.md') == date(2018, 9, 1)
assert get_date_from_article_filename('2017-02-30-divinity-origin-sin.md') == None
assert get_date_from_article_filename('lynis.md') == None
def test_article_category():
    """Assert that the category of an article is found, based on the filepath."""
    article = Article('articles/home', "", date(2018, 1, 1))
    # Check the ``category`` attribute: comparing the string with the Article
    # instance itself would not exercise the category lookup at all.
    assert article.category == "home"