grimboite/grnx/models.py

156 lines
4.3 KiB
Python

# coding: utf-8
import datetime
import json
import re
import os
import logging
from slugify import slugify
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Matches a block that opens and closes with '---'. Between the two markers,
# each repetition consumes an optional non-newline character and/or an
# optional newline, so the body may span several lines. The repetition is
# greedy, so a match extends to the last '---' it can reach.
# NOTE(review): presumably meant to detect a front-matter header at the top of
# an article; it is not referenced in the code visible here -- confirm against
# callers before changing the pattern or its capture groups.
RE_HEADER = re.compile('---((.)?(\n)?)*---')
def date_handler(obj):
    """Return the ISO 8601 representation of a date, or None.

    Args:
        obj: any object.

    Returns:
        ``obj.isoformat()`` when ``obj`` is a ``datetime.date`` or a
        ``datetime.datetime``; ``None`` for every other object.
    """
    # One check covers both types: datetime.datetime subclasses datetime.date.
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    return None
def json_handler(obj):
    """Handles the JSON object serializer.

    Intended as the ``default`` callable of ``json.dumps``.

    Args:
        obj: the object to convert into something JSON serializable.

    Returns:
        The iso format if the object exposes an ``isoformat`` attribute
        (dates, datetimes, times).
        The ``__dict__`` attribute of any other object, when it is not empty.

    Raises:
        TypeError: when the object has no ``isoformat`` attribute and no
            (or an empty) ``__dict__``.
    """
    if hasattr(obj, 'isoformat'):
        return obj.isoformat()
    # Read __dict__ with a default: objects without one (sets, instances of
    # slotted classes, ...) must end up in the TypeError below rather than
    # leak an AttributeError out of json.dumps.
    attributes = getattr(obj, '__dict__', None)
    if attributes:
        return attributes
    raise TypeError('Object of type %s with value of %s is not JSON serializable' % (type(obj), repr(obj)))
class Article(object):
    """An article described by the path of its file and its text content.

    Args:
        path (str): path of the article file; its last component is used as
            the file name, its second component as the category and the
            components in between as keywords.
        content (str): the text of the article; its first line is the title.
        publication_date (datetime.date): optional publication date. When
            omitted, it is derived from the file name.
    """

    def __init__(self, path, content, publication_date=None):
        self.path = path
        self.content = content

        split_path = os.path.normpath(path).split(os.sep)
        # Index, do not slice: ``split_path[-1:]`` is a one-item list, which
        # breaks every string operation applied to the file name below.
        self.filename = split_path[-1]

        # Honour the date handed in by the caller; fall back to the file name.
        if publication_date is None:
            publication_date = get_date_from_article_filename(self.filename)
        self.publication_date = publication_date
        self.slug = slugify(self.filename)

        if len(split_path) > 1:
            self.category = split_path[1]
            self.keywords = split_path[2:-1]
        else:
            # Single-component path: no category and no keywords.
            self.category = ''
            self.keywords = []

        # An empty content has no first line: use the file name as the title.
        lines = content.splitlines()
        self.title = lines[0] if lines else self.filename

    def __str__(self):
        """Returns the title and the publication date of the article."""
        return '{} ({})'.format(self.title, self.publication_date)
def get_date_from_article_filename(article_filename):
    """Get the date from a file name.

    The name is split on ``-`` and its first three parts are read as the
    year, the month and the day. The day must therefore be followed by a
    ``-`` (``2018-02-01.md`` yields ``None`` because ``01.md`` is not a
    number).

    Args:
        article_filename (str): the file name of the article.

    Returns:
        A ``datetime.date``, or ``None`` (after logging a warning) when the
        name does not start with a valid date.

    Example:
        2018-02-01-firewatch.md -> 1st of February 2018
        2017-02-31-divinity-origin-sin.md -> None (no such day)
        2018-02 -> None
        lynis.md -> None
    """
    values = article_filename.split('-')
    try:
        return datetime.date(int(values[0]), int(values[1]), int(values[2]))
    except (IndexError, ValueError, TypeError):
        # IndexError: fewer than three '-' separated parts.
        # ValueError: a part is not a number, or the date does not exist.
        logger.warning('Ignoring file - no publication date found %s', article_filename)
        return None
class Site(object):
    """Represents a Site object.

    Args:
        root_path (path): The path where articles are stored.

    Attributes:
        articles (list): every article built, in the order files were found.
        categories (dict): maps a category name to the list of its articles.
        keywords (dict): created empty; nothing in this class fills it.
    """

    def __init__(self, root_path='articles'):
        self.articles = []
        self.categories = {}
        self.keywords = {}
        # Walk the whole tree; only Markdown files are candidate articles.
        for root, _dirs, files in os.walk(root_path):
            for filename in files:
                if filename.endswith(".md"):
                    self.build_article(root, filename)

    def build_article(self, filepath, filename):
        """Build a new article from an existing file.

        The newly built article is added to the property `self.articles`
        and to the list of its category in `self.categories`.

        Args:
            filepath (str): the directory where the file is stored.
            filename (str): the filename of the file.

        Returns:
            None if no publication date can be built.
            An `grnx.models.Article` instance instead.
        """
        publication_date = get_date_from_article_filename(filename)
        if not publication_date:
            return None

        full_path = os.path.join(filepath, filename)
        with open(full_path, encoding="utf8") as f:
            content = f.read()

        # Article reads the file name, category and keywords out of the path
        # it receives, so it needs the path of the file, not of its directory.
        article = Article(full_path, content, publication_date)
        logger.warning('article found in %s: %s', article.category, article)

        self.articles.append(article)
        self.categories.setdefault(article.category, []).append(article)
        return article

    def to_json(self):
        """Serialize the content of the current structure to JSON format.

        Returns:
            str: the JSON document, keys sorted, indented by 4 spaces.
        """
        json_dumps = json.dumps(self, default=json_handler, sort_keys=True, indent=4)
        # Diagnostic output goes through the logger instead of stdout.
        logger.debug('json result: %s', json_dumps)
        return json_dumps

    def serialize(self):
        """Serialize the current files structure to index.json"""
        # Same explicit encoding as the one used to read the articles.
        with open('index.json', 'w', encoding="utf8") as json_serialized_file:
            json_serialized_file.write(self.to_json())