grnx/grnx/models.py

179 lines
4.7 KiB
Python

# coding: utf-8
import datetime
import json
import re
import os
import logging
from slugify import slugify
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Pattern for a span that starts with `---` and ends with `---`. Between the
# two markers it repeats "an optional non-newline character followed by an
# optional newline"; no flags are passed, so `.` itself never matches a
# newline. The repetition is greedy, so a match runs up to the LAST `---`
# available in the text.
# Presumably meant for an article's front-matter header; it is not used in
# the visible part of this module -- TODO confirm against callers.
# NOTE(review): the two optional groups nested under `*` allow the same text
# to be matched in many different ways; a text with an opening `---` and no
# closing one may backtrack heavily -- worth confirming on large inputs.
RE_HEADER = re.compile('---((.)?(\n)?)*---')
def date_handler(obj):
    """Return the ISO format of a date or datetime, ``None`` otherwise.

    Args:
        obj: any value.

    Returns:
        ``obj.isoformat()`` when ``obj`` is a ``datetime.datetime`` or a
        ``datetime.date`` instance; ``None`` for every other value.
    """
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    return None
def json_handler(obj):
    """Handles the JSON object serializer.

    Intended as the ``default=`` hook of ``json.dumps``: it is called with
    the objects the encoder cannot serialize by itself.

    Args:
        obj: the object to convert into something JSON serializable.

    Returns:
        The iso format if the object exposes an ``isoformat`` method
        (dates, datetimes, times).
        The ``__dict__`` attribute for any other object whose ``__dict__``
        is not empty.

    Raises:
        TypeError: when the object is not JSON serialisable, i.e. it has no
            ``isoformat`` method and no (or an empty) ``__dict__``.
    """
    if hasattr(obj, 'isoformat'):
        return obj.isoformat()

    # Objects without a __dict__ (sets, slotted instances, plain object())
    # must end in the documented TypeError, not in an AttributeError, so the
    # attribute is read with a default instead of being accessed directly.
    attributes = getattr(obj, '__dict__', None)
    if attributes:
        return attributes

    raise TypeError('Object of type %s with value of %s is not JSON serializable' % (type(obj), repr(obj)))
def split(file_path):
    """Break a path into the list of its components.

    The path is first normalised with ``os.path.normpath`` and then cut on
    the platform separator ``os.sep``.
    """
    normalised = os.path.normpath(file_path)
    components = normalised.split(os.sep)
    return components
class Article(object):
    """An article, described by the path of its file and its raw content.

    The path is read as ``<root>/<category>/<keyword>/.../<filename>``: the
    component at index 1 is the category and the directories between it and
    the file name are the keywords. This assumes ``path`` is relative and
    starts with a single-component root directory.

    Attributes:
        path: the path given to the constructor.
        content: the raw text of the article.
        filename: the last component of ``path``.
        filename_without_extension: ``filename`` without its extension.
        publication_date: the publication date, or None when unknown.
        slug: the slugified file name, without date prefix and extension.
        category: the category directory, '' when there is none.
        keywords: the directories between the category and the file.
        title: the first line of ``content``, or ``filename`` when the
            content has no line.
    """

    def __init__(self, path, content, publication_date=None):
        """Build an article from its path and content.

        Args:
            path (str): the path of the article file.
            content (str): the text of the article.
            publication_date (datetime.date): the publication date. When
                omitted (None), it is read from the ``YYYY-MM-DD`` prefix
                of the file name, and stays None if there is no such prefix.
        """
        self.path = path
        self.content = content

        split_path = split(path)
        self.filename = split_path[-1]  # the last element
        self.filename_without_extension = os.path.splitext(self.filename)[0]

        # An explicit date wins; the file name is only the fallback. The
        # argument used to be accepted and then silently overwritten.
        if publication_date is None:
            publication_date = get_date_from_article_filename(self.filename)
        self.publication_date = publication_date

        slug = self.filename_without_extension
        if self.publication_date:
            slug = slug.replace(
                self.publication_date.strftime('%Y-%m-%d'), ''
            )
        # Drop the dash that separated the date from the rest of the name.
        if slug and slug.startswith('-'):
            slug = slug[1:]
        self.slug = slugify(slug)

        # Only the directories between the root and the file name can be a
        # category or a keyword. A file stored directly in the root has
        # none, and must not get its own file name as category.
        directories = split_path[1:-1]
        self.category = directories[0] if directories else ''
        self.keywords = directories[1:]

        lines = content.splitlines()
        self.title = lines[0] if lines else self.filename

    def __str__(self):
        """Returns the title and the publication date of the article."""
        return '{} ({})'.format(self.title, self.publication_date)
def get_date_from_article_filename(article_filename):
    """Get the date from a file name.

    Only the first ten characters of the name are considered; they must
    form a ``YYYY-MM-DD`` date.

    Args:
        article_filename (str): the file name of the article.

    Returns:
        datetime.date: the parsed date, or None when the name does not
        start with a valid date (a warning is logged in that case).

    Example:
        2018-02-01-firewatch.md -> 1st of February 2018
        2017-02-03-divinity-origin-sin.md -> 3rd of February 2017
        2017-02-30-divinity-origin-sin.md -> None (no such day)
        lynis.md -> None
    """
    try:
        # The slice lives inside the try so that a non-string argument
        # (e.g. None) is reported like any other undated name instead of
        # raising before the TypeError handler can see it.
        prefix = article_filename[0:10]
        return datetime.datetime.strptime(prefix, '%Y-%m-%d').date()
    except (ValueError, TypeError):
        logger.warning('No publication date found %s', article_filename)
        return None
class Site(object):
    """Represents a Site object.

    Building a site walks ``root_path`` recursively and loads every ``.md``
    file found as an article.

    Args:
        root_path (path): The path where articles are stored.

    Attributes:
        articles (list): all the articles, in the order they were found.
        categories (dict): maps a category name to the list of its articles.
        keywords (dict): created empty; nothing in this class fills it.
    """

    def __init__(self, root_path='articles'):
        self.articles = []
        self.categories = {}
        self.keywords = {}
        for root, _dirs, files in os.walk(root_path):
            for filename in files:
                if not filename.endswith(".md"):
                    continue
                try:
                    self.build_article(root, filename)
                except UnicodeDecodeError as error:
                    # Best effort: one badly encoded file must not abort the
                    # whole scan, but the skipped file is reported.
                    logger.warning(
                        'skipping %s, not valid utf8: %s',
                        os.path.join(root, filename), error
                    )

    def build_article(self, filepath, filename):
        """Build a new article from an existing file.

        The newly built article is added to the property `self.articles`
        and to the list of its category in `self.categories`.

        Args:
            filepath (str): the directory where the file is stored.
            filename (str): the filename of the file.

        Returns:
            The newly built `grnx.models.Article` instance.

        Raises:
            UnicodeDecodeError: when the file content is not valid UTF-8.
        """
        article_file_path = os.path.join(filepath, filename)
        with open(article_file_path, encoding="utf8") as f:
            content = f.read()

        article = Article(article_file_path, content)
        logger.warning('article found in %s: %s', article.category, article)
        self.articles.append(article)
        self.categories.setdefault(article.category, []).append(article)
        return article

    def to_json(self):
        """Serialize the content of the current structure to JSON format.

        Returns:
            str: the JSON document, keys sorted, indented by 4 spaces.
        """
        return json.dumps(
            self,
            default=json_handler,
            sort_keys=True,
            indent=4
        )

    def serialize(self):
        """Serialize the current files structure to index.json

        The file is written in the current working directory and replaces
        any existing ``index.json``.
        """
        with open('index.json', 'w', encoding="utf8") as json_serialized_file:
            json_serialized_file.write(self.to_json())