# grnx/grnx/models.py

# coding: utf-8

import datetime
import json
import re
import os
import logging

from slugify import slugify

logger = logging.getLogger(__name__)


# Matches a span that starts with `---` and ends with `---`, with any mix of
# characters and newlines in between. The repetition is greedy, so on text
# containing several `---` markers the match extends to the last one.
# NOTE(review): presumably intended for a front-matter style article header;
# the pattern is not referenced in the visible part of this module - confirm
# where it is used.
# NOTE(review): the two optional groups nested inside `*` allow many ways to
# split the same text, so input with an opening `---` and no closing one may
# backtrack heavily - verify against real inputs.
RE_HEADER = re.compile('---((.)?(\n)?)*---')

def date_handler(obj):
    """Return the ISO 8601 string for a date or datetime, else ``None``.

    Args:
        obj: any value; only ``datetime.date`` instances (which include
            ``datetime.datetime`` instances) are converted.

    Returns:
        ``obj.isoformat()`` for dates and datetimes, ``None`` for anything
        else.
    """
    # datetime.datetime subclasses datetime.date, so one check covers both.
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    return None

def json_handler(obj):
    """Fallback serializer meant to be passed as ``json.dumps(default=...)``.

    Args:
        obj: the object the JSON encoder could not serialize by itself.

    Returns:
        ``obj.isoformat()`` when the object has an ``isoformat`` attribute
        (dates, datetimes, times); otherwise the object's non-empty
        ``__dict__``.

    Raises:
        TypeError: when the object has neither an ``isoformat`` attribute
            nor a non-empty ``__dict__`` (e.g. sets or slotted objects).
    """
    if hasattr(obj, 'isoformat'):
        return obj.isoformat()

    # Objects without a __dict__ (built-ins, __slots__ classes) must end up
    # in the TypeError below rather than leak an AttributeError to the
    # JSON encoder.
    attributes = getattr(obj, '__dict__', None)
    if attributes:
        return attributes

    raise TypeError(
        'Object of type %s with value of %s is not JSON serializable'
        % (type(obj), repr(obj))
    )


def split(file_path):
    """Break a path into its individual components.

    The path is normalised with ``os.path.normpath`` first and then cut on
    the platform path separator.

    Args:
        file_path (str): the path to break apart.

    Returns:
        list: the components of the normalised path, in order.
    """
    normalized_path = os.path.normpath(file_path)
    components = normalized_path.split(os.sep)
    return components


class Article(object):
    """An article built from the path of a file and its text content.

    Args:
        path (str): path of the article file; expected to look like
            ``<root>/<category>/<keyword>/.../<filename>``.
        content (str): the raw text of the article.
        publication_date (datetime.date): optional explicit publication
            date. When omitted, the date is parsed from the filename.

    Attributes:
        path (str): the path given to the constructor.
        content (str): the text given to the constructor.
        filename (str): last component of ``path``.
        filename_without_extension (str): ``filename`` minus its extension.
        publication_date: the explicit date if one was given, otherwise the
            date parsed from the filename, or None when there is none.
        slug (str): slugified filename, without extension and without the
            date found in the filename.
        category (str): second component of ``path``, or '' when the path
            has no directory between the root and the file.
        keywords (list): components between the category and the filename.
        title (str): first line of ``content``, or the filename when the
            content has no line at all.
    """

    def __init__(self, path, content, publication_date=None):
        self.path = path
        self.content = content

        split_path = split(path)

        self.filename = split_path[-1]     # the last element
        self.filename_without_extension = os.path.splitext(self.filename)[0]

        # An explicitly supplied date takes precedence over the one parsed
        # from the filename.
        filename_date = get_date_from_article_filename(self.filename)
        if publication_date is not None:
            self.publication_date = publication_date
        else:
            self.publication_date = filename_date

        # The slug is derived from the filename, so it is the date found in
        # the filename (not an explicit one) that has to be stripped.
        slug = self.filename_without_extension
        if filename_date:
            slug = slug.replace(filename_date.strftime('%Y-%m-%d'), '')
        if slug.startswith('-'):
            slug = slug[1:]
        self.slug = slugify(slug)

        # At least three components (<root>/<category>/<file>) are needed
        # for a category to exist; with fewer, index 1 would be the file
        # name itself, which must never be used as a category.
        if len(split_path) > 2:
            self.category = split_path[1]
            self.keywords = split_path[2:-1]
        else:
            self.category = ''
            self.keywords = []

        lines = content.splitlines()
        self.title = lines[0] if lines else self.filename

    def __str__(self):
        """Returns the title and the publication date of the article."""
        return '{} ({})'.format(self.title, self.publication_date)


def get_date_from_article_filename(article_filename):
    """Get the date encoded at the start of a file name.

    The first ten characters of the name are parsed as ``YYYY-MM-DD``.

    Args:
        article_filename (str): the file name of the article.

    Returns:
        datetime.date: the parsed date, or None when the file name does not
        start with a valid date (a warning is logged in that case).

    Example:
        2018-02-01-firewatch.md -> 1st of February 2018
        2017-02-03-divinity-origin-sin.md -> 3rd of February 2017
        2017-02-30-divinity-origin-sin.md -> None (no such day)
        lynis.md -> None
    """
    try:
        # The slice lives inside the try so that a non-string argument
        # (e.g. None) is reported as "no date" instead of raising.
        prefix = article_filename[0:10]
        return datetime.datetime.strptime(prefix, '%Y-%m-%d').date()
    except (ValueError, TypeError):
        logger.warning('No publication date found %s', article_filename)
        return None


class Site(object):
    """Represents a Site object: the articles found under a root directory.

    Args:
        root_path (str): The path where articles are stored. It is walked
            recursively and every ``.md`` file becomes an article.

    Attributes:
        articles (list): every article that could be read.
        categories (dict): maps a category name to the list of its articles.
        keywords (dict): created empty; no method of this class fills it.
    """

    def __init__(self, root_path='articles'):
        self.articles = []
        self.categories = {}
        self.keywords = {}

        for root, _dirs, files in os.walk(root_path):
            for filename in files:
                if not filename.endswith(".md"):
                    continue
                try:
                    self.build_article(root, filename)
                except UnicodeDecodeError:
                    # Best effort: one unreadable file must not abort the
                    # whole scan, but it should not vanish silently either.
                    logger.warning(
                        'Skipping %s: content is not valid UTF-8',
                        os.path.join(root, filename)
                    )

    def build_article(self, filepath, filename):
        """Build a new article from an existing file.

        The newly built article is appended to `self.articles` and to the
        list of its category in `self.categories`.

        Args:
            filepath (str): the directory where the file is stored.
            filename (str): the filename of the file.

        Returns:
            The `grnx.models.Article` instance that was built.

        Raises:
            UnicodeDecodeError: when the file content is not valid UTF-8.
        """

        article_file_path = os.path.join(filepath, filename)

        with open(article_file_path, encoding="utf8") as f:
            content = f.read()

        article = Article(article_file_path, content)

        logger.warning('article found in %s: %s', article.category, article)

        self.articles.append(article)
        self.categories.setdefault(article.category, []).append(article)

        return article

    def to_json(self):
        """Serialize the content of the current structure to JSON format.

        Returns:
            str: the JSON document, keys sorted and indented by 4 spaces.
        """

        return json.dumps(
            self,
            default=json_handler,
            sort_keys=True,
            indent=4
        )

    def serialize(self):
        """Serialize the current files structure to index.json

        The file is written in the current working directory and replaces
        any existing ``index.json``.
        """

        with open('index.json', 'w', encoding="utf8") as json_serialized_file:
            json_serialized_file.write(self.to_json())