179 lines
4.7 KiB
Python
179 lines
4.7 KiB
Python
|
|
|
|
import datetime
|
|
import json
|
|
import re
|
|
import os
|
|
import logging
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# Matches text fenced by two `---` markers; the body may span several lines.
# NOTE(review): presumably meant for an article front-matter header; it is not
# referenced in this part of the file. The nested optional groups can backtrack
# heavily on long inputs that lack a closing fence.
RE_HEADER = re.compile('---((.)?(\n)?)*---')


def date_handler(obj):
    """Return the ISO 8601 representation of a date-like object.

    Args:
        obj: any object.

    Returns:
        ``obj.isoformat()`` when *obj* is a ``datetime.date`` or a
        ``datetime.datetime``, otherwise None.
    """
    # datetime.datetime is a subclass of datetime.date, so a single
    # isinstance check covers both types.
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    return None
|
|
|
|
def json_handler(obj):
    """Serialize objects that the JSON encoder cannot handle natively.

    Meant to be used as the ``default`` callable of ``json.dumps``.

    Args:
        obj: the object to convert into something JSON serializable.

    Returns:
        The ISO format string if the object exposes ``isoformat``
        (e.g. a date), otherwise the object's non-empty ``__dict__``.

    Raises:
        TypeError: when the object has neither ``isoformat`` nor a
            non-empty ``__dict__``.
    """
    if hasattr(obj, 'isoformat'):
        return obj.isoformat()

    # Builtin and slotted instances (set, bytes, object(), ...) have no
    # __dict__ at all; look it up defensively so they reach the TypeError
    # below instead of leaking an AttributeError out of json.dumps.
    attributes = getattr(obj, '__dict__', None)
    if attributes:
        return attributes

    raise TypeError(
        'Object of type %s with value of %s is not JSON serializable'
        % (type(obj), repr(obj))
    )
|
|
|
|
|
|
def to_json(obj, indent):
    """Return *obj* rendered as a JSON string.

    Keys are emitted in sorted order, and objects the encoder cannot
    serialize natively are delegated to ``json_handler``.

    Args:
        obj: the structure to serialize.
        indent: forwarded unchanged to ``json.dumps`` as ``indent``.
    """
    dump_options = {
        'default': json_handler,
        'sort_keys': True,
        'indent': indent,
    }
    return json.dumps(obj, **dump_options)
|
|
|
|
|
|
def split(file_path):
    """Return the components of *file_path* as a list of strings.

    The path is normalised with ``os.path.normpath`` first, then cut on
    the platform separator ``os.sep``.
    """
    normalized_path = os.path.normpath(file_path)
    return normalized_path.split(os.sep)
|
|
|
|
|
|
class Article(object):
    """An article described by the path of its file and its raw content.

    Args:
        path (str): path of the article file; its components provide the
            file name, the category and the keywords.
        content (str): the text of the article.
        publication_date (datetime.date): optional explicit publication
            date. When omitted, the date is read from the file name.

    Attributes:
        path, content: the constructor arguments, stored unchanged.
        filename (str): last component of ``path``.
        filename_without_extension (str): ``filename`` minus its extension.
        publication_date: the explicit date if given, otherwise the date
            parsed from the file name (None when there is none).
        slug (str): file name without extension, date prefix and leading
            dash.
        category (str): second component of ``path`` ('' if absent).
        keywords (list): components between the category and the file name.
        title (str): first line of ``content``, or ``filename`` when the
            content has no line.
    """

    def __init__(self, path, content, publication_date=None):
        self.path = path
        self.content = content

        split_path = split(path)

        self.filename = split_path[-1]  # the last element
        self.filename_without_extension = os.path.splitext(self.filename)[0]

        # The date carried by the file name is always parsed because the
        # slug must drop that prefix even when the caller supplies an
        # explicit publication date.
        filename_date = get_date_from_article_filename(self.filename)
        if publication_date is not None:
            self.publication_date = publication_date
        else:
            self.publication_date = filename_date

        slug = self.filename_without_extension
        if filename_date is not None:
            # Strip only the leading date; a blanket str.replace would also
            # remove the same date appearing later in the name.
            date_prefix = filename_date.strftime('%Y-%m-%d')
            if slug.startswith(date_prefix):
                slug = slug[len(date_prefix):]
        if slug.startswith('-'):
            slug = slug[1:]
        self.slug = slug

        # NOTE(review): for a two-component path such as `articles/x.md`
        # the category below is the file name itself - confirm this is
        # intended.
        if len(split_path) > 1:
            self.category = split_path[1]
            self.keywords = split_path[2:-1]
        else:
            self.category = ''
            self.keywords = []

        content_lines = content.splitlines()
        self.title = content_lines[0] if content_lines else self.filename

    def __str__(self):
        """Returns the title and the publication date of the article."""
        return '{} ({})'.format(self.title, self.publication_date)
|
|
|
|
|
|
def get_date_from_article_filename(article_filename):
    """Get the date from a file name.

    The first ten characters of the name are parsed as ``YYYY-MM-DD``.

    Args:
        article_filename (str): the file name of the article.

    Returns:
        The parsed ``datetime.date``, or None when the name does not start
        with a valid date (a warning is logged in that case).

    Example:
        2018-02-01-firewatch.md -> 1st of February 2018
        2017-02-03-divinity-origin-sin.md -> 3rd of February 2017
        lynis.md -> None
    """
    try:
        # The slice lives inside the try block so that a non-sliceable
        # argument (e.g. None) is reported like any other undated name.
        prefix = article_filename[0:10]
        return datetime.datetime.strptime(prefix, '%Y-%m-%d').date()
    except (ValueError, TypeError):
        # Logger.warn is a deprecated alias that was removed in Python 3.13.
        logger.warning('No publication date found %s', article_filename)
        return None
|
|
|
|
|
|
class Site(object):
    """Represents a Site object.

    Args:
        root_path (str): The path where articles are stored; it is walked
            recursively and every ``.md`` file becomes an article.

    Attributes:
        articles (list): contains all the articles that were built.
        categories (dict): maps a category to the slugs of its articles.
        keywords (dict): initialised empty; nothing in this class fills it.
    """

    def __init__(self, root_path='articles'):
        self.articles = []
        self.categories = {}
        self.keywords = {}

        for root, _dirs, files in os.walk(root_path):
            for filename in files:
                if not filename.endswith(".md"):
                    continue
                try:
                    self.build_article(root, filename)
                except UnicodeDecodeError:
                    # Best effort: a file that is not valid UTF-8 is skipped,
                    # but leave a trace instead of dropping it silently.
                    logger.warning(
                        'skipping %s: content is not valid UTF-8',
                        os.path.join(root, filename),
                    )

    def build_article(self, filepath, filename):
        """Build a new article from an existing file.

        The newly built article is added to the property `self.articles`
        and its slug is registered under its category in
        `self.categories`.

        Args:
            filepath (str): the directory where the file is stored.
            filename (str): the filename of the file.

        Returns:
            The newly built `Article` instance.

        Raises:
            UnicodeDecodeError: when the file content is not valid UTF-8.
        """
        article_file_path = os.path.join(filepath, filename)

        with open(article_file_path, encoding="utf8") as f:
            content = f.read()

        article = Article(article_file_path, content)
        logger.warning('article found in %s: %s', article.category, article)

        self.articles.append(article)
        self.categories.setdefault(article.category, []).append(article.slug)

        return article

    def serialize(self, indent=None):
        """Serialize the current files structure to index.json.

        The categories mapping is written separately to categories.json.
        Both files are created in the current working directory.

        Args:
            indent: forwarded to `to_json` to control pretty-printing.
        """
        with open('index.json', 'w') as json_serialized_file:
            json_serialized_file.write(to_json(self, indent))

        with open("categories.json", "w") as json_serialized_file:
            json_serialized_file.write(to_json(self.categories, indent))
|