golbarg

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

# Directory layout. All paths are relative, so they resolve against the
# directory the program is run from.
CONTENT_DIR   = "content"    # files rendered as templates to the site root
POSTS_DIR     = "posts"      # post source files
TEMPLATES_DIR = "templates"  # Jinja2 templates
STATIC_DIR    = "static"     # static files, symlinked into SITE_DIR
SITE_DIR      = "site"       # generated output

# A line made only of two or more dashes separates a post's YAML header from
# its Markdown body. This string is replaced by its compiled form further
# down, once `re` has been imported.
POST_HEADER_SEP_RE = "^--+$"

import datetime
import os
import os.path
import re
import shutil
import sys
import time
import unicodedata
import urllib.parse

import jinja2
import markdown
import yaml

# Compile regexps
# re.M makes ^ and $ match at every line boundary, so the separator is found
# on whichever line of the post it sits.
POST_HEADER_SEP_RE = re.compile(POST_HEADER_SEP_RE, re.M)

# Jinja2 filters management
class JinjaFilters:
    """Registry of the functions to install as Jinja2 filters."""

    # Functions registered so far, in registration order
    filters = []

    @staticmethod
    def add(func):
        """Register ``func`` and return it unchanged, so it works as a decorator."""
        JinjaFilters.filters.append(func)
        return func

    @staticmethod
    def to_env(env):
        """Store every registered function in the mapping ``env`` under its own name."""
        by_name = {registered.__name__: registered for registered in JinjaFilters.filters}
        for filter_name, registered in by_name.items():
            env[filter_name] = registered

class Golbarg:
    def __init__(self):
        """Load the site configuration and set up the Jinja2 environment.

        Reads ``config.yaml`` from the current working directory, makes sure
        the configuration holds a ``hidden_tags`` list, and creates a Jinja2
        environment loading templates from TEMPLATES_DIR. The environment
        exposes the configuration (``site``), the list of posts (``posts``)
        and a month-number -> month-name mapping (``months``) as globals, and
        receives every filter registered with JinjaFilters.
        """
        # Load configuration from the config.yaml file.
        # safe_load is used because PyYAML >= 6 rejects yaml.load() without
        # an explicit Loader, and a site configuration only needs plain YAML
        # types. An empty file yields None, hence the fallback to {}.
        with open("config.yaml") as cf:
            self.site_config = yaml.safe_load(cf) or {}

        # This very list object is shared with the templates below, so it
        # must only ever be mutated in place.
        self.posts = []

        # Hidden tags: a missing or empty entry becomes an empty list
        if not self.site_config.get('hidden_tags'):
            self.site_config['hidden_tags'] = []

        self.env = jinja2.Environment(loader=jinja2.FileSystemLoader(TEMPLATES_DIR))
        self.env.globals['site'] = self.site_config
        self.env.globals['posts'] = self.posts

        JinjaFilters.to_env(self.env.filters)

        # Months: month number -> name as formatted by strftime("%B")
        self.env.globals['months'] = {
            m: datetime.date(2010, m, 1).strftime("%B") for m in range(1, 13)
        }

    def run(self):
        """Build the whole site: parse the posts, then write every output."""
        # Read the post sources and derive the shared metadata first
        for step in (self.parse_posts, self.update_metadata):
            step()

        # Make sure the destination directory exists
        os.makedirs(SITE_DIR, exist_ok=True)

        # Site root, posts, archive and tag pages, in that order
        for step in (self.make_site_root, self.make_posts,
                     self.make_archive, self.make_tags):
            step()

        # Finally expose the static files through a symlink, unless one is
        # already in place
        print("Symlinking static files...")
        static_link = os.path.join(SITE_DIR, STATIC_DIR)
        if not os.path.islink(static_link):
            os.symlink(os.path.abspath(STATIC_DIR), static_link)

        print("All done! =]")

    def parse_posts(self):
        """Build a Post for every regular file in POSTS_DIR, in sorted path order."""
        print("Parsing posts...")

        candidates = (os.path.join(POSTS_DIR, name) for name in os.listdir(POSTS_DIR))
        post_files = sorted(path for path in candidates if os.path.isfile(path))

        # Extend in place: the Jinja2 'posts' global refers to this same list
        self.posts.extend(Post(post_path) for post_path in post_files)
        print(("Parsed %d posts" % len(post_files)))

    def update_metadata(self):
        """Derive site-wide and per-post metadata from the parsed posts.

        Sorts ``self.posts`` by date, records the site's last update date and
        copyright period, gives every post an absolute URL and a language,
        groups the posts by visible and hidden tag, and counts the posts per
        year and month for the archive. Tag counts, their minimum and
        maximum, the archive and the month names are published as Jinja2
        globals.

        Raises:
            IndexError: if there is no post at all.
            KeyError: if ``url`` is missing from the site configuration, or
                ``language`` is missing while a post does not set its own.
        """
        # Sort posts by date (in place, the templates share this list)
        self.posts.sort(key=Post.timestamp)
        oldest, newest = self.posts[0], self.posts[-1]

        # Site last update date
        self.site_config['update_date'] = newest.date

        # Copyright period
        self.site_config['copyright_period'] = "%04d - %04d" % (oldest.date.year, newest.date.year)

        # Post absolute URL and language
        for post in self.posts:
            post.absolute_url = urllib.parse.urljoin(
                self.site_config['url'],
                post.relative_url
            )
            if not post.has_config("language"):
                post.language = self.site_config['language']

        # Tags and hidden tags: tag -> list of posts carrying it
        self.tags = {}
        self.hidden_tags = {}
        hidden = self.site_config['hidden_tags']

        for post in self.posts:
            # Iterate over a snapshot: remove_tag() shrinks post.tags, and
            # removing from the list being iterated would skip the tag that
            # follows each hidden one.
            for tag in list(post.tags):
                if tag in hidden:
                    post.remove_tag(tag)
                    dst = self.hidden_tags
                else:
                    dst = self.tags
                dst.setdefault(tag, []).append(post)

        # Number of posts per visible tag, with the extremes of those counts.
        # Without any tag, tag_min stays at sys.maxsize and tag_max at 0.
        env_tags = {tag: len(tagged) for tag, tagged in self.tags.items()}
        self.env.globals['tags'] = env_tags
        self.env.globals['tag_min'] = min(env_tags.values(), default=sys.maxsize)
        self.env.globals['tag_max'] = max(env_tags.values(), default=0)

        # Archive: year -> month -> number of posts
        self.archive = {}
        self.month_names = {}
        for post in self.posts:
            y, m = post.date.year, post.date.month
            self.month_names.setdefault(m, post.date.strftime("%B"))
            per_month = self.archive.setdefault(y, {})
            per_month[m] = per_month.get(m, 0) + 1
        self.env.globals['archive'] = self.archive
        self.env.globals['month_names'] = self.month_names

        print(("Latest update: " + str(self.site_config['update_date'])))

    def make_site_root(self):
        """Render each file found directly in CONTENT_DIR as a template into SITE_DIR."""
        print("Generating files at site root...")

        root_files = sorted(
            name for name in os.listdir(CONTENT_DIR)
            if os.path.isfile(os.path.join(CONTENT_DIR, name))
        )

        for fn in root_files:
            # The file's own text is the template source
            with open(os.path.join(CONTENT_DIR, fn)) as src:
                tpl_source = src.read()
            tpl = self.env.from_string(tpl_source)

            # The only page-specific context is the page's absolute URL
            ctx = {'absolute_url': urllib.parse.urljoin(self.site_config['url'], fn)}

            # Write the rendered page under the same file name
            with open(os.path.join(SITE_DIR, fn), 'w') as out:
                out.write(tpl.render(ctx))
            print(("  " + fn))

    def make_posts(self):
        """Generate all posts"""
        
        print("Generating posts...")

        for n in range(len(self.posts)):
            post = self.posts[n]

            # Template to load?
            if post.has_config("template"):
                tpl_name = post.template
            else:
                tpl_name = self.site_config['default_post_template']
            tpl = self.env.get_template(tpl_name)

            # Prepare template context
            ctx = {'post': post}
            if n > 0:
                ctx['prev_post'] = self.posts[n-1]
            if n < len(self.posts)-1:
                ctx['next_post'] = self.posts[n+1]

            # Render template to file
            post_dir = os.path.split(post.file_path)[0]
            if not os.path.isdir(post_dir):
                os.makedirs(post_dir)
            with open(post.file_path, 'w') as out:
                out.write(tpl.render(ctx))
            print(("  %s - %s (%s)" % (post.date.strftime("%Y-%m-%d"), post.title, post.source_path)))
    
    def make_archive(self):
        """Generate the archive index pages, one per year and one per month.

        For every year in ``self.archive`` this writes
        ``SITE_DIR/archive/YYYY/index.html`` from the template named by
        ``archive_year_template`` and, for every month recorded under that
        year, ``SITE_DIR/archive/YYYY/MM/index.html`` from the template named
        by ``archive_month_template``. Year pages get ``year`` in their
        context; month pages get ``year`` and ``month``.
        """
        print("Generating archive...")

        year_tpl = self.env.get_template(self.site_config['archive_year_template'])
        month_tpl = self.env.get_template(self.site_config['archive_month_template'])

        for year, months in self.archive.items():
            year_dir = os.path.join(SITE_DIR, "archive", "%04d" % year)
            os.makedirs(year_dir, exist_ok=True)

            # A fresh context per page: a dict shared across iterations
            # would still carry the last 'month' of the previous year when
            # the next year's index is rendered.
            with open(os.path.join(year_dir, "index.html"), 'w') as out:
                out.write(year_tpl.render({'year': year}))
            print(("  %04d" % year))

            for month in months:
                month_dir = os.path.join(year_dir, "%02d" % month)
                os.makedirs(month_dir, exist_ok=True)

                with open(os.path.join(month_dir, "index.html"), 'w') as out:
                    out.write(month_tpl.render({'year': year, 'month': month}))
                print(("    %04d/%02d" % (year, month)))


    def make_tags(self):
        """Write the pages of every visible tag, then of every hidden tag."""
        print("Generating tag pages...")

        # 'tag_template' is either a single template name, rendered as
        # index.html, or a mapping of output file name -> template name.
        tpl_names = self.site_config['tag_template']
        if type(tpl_names) is str:
            tpl_names = {"index.html": tpl_names}
        tpls = {
            file_name: self.env.get_template(tpl_name)
            for file_name, tpl_name in tpl_names.items()
        }
        ctx = {}

        # Same treatment for both groups; only the progress line differs
        groups = (
            (self.tags, "  %s"),
            (self.hidden_tags, "  %s (hidden)"),
        )
        for tagged, progress in groups:
            for tag, tag_posts in tagged.items():
                tag_dir = os.path.join(SITE_DIR, "tag", tag)
                os.makedirs(tag_dir, exist_ok=True)

                ctx['tag'] = tag
                ctx['posts'] = tag_posts
                for file_name, tpl in tpls.items():
                    with open(os.path.join(tag_dir, file_name), 'w') as out:
                        out.write(tpl.render(ctx))
                print((progress % tag))
            

class Post:
    def __init__(self, path):
        """Parse a post source file.

        The file holds a YAML header, a separator line matching
        POST_HEADER_SEP_RE, then the Markdown body. The header becomes
        ``self.config`` and the body is converted to HTML in
        ``self.content``. ``title`` and ``date`` are required headers;
        ``tags`` defaults to an empty list and ``slug`` to a slug derived
        from the title. ``relative_url`` and ``file_path`` are built from the
        date and the slug.

        Args:
            path: path of the post source file.

        Prints a message and exits the program with status 1 when the
        separator is missing, the header is not a mapping, a required header
        is missing or empty, or ``date`` is not a date.
        """
        self.source_path = path

        with open(self.source_path) as post_file:
            post_data = post_file.read()

        # Separate header (yaml) and content (markdown)
        parts = POST_HEADER_SEP_RE.split(post_data, maxsplit=1)
        if len(parts) != 2:
            print(("! missing header separator in %s" % path))
            sys.exit(1)
        post_header, post_content = parts

        # Parse header and content.
        # safe_load is used because PyYAML >= 6 rejects yaml.load() without
        # an explicit Loader, and a header only needs plain YAML types
        # (dates included). An empty header parses to None.
        self.config = yaml.safe_load(post_header)
        if self.config is None:
            self.config = {}
        elif not isinstance(self.config, dict):
            print(("! header is not a mapping in %s" % path))
            sys.exit(1)

        # 'md_exts' may be a list of extension names or a single name
        md_exts = ['extra']
        post_exts = self.config.get('md_exts')
        if isinstance(post_exts, list):
            md_exts.extend(post_exts)
        elif post_exts is not None:
            md_exts.append(post_exts)
        # Extensions are passed by keyword: Markdown >= 3 no longer accepts
        # them as a positional argument.
        self.content = markdown.markdown(post_content, extensions=md_exts)

        # Check for required headers
        all_headers_ok = True
        for h in ["title", "date"]:
            if h not in self.config:
                print(("! missing header in %s: %s" % (path, h)))
                all_headers_ok = False
            elif self.config[h] is None:
                print(("! empty header in %s: %s" % (path, h)))
                all_headers_ok = False
        # The date is used for URLs, paths and sorting, so it has to be a
        # real date (datetime.datetime is a subclass of datetime.date).
        if all_headers_ok and not isinstance(self.config['date'], datetime.date):
            print(("! invalid date in %s: %s" % (path, self.config['date'])))
            all_headers_ok = False
        if not all_headers_ok:
            sys.exit(1)

        # Optional headers
        if not isinstance(self.config.get('tags'), list):
            self.config['tags'] = []

        if self.config.get('slug') is None:
            self.config['slug'] = sluggify(self.config['title'])

        # Add some attributes
        date = self.config['date']
        slug = self.config['slug']
        date_parts = ("%04d" % date.year, "%02d" % date.month, "%02d" % date.day)
        self.relative_url = "/%s/%s/%s/%s/" % (date_parts + (slug,))
        self.file_path = os.path.join(SITE_DIR, *date_parts, slug, "index.html")

    def __getattr__(self, name):
        try:
            # Is it something like "a.b"?
            s = name.split(".", 1)
            if len(s) <= 1:
                return self.config[name]
            else:
                return getattr(self.config[s[0]], s[1])
        except KeyError:
            raise AttributeError
    __getitem__ = __getattr__

    def has_config(self, name):
        """Tell whether the post's header defines ``name``."""
        header = self.config
        return name in header

    def remove_tag(self, tag):
        """Drop the first occurrence of ``tag`` from the post's tags, if any."""
        tags = self.config['tags']
        if tag in tags:
            tags.remove(tag)

    @staticmethod
    def timestamp(post):
        return time.mktime(post.date.timetuple())


# Jinja2 filters
@JinjaFilters.add
def rfc3339(date):
    """Format a date or datetime for use as an RFC 3339 timestamp.

    Plain dates are returned in ISO form (``YYYY-MM-DD``). Datetimes that
    carry a UTC offset are returned in ISO form, which already ends with
    that offset. Naive datetimes get a ``Z`` suffix, i.e. they are taken to
    be in UTC.
    """
    if not isinstance(date, datetime.datetime):
        return date.isoformat()

    stamp = date.isoformat()
    # Only naive values need a zone designator: for aware values isoformat()
    # has already appended "+HH:MM", and adding the %Z name after it (as in
    # "...+00:00UTC") would produce an invalid timestamp.
    if date.utcoffset() is None:
        stamp += 'Z'
    return stamp

@JinjaFilters.add
def human_readable_date(date):
    """Format a date as "Month day, Year".

    " at HH:MM" is appended only when the value's type is exactly
    datetime.datetime.
    """
    with_time = type(date) is datetime.datetime
    fmt = "%B %e, %Y" + (" at %H:%M" if with_time else "")
    return date.strftime(fmt)

@JinjaFilters.add
def short_date(date):
    """Format a date as abbreviated month name, a dot, then the day of the month."""
    fmt = "%b. %e"
    return date.strftime(fmt)

@JinjaFilters.add
def filter_month(posts, year, month):
    """Return the posts dated in the given year and month, in their original order."""
    selected = []
    for post in posts:
        when = post.date
        if when.year == year and when.month == month:
            selected.append(post)
    return selected

@JinjaFilters.add
def shorten(value, length):
    """Truncate ``value`` to about ``length`` characters, ending with " (...)".

    A ``length`` not larger than the " (...)" marker is first increased by
    the marker's length. Strings no longer than ``length`` are returned
    unchanged. Longer ones are cut so that text plus marker fit in
    ``length``; when the cut contains whitespace, the text after its last
    run of whitespace (a possibly partial word) is dropped.

    Args:
        value: the string to shorten.
        length: the target maximum length.

    Returns:
        ``value`` itself, or the shortened text followed by " (...)".
    """
    ender = " (...)"
    if length <= len(ender):
        length += len(ender)
    if len(value) <= length:
        return value

    # Cut the string at its last space
    words = value[:length - len(ender)].rsplit(None, 1)
    # rsplit() returns an empty list when the cut is empty or made only of
    # whitespace; indexing it unconditionally raised IndexError.
    head = words[0] if words else ""
    return head + ender

# Convert post title to slug
# Code to remove accents from http://bit.ly/aIvvHB
def sluggify(title):
    """Turn a title into a lower-case ASCII slug made of letters, digits and dashes."""
    # Decompose accented characters, then drop the combining marks
    decomposed = unicodedata.normalize('NFD', str(title))
    unaccented = ''.join(ch for ch in decomposed if unicodedata.category(ch) != 'Mn')

    # Lower case, then every run of other characters becomes one dash
    dashed = re.sub(r'[^a-zA-Z0-9]+', '-', unaccented.lower())

    # Collapse any repeated dashes
    return re.sub(r'--+', '-', dashed)


if __name__ == '__main__':
    # Build the site when the file is executed as a script
    Golbarg().run()