#!/usr/bin/env python
# -*- python -*-

import getopt, sys
from datetime import datetime
import os
import os.path
import shutil
import re
import sys
import urllib
from BeautifulSoup import BeautifulSoup
import traceback

# Root of the local weblog checkout. main() moves the finished entry into
# its _posts/ sub-directory and runs the git commands from here.
blogdir = os.path.expanduser('~/w/weblog.dme.org/')
# When True, progress and diagnostic messages are printed. Set by the -v
# command line option in main().
verbose = False

def flatten(s):
    """Return a filename-friendly form of the string s.

    The result is lower case, has any trailing '.suffix' removed, uses a
    dash in place of each space, contains only the characters a-z, 0-9
    and '-', and never has two dashes in a row. It may be the empty
    string if nothing usable remains.
    """
    s = s.lower()
    # Remove file suffix (everything from the last dot onwards).
    s = re.sub(r'\.[^\.]+$', '', s)
    # Replace spaces with dash.
    s = re.sub(r' ', '-', s)
    # Keep only letters, numbers and dashes.
    s = re.sub(r'[^a-z0-9-]', '', s)
    # Compress multiple dashes.
    s = re.sub(r'-+', '-', s)

    return s
    
class Page(object):
    """A plain weblog entry.

    An entry is rendered as a front-matter header delimited by '---'
    lines (layout, type, title, tags) followed by the body, the comment
    and the footer. Sub-classes override type(), body() and friends to
    produce other kinds of entry.
    """

    # Evaluated once, when the class body is executed, so every page
    # created during a run carries the same timestamp.
    _date = datetime.now().replace(microsecond=0)
    _title = None
    _comment = None

    def __init__(self):
        # The tag list must be per-instance: a class-level list would be
        # shared, so tags added to one page (for example one that is later
        # abandoned because its construction failed) would show up on
        # every other page as well.
        self._tags = []

    def type(self):
        """Return the entry type written to the header."""
        return 'post'

    def date(self):
        """Return the datetime used for the filename and fallback slug."""
        return self._date

    def title(self, title = None):
        """Set the title if a non-empty one is given; return the title.

        The stored title has surrounding whitespace stripped. When no
        title has been set the empty string is returned.
        """
        if title:
            self._title = title.strip()
        return self._title or ''

    def header(self):
        """Return the front-matter block, including both '---' lines."""
        # Escape backslashes and double quotes so that the title cannot
        # terminate the double-quoted value early.
        title = self.title().replace('\\', '\\\\').replace('"', '\\"')

        parts = [
            '---\n',
            'layout: default\n',
            'type: %s\n' % self.type(),
            'title: "%s"\n' % title,
            'tags:\n',
        ]
        parts.extend('- %s\n' % tag for tag in self._tags)
        parts.append('---\n')

        return ''.join(parts)

    def body(self):
        """Return the main text of the entry (empty for a plain page)."""
        return '\n'

    def comment(self, comment = None):
        """Set the comment if a non-empty one is given; return the comment.

        When no comment has been set a single newline is returned.
        """
        if comment:
            self._comment = comment
        return self._comment or '\n'

    def footer(self):
        """Return the text that ends the entry (empty for a plain page)."""
        return ''

    def entry(self):
        """Return the complete text of the entry."""
        return '\n'.join([self.header(), self.body(), self.comment(), self.footer()])

    def slug(self):
        """Return the filename stem: the flattened title, or HHMMSS if that is empty."""
        s = flatten(self.title())
        if s == '':
            return self.date().strftime('%H%M%S')
        else:
            return s

    def filename(self):
        """Return 'YYYY-MM-DD-<slug>.markdown' for this entry."""
        return '%s-%s.markdown' % (self.date().strftime('%Y-%m-%d'), self.slug())

    def save(self, directory = '/tmp/'):
        """Write the entry, UTF-8 encoded, into directory; return the path."""
        fn = os.path.join(directory, self.filename())

        # Binary mode because encoded bytes are written; the with block
        # closes the file even if the write fails.
        with open(fn, 'wb') as f:
            f.write(self.entry().encode('utf-8'))

        return fn

    def tag(self, tag):
        """Add tag to this entry unless it is already present."""
        if tag not in self._tags:
            self._tags.append(tag)

    def __str__(self):
        return self.entry()

class LinkPage(Page):
    """An entry of type 'link' whose body is a markdown link to a URL."""

    def __init__(self, link):
        super(LinkPage, self).__init__()
        # The URL this entry points at.
        self._link = link

    def link(self):
        """Return the URL given to the constructor."""
        return self._link

    def type(self):
        """Return the entry type written to the header."""
        return 'link'

    def body(self):
        """Return '[title](link)' followed by a newline."""
        text = self.title()
        target = self.link()
        return '[%s](%s)\n' % (text, target)

class ImagePage(LinkPage):
    """An entry of type 'image' whose body is an <img> tag for the link."""

    def type(self):
        """Return the entry type written to the header."""
        return 'image'

    def body(self):
        """Return a full-width <img> element using the title as alt text."""
        source = self.link()
        alt_text = self.title()
        return '<img src="%s" alt="%s" width="100%%">\n' % (source, alt_text)

class SoupedPage(LinkPage):
    """A link entry that downloads and parses the page it links to.

    The parsed document is kept in self._soup for sub-classes to use,
    and the entry title defaults to the document's <title>, if any.
    Errors raised while fetching or parsing the page propagate to the
    caller.
    """

    def __init__(self, link):
        super(SoupedPage, self).__init__(link)

        # Read the whole document and close the connection straight away
        # rather than leaving the response open until it is collected.
        response = urllib.urlopen(self.link())
        try:
            markup = response.read()
        finally:
            response.close()
        self._soup = BeautifulSoup(markup)

        try:
            self.title(self._soup.html.head.title.string)
        except (AttributeError, TypeError):
            # No <html>, <head> or <title> element: leave the title unset.
            pass

class FlickrImagePage(SoupedPage):
    """An image entry built from a Flickr photo page.

    Tags and title are taken from the page's <meta> headers, and the
    body embeds the image named by the page's 'image_src' link.
    """

    def __init__(self, link, title = None):
        """Fetch link and derive tags and title from it.

        A non-empty title argument overrides the title found in the page.
        Raises if the page lacks the expected keywords or title headers.
        """
        super(FlickrImagePage, self).__init__(link)
        self.tag('photograph')

        head = self._soup.html.head

        # Flickr includes a meta header with the keywords for the
        # image. They are flattened (lower case, spaces removed). They
        # also add three new keywords of their own:
        #
        # <meta name="keywords" content="unitedkingdom, essex,
        # 	hydehall, photography, photos, photo">

        keywords = head.find('meta', attrs={'name': 'keywords'})['content'].split(', ')

        # These tags are not something that we added, and we prefer
        # 'photograph' to any of them.
        remove_tags = ['photography', 'photos', 'photo']
        # An explicit loop, because map() used only for its side effects
        # does nothing where map() is lazy.
        for keyword in keywords:
            if keyword not in remove_tags:
                self.tag(keyword)

        # The title of the picture rather than that of the page.
        self.title(head.find('meta', attrs={'name': 'title'})['content'])

        # A title supplied by the caller wins over the one in the page.
        if title:
            self.title(title)

    def type(self):
        """Return the entry type written to the header."""
        return 'image'

    def body(self):
        """Return the linked image plus a caption pointing at Flickr.

        If the page lacks the link headers needed to build the image,
        fall back to the body produced by the parent class.
        """
        try:
            t = self.title()
            head = self._soup.html.head
            i = head.find('link', attrs={'rel': 'image_src'})['href']
            s = head.find('link', attrs={'rel': 'canonical'})['href']

            # The image is often only 240px wide - prefer the bigger size.
            i = i.replace('_m', '_b', 1)

            imagelink = '<a href="%s" title="%s">\n<img src="%s" alt="%s" width="100%%">\n</a>\n' % (self.link(), t, i, t)
            comment = '%s (at [flickr](%s))\n' % (t, s)
            return imagelink + comment
        except (AttributeError, KeyError, TypeError):
            # A missing element shows up as None (AttributeError or
            # TypeError when used) and a missing attribute as KeyError.
            # The result must be returned, otherwise body() yields None
            # and entry() cannot join the pieces.
            return super(FlickrImagePage, self).body()

# Ordered list of [regexp, page class] pairs. url_to_entry() tries them from
# top to bottom, matching each regexp against the start of the URL and
# ignoring case. The empty patterns match every URL, so the last two entries
# act as general fallbacks.
rules = [
    # Raw strings keep the backslashes for the regexp engine; the dots in
    # the host name are escaped so they only match a literal '.'.
    [r'^https?://www\.flickr\.com/photos/.*/[0-9]+/', FlickrImagePage],
    [r'.*\.(jpg|png|gif)$', ImagePage],
    ['', SoupedPage],
    ['', LinkPage],
    ]

def url_to_entry(url):
    """Return a page object suited to url.

    With no URL a plain Page is returned. Otherwise each entry of rules
    is tried in order: the first page class whose regexp matches and
    whose constructor succeeds provides the result. A class whose
    constructor raises is skipped and the next rule is tried. Returns
    None if every matching class failed.
    """
    if url is None:
        return Page()

    for pattern, page_class in rules:
        if verbose:
            print('trying regexp %s' % pattern)
        if not re.match(pattern, url, re.IGNORECASE):
            continue
        if verbose:
            print('it matched')
        try:
            return page_class(url)
        except Exception:
            # Construction failed (for example the page could not be
            # fetched or parsed); fall through to the next rule.
            # print_exc() writes the traceback itself and returns None,
            # so its result must not be printed.
            if verbose:
                traceback.print_exc()

    return None

def main():
    """Create a weblog entry from the command line options and publish it.

    The entry is written to a temporary file, optionally opened in an
    editor, then moved into the _posts/ directory under blogdir, committed
    with git and built. Leaving the file empty in the editor abandons the
    entry.
    """
    def usage():
        print("""blog options:
-b --buildonly
-c --comment=
-h --help
-l --link=
-n --noedit
-t --title=
-T --tags=
-v""")
    global verbose

    try:
        # Every long option needs its own list element: two adjacent
        # string literals with no comma between them are concatenated
        # into a single, meaningless option name.
        opts, _args = getopt.getopt(sys.argv[1:], 'bc:hl:nt:T:v',
                                    ['buildonly', 'comment=', 'noedit', 'help', 'link=', 'tags=', 'title='])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err)) # will print something like 'option -a not recognized'
        usage()
        sys.exit(2)

    buildonly = False
    comment = None
    edit = True
    link = None
    tags = None
    title = None
    for o, a in opts:
        if o == '-v':
            verbose = True
        elif o in ('-h', '--help'):
            usage()
            sys.exit()
        elif o in ('-b', '--buildonly'):
            buildonly = True
        elif o in ('-c', '--comment'):
            comment = a
        elif o in ('-l', '--link'):
            link = a
        elif o in ('-n', '--noedit'):
            edit = False
        elif o in ('-T', '--tags'):
            tags = a
        elif o in ('-t', '--title'):
            title = a
        else:
            assert False, 'unhandled option'

    if verbose:
        print("title is %s, link is %s, tags are %s." % (title, link, tags))
        print("comment is %s." % (comment))

    e = url_to_entry(link)

    if title:
        e.title(title)

    if comment:
        e.comment(comment)

    if tags:
        # Tags are comma separated; tolerate spaces around the commas and
        # ignore empty items such as those left by a trailing comma.
        for tag in tags.split(','):
            tag = tag.strip()
            if tag:
                e.tag(tag)

    fn = e.save()

    if edit:
        os.system('ec ' + fn)

    # An empty file indicates 'abandon this entry'.
    if os.path.getsize(fn) == 0:
        if verbose:
            print('Abandoned.')
        os.remove(fn)
        sys.exit(0)

    # Move to the right place, check in.
    entryname = os.path.join('_posts/', os.path.basename(fn))
    shutil.move(fn, os.path.join(blogdir, entryname))
    os.chdir(blogdir)
    os.system('git add ' + entryname)
    os.system('git commit -m "Add ' + entryname + '." ' + entryname)

    if buildonly:
        os.system('blog-build')
    else:
        os.system('blog-update')

# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
