misc/wikka2doku.py

#!/usr/bin/env python
#
# wikka2doku.py
# This script helps you import pages (with all history!!!) from Wikka Wiki
# into DokuWiki. You still need to edit some of the generated pages.
#
# Tested MySQL dump version: 10.11
#
# Wed Jan 31 02:24:30 CET 2007
# -  Eriol (@mornie.org)
#
# Thanks to MancaUSoft for bug hunting and tosky for contributing! ;)


import time
import re
import gzip
import os
import os.path
import getopt
import sys

# Maps an old Wikka user name to the name that should be recorded instead.
# It is filled by the command-line entry point from the file passed with
# -u/--users and looked up by process_user().
USER_LIST = {}

def w2t(s):
    '''Convert a Wikka 'YYYY-MM-DD HH:MM:SS' timestamp to integer seconds.

    The string is parsed with time.strptime() and turned into seconds since
    the epoch by time.mktime(), which interprets it as local time.
    '''
    parsed = time.strptime(s, '%Y-%m-%d %H:%M:%S')
    seconds = time.mktime(parsed)
    return int(seconds)

# Matches the beginning of one `wikka_pages` INSERT statement of the SQL dump
# and captures, in order: page id, page name, timestamp, page body, owner,
# user (or hostname) and the one-character "last" flag.
# Every match has to start with the INSERT ... VALUES ( text, so a statement
# carrying several value tuples would only yield its first tuple.
# re.DOTALL lets the lazily matched page body span several lines.
pages = re.compile(
        r'''INSERT\sINTO\s`wikka_pages`\sVALUES\s\( # INSERT INTO...
            (\d*),\'(\w*)\', # page id and page name
            \'(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})\', # date and time
            \'(.*?)\', # page
            \'(\w*)\', # owner
            \'([-\d\.\w]*)\', # user or hostname
            \'(\w)\', # last
             ''',
                re.VERBOSE |
                re.IGNORECASE |
                re.DOTALL)

# Matches the beginning of one `wikka_users` INSERT statement of the SQL dump
# and captures the user name, the password field and the e-mail address.
# The address classes accept '-' and '+' in addition to word characters,
# '.' and '*', so that rows with e.g. a hyphenated domain are not skipped.
users = re.compile(
        r'''INSERT\sINTO\s`wikka_users`\sVALUES\s\( #INSERT INTO...
        \'(\w*)\', # user name
        \'(\w*)\', # password
        \'([\w.*+-]+@[\w.*-]+)\' # e-mail address
        ''',
            re.VERBOSE |
            re.IGNORECASE |
            re.DOTALL)

def get_users(fin):
    '''Return the user names found in the wikka_users rows of *fin*.

    fin is the text to scan with the module-level `users` pattern; the names
    are returned as a list, in the order the rows are matched.
    '''
    return [name for name, _password, _email in users.findall(fin)]

def trasftable(mo):
    '''Convert one matched Wikka table action into DokuWiki table markup.

    mo is a regex match whose group 1 holds the column count and whose
    group 2 holds the ';'-separated cells.  The first `columns` cells become
    the heading row (^a^b^); the remaining cells are laid out `columns` per
    row (|c|d|).

    Returns the markup, one line per row, each terminated by a newline.
    Raises ValueError if the column count is not an integer.
    '''
    columns = int(mo.group(1))
    all_cells = mo.group(2).split(';')

    heading = all_cells[:columns]
    body = all_cells[columns:]

    # zip() below stops at the shortest column, which would silently drop an
    # incomplete final row.  Pad that row instead so no cell content is lost.
    # The filler is a space because DokuWiki reads a completely empty cell
    # as a column span.
    if columns > 0 and len(body) % columns:
        body = body + [' '] * (columns - len(body) % columns)

    rows = zip(*[body[i::columns] for i in range(columns)])

    lines = ['^' + '^'.join(heading) + '^\n']
    lines.extend('|' + '|'.join(row) + '|\n' for row in rows)
    return ''.join(lines)

# Patterns and literal substitutions used by process_bodypage(), built once
# at import time instead of on every call.
_LINK_RE = re.compile(r'''\[\[([^\[\]]+?)\s([^\[\]]+?)\]\]''')
_ULIST_RE = re.compile(r'''(\t|  +)~?-''')
_AUTOLINK_RE = re.compile(r'''\b(?!\[\[)    # Not starting with [[
                              ([A-Z][a-z]*[A-Z]+[a-z]+[a-zA-Z]*) # CamelCase
                              (?!\]\]|\|)\b # Not ending with ]] or |
                              ''', re.VERBOSE)
_IMAGES_RE = re.compile(r'''img="(.*?)"''')
_MONOSPACED_RE = re.compile(r'''##(.*?)##''')
# re.DOTALL is required here: the content of a %%(lang) ... %% block usually
# spans several lines, and without it '.' would stop at the first newline.
_CODE_RE = re.compile(r'''%%\((.*?)\)(.*?)%%''', re.DOTALL)
_DELETED_RE = re.compile(r'''\+\+(.*?)\+\+''')
_TABLE_RE = re.compile(r'''{{table[ ]+columns="(.*?)".*cells="(.*?)"}}''')

# Applied in this exact order: longer markers come before the shorter
# markers they contain, and escaped quotes are unescaped before '""' is
# stripped.
_LITERAL_REPLACEMENTS = (
    ('\\n', '\n'),
    ('~~-', '    *'),
    ('~-', '  *'),
    ('~~~&', '>>>'),
    ('~~&', '>>'),
    ('~&', '>'),
    ("\\'", "'"),
    ('\\"', '"'),
    ('""', ''),
)


def process_bodypage(body):
    '''Translate the Wikka markup of one page revision into DokuWiki markup.

    body is the page text as captured from the SQL dump, still carrying the
    dump's backslash escapes for newlines and quotes.  The escapes are
    undone first, then links, CamelCase words, list markers, images,
    monospaced text, language-tagged code blocks, ++...++ spans and table
    actions are rewritten, in that order.

    Returns the converted text.
    '''
    for old, new in _LITERAL_REPLACEMENTS:
        body = body.replace(old, new)

    body = _LINK_RE.sub(r'[[\1|\2]]', body)         # [[Target text]]
    body = _AUTOLINK_RE.sub(r'[[\1]]', body)        # bare CamelCase word
    body = _ULIST_RE.sub(r'\1*', body)              # indented "-" bullets
    body = _IMAGES_RE.sub(r'{{\1}}', body)          # img="..."
    body = _MONOSPACED_RE.sub(r"''\1''", body)      # ##text##
    body = _CODE_RE.sub(r'<code \1>\2</code>', body)  # %%(lang)...%%
    body = _DELETED_RE.sub(r'<del>\1</del>', body)  # ++text++
    body = _TABLE_RE.sub(trasftable, body)          # {{table ...}}

    return body

def process_user(user):
    '''Return the name to record for *user*.

    A name present in USER_LIST is replaced by the value mapped to it; any
    other name is returned lower-cased.
    '''
    try:
        return USER_LIST[user]
    except KeyError:
        return user.lower()

def makepages(fin, base_directory='.'):
    '''Write every page revision found in the SQL dump text *fin*.

    For each row matched by the module-level `pages` pattern:
      * a revision whose "last" flag is 'Y' is written to
        <base_directory>/pages/<name>.txt;
      * any other revision is written gzip-compressed to
        <base_directory>/attic/<name>.<timestamp>.txt.gz;
      * one tab-separated line (timestamp, 127.0.0.1, page name, user,
        operation) is appended to <base_directory>/changes.log.  The
        operation is 'created' the first time a page name is seen and
        empty afterwards.

    Page names are lower-cased.  The pages/ and attic/ directories are not
    created here and must already exist.
    '''
    pages_dir = os.path.join(base_directory, 'pages')
    attic_dir = os.path.join(base_directory, 'attic')

    # Fixing a problematic Owner name instead of making the pages regex
    # more complex
    fin = fin.replace('(Public)', 'Public')

    known_pages = set()

    fmeta = open(os.path.join(base_directory, 'changes.log'), 'a')
    try:
        for row in pages.findall(fin):
            _page_id, page_name, date, page_body, _owner, user, last = row
            page_name = page_name.lower()
            user = process_user(user)
            timestamp = str(w2t(date))

            if page_name in known_pages:
                meta_operation = ''
            else:
                meta_operation = 'created'
                known_pages.add(page_name)

            converted_body = process_bodypage(page_body)

            if last == 'Y':  # current revision: goes in pages
                fout = open(os.path.join(pages_dir, page_name + '.txt'), 'w')
            else:  # older revision: goes in the attic, compressed
                attic_name = page_name + '.' + timestamp + '.txt.gz'
                fout = gzip.open(os.path.join(attic_dir, attic_name), 'wb')
            # Close the output file even if the write fails.
            try:
                fout.write(converted_body)
            finally:
                fout.close()

            fmeta.write('\t'.join([timestamp, '127.0.0.1', page_name,
                                   user, meta_operation]) + '\n')
    finally:
        # Make sure the change log is flushed and closed on any error.
        fmeta.close()

def usage():
    '''Print the command-line help text to standard output.'''
    use = '''
    wikka2doku [-h] [-d directory] [-u users] wikkadump.sql
    Launch passing a sql dump of your wikka wiki database.

    -h:
        Show this help and exit
    -d directory:
        Creates pages, history and metadata in the
        directory passed as parameter
    -u users:
        Change username depending on file passed as
        parameter.
        The format of this file must be:

        old_username<space>new_username
    '''
    # Parenthesised single-argument form: it produces the same output as the
    # Python 2 print statement and is also valid as a print() call.
    print(use)

# Command-line entry point: parse the options, prepare the output
# directories, load the optional user mapping and convert the dump.
if __name__ == '__main__':
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:u:", ['help',
                                                          'directory=',
                                                          'users='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    base_directory = '.'
    users_file = None

    # Options are examined before the positional argument so that
    # -h/--help works even when no dump file is given.
    for o, x in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-d", "--directory"):
            base_directory = x
        elif o in ("-u", "--users"):
            users_file = x

    if not args:
        usage()
        sys.exit(2)
    if not os.path.isfile(args[0]):
        print('%s not found :(' % args[0])
        sys.exit(2)

    for p in ('attic', 'pages'):
        d = os.path.join(base_directory, p)
        if not os.path.isdir(d):
            os.makedirs(d)

    if users_file:
        fusers = open(users_file)
        try:
            for line in fusers:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                if len(fields) != 2:
                    print('%s: malformed line %r '
                          '(expected: old_username new_username)'
                          % (users_file, line))
                    sys.exit(2)
                old_user, new_user = fields
                USER_LIST[old_user] = new_user
        finally:
            fusers.close()

    # Read the whole dump, closing the file before the conversion starts.
    fdump = open(args[0], 'r')
    try:
        fline = fdump.read()
    finally:
        fdump.close()
    makepages(fline, base_directory)