misc/wikka2doku.py

219 lines
6 KiB
Python

#!/usr/bin/env python
#
# wikka2doku.py
# This script helps you import pages (with their whole history!!!) from
# Wikka Wiki into DokuWiki. You will still need to edit some of the
# generated pages by hand.
#
# Tested MySQL dump version: 10.11
#
# Wed Jan 31 02:24:30 CET 2007
# - Eriol (@mornie.org)
#
# Thanks to MancaUSoft for bug hunting and to tosky for contributing! ;)
import time
import re
import gzip
import os
import os.path
import getopt
import sys
# Maps an old Wikka user name to the user name that should replace it.
# Filled by the script entry point from the file given with -u/--users and
# looked up by process_user(); empty when no mapping file is supplied.
USER_LIST = {}
def w2t(s):
    """Convert a Wikka timestamp into the integer form used by DokuWiki.

    `s` must look like ``YYYY-MM-DD HH:MM:SS``.  The broken-down time is
    handed to ``time.mktime``, so it is interpreted as local time, and the
    resulting number of seconds since the epoch is truncated to an ``int``.
    ``time.strptime`` raises ``ValueError`` when `s` has another format.
    """
    parsed = time.strptime(s, '%Y-%m-%d %H:%M:%S')
    seconds = time.mktime(parsed)
    return int(seconds)
# Matches the beginning of one "INSERT INTO `wikka_pages` VALUES (" statement
# of the SQL dump.  findall() yields one 7-tuple per statement:
#   (page id, page name, timestamp, page body, owner, user or host, last flag)
# The pattern is written in verbose mode, so the whitespace and the "#"
# comments inside it are not part of the expression; DOTALL lets the page
# body span several lines and IGNORECASE makes the SQL keywords case-blind.
pages = re.compile(r'''INSERT\sINTO\s`wikka_pages`\sVALUES\s\( # INSERT INTO...
(\d*),\'(\w*)\', # page id and page name
\'(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})\', # date and time
\'(.*?)\', # page
\'(\w*)\', # owner
\'([-\d\.\w]*)\', # user or hostname
\'(\w)\', # last
''', flags=re.DOTALL | re.IGNORECASE | re.VERBOSE)
# Matches the beginning of one "INSERT INTO `wikka_users` VALUES (" statement
# of the SQL dump.  findall() yields one 3-tuple per statement:
#   (user name, password, e-mail address)
# The e-mail classes accept "-" (and "+" in the local part) in addition to
# word characters, "." and "*": without them every user whose address
# contains a hyphen, e.g. "john-doe@my-host.org", was silently skipped.
# "*" is kept so that everything the previous pattern matched still matches.
users = re.compile(
    r'''INSERT\sINTO\s`wikka_users`\sVALUES\s\(   # INSERT INTO...
        \'(\w*)\',                                # user name
        \'(\w*)\',                                # password
        \'([\w*.+-]+\@[\w*.-]+)\'                 # e-mail address
    ''',
    re.VERBOSE |
    re.IGNORECASE |
    re.DOTALL)
def get_users(fin):
    """Return the user names found in the SQL dump text `fin`.

    Every match of the module-level ``users`` pattern contributes its first
    group (the user name); password and e-mail address are discarded.  The
    names are returned as a list, in the order the matches occur.
    """
    return [name for name, _password, _email in users.findall(fin)]
def trasftable(mo):
    """Turn a matched Wikka ``{{table ...}}`` action into a DokuWiki table.

    `mo` is a match object whose first group is the number of columns and
    whose second group is the ``;``-separated list of cells.  The first
    `columns` cells become the heading row (``^a^b^``); the remaining cells
    are laid out `columns` per line as body rows (``|c|d|``).  Every row,
    including the last one, ends with a newline.

    Returns the DokuWiki markup.  When the column count is not a positive
    integer the matched text is returned unchanged, so one malformed table
    cannot abort the conversion of a whole dump.
    """
    try:
        columns = int(mo.groups()[0])
    except ValueError:
        columns = 0
    if columns < 1:
        # Nothing sensible can be built; leave the markup for manual editing.
        return mo.group(0)

    all_cells = mo.groups()[1].split(';')
    heading = all_cells[:columns]
    body = all_cells[columns:]

    # A trailing partial row used to be dropped silently.  Pad it instead.
    # The filler is a space because DokuWiki reads a completely empty cell
    # ("||") as a column span, not as an empty cell.
    missing = -len(body) % columns
    if missing:
        body.extend([' '] * missing)

    lines = ['^' + '^'.join(heading) + '^']
    for start in range(0, len(body), columns):
        lines.append('|' + '|'.join(body[start:start + columns]) + '|')
    return '\n'.join(lines) + '\n'
# Wikka markup patterns used by process_bodypage(), compiled once at import
# time instead of on every call.

# [[target description]] -> target and description.
_WIKKA_LINK = re.compile(r'''\[\[([^\[\]]+?)\s([^\[\]]+?)\]\]''')
# A "-" (optionally "~-") list marker right after the indentation at the
# start of a line.  The anchor matters: without it any " -" in running text
# ("a - b", " -5") was rewritten as a list bullet.
_WIKKA_ULIST = re.compile(r'''^([\t ]+)~?-''', re.MULTILINE)
# Bare CamelCase words.
# NOTE(review): the leading (?!\[\[) is evaluated right in front of the first
# letter, so it can never fail; the trailing lookahead together with \b is
# what keeps words already inside [[...]] from being wrapped a second time.
_WIKKA_AUTOLINK = re.compile(r'''\b(?!\[\[)     # Not starting with [[
    ([A-Z][a-z]*[A-Z]+[a-z]+[a-zA-Z]*)          # CamelCase
    (?!\]\]|\|)\b                               # Not ending with ]] or |
    ''', re.VERBOSE)
_WIKKA_IMAGE = re.compile(r'''img="(.*?)"''')
_WIKKA_MONOSPACED = re.compile(r'''##(.*?)##''')
# %%(language) code %%.  DOTALL lets the code span several lines, which the
# previous pattern could not match (the "\n" escapes are already real line
# breaks when it runs).  The language itself still has to sit on one line.
_WIKKA_CODE = re.compile(r'''%%\(([^)\n]*)\)(.*?)%%''', re.DOTALL)
_WIKKA_DELETED = re.compile(r'''\+\+(.*?)\+\+''')
_WIKKA_TABLE = re.compile(
    r'''{{table[ ]+columns="(.*?)".*cells="(.*?)"}}''')


def process_bodypage(body):
    """Convert the body of one Wikka page revision to DokuWiki markup.

    `body` is the page text exactly as captured from the SQL dump, i.e. with
    line breaks and quotes still backslash-escaped.  The function

    * unescapes ``\\n``, ``\\'`` and ``\\"`` and drops ``""`` markers;
    * turns ``~-`` / ``~~-`` and indented ``-`` markers into ``*`` bullets;
    * turns ``~&`` / ``~~&`` / ``~~~&`` into ``>`` / ``>>`` / ``>>>``;
    * rewrites ``[[target text]]`` as ``[[target|text]]`` and wraps bare
      CamelCase words in ``[[...]]``;
    * rewrites ``img="..."`` as ``{{...}}``, ``##x##`` as ``''x''``,
      ``%%(lang)code%%`` as ``<code lang>code</code>`` and ``++x++`` as
      ``<del>x</del>``;
    * hands ``{{table ...}}`` actions to trasftable().

    Returns the converted text.  The order of the steps is significant, e.g.
    explicit links must be rewritten before CamelCase auto-linking runs.
    """
    body = body.replace('\\n', '\n')
    # DokuWiki only recognises a list item indented by at least two spaces,
    # and shows nesting through deeper indentation: one level per "~".
    body = body.replace('~~-', '    *')
    body = body.replace('~-', '  *')
    body = body.replace('~~~&', '>>>')
    body = body.replace('~~&', '>>')
    body = body.replace('~&', '>')
    body = body.replace("\\'", "'")
    body = body.replace('\\"', '"')
    body = body.replace('""', '')
    body = _WIKKA_LINK.sub(r'[[\1|\2]]', body)
    body = _WIKKA_AUTOLINK.sub(r'[[\1]]', body)
    body = _WIKKA_ULIST.sub(r'\1*', body)
    body = _WIKKA_IMAGE.sub(r'{{\1}}', body)
    body = _WIKKA_MONOSPACED.sub(r"''\1''", body)
    body = _WIKKA_CODE.sub(r'<code \1>\2</code>', body)
    body = _WIKKA_DELETED.sub(r'<del>\1</del>', body)
    body = _WIKKA_TABLE.sub(trasftable, body)
    return body
def process_user(user):
    """Return the name under which `user` is recorded in the output.

    A name listed in the module-level ``USER_LIST`` mapping is replaced by
    its mapped value; any other name is simply lower-cased.
    """
    try:
        return USER_LIST[user]
    except KeyError:
        return user.lower()
def makepages(fin, base_directory='.'):
    """Write every page revision found in the SQL dump text `fin`.

    For each row matched by the module-level ``pages`` pattern the body is
    converted with process_bodypage() and stored below `base_directory`:

    * ``pages/<name>.txt`` when the row is flagged as the latest revision
      (``last == 'Y'``);
    * ``attic/<name>.<timestamp>.txt.gz`` (gzip-compressed) otherwise.

    One tab-separated line per row is appended to ``changes.log``:
    timestamp, ``127.0.0.1``, page name, user and ``created`` for the first
    row seen for a page (an empty field for later ones).  Page names are
    lower-cased; user names go through process_user().

    The ``pages`` and ``attic`` directories are created when missing.
    """
    pages_dir = os.path.join(base_directory, 'pages')
    attic_dir = os.path.join(base_directory, 'attic')
    for directory in (pages_dir, attic_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    # The owner group of the pages pattern only accepts word characters, so
    # the special owner "(Public)" is rewritten up front rather than making
    # the pattern more complex.  Note that this touches the whole dump text.
    fin = fin.replace('(Public)', 'Public')

    knowpages = set()
    # "with" guarantees the log is flushed and closed even when a row fails.
    with open(os.path.join(base_directory, 'changes.log'), 'a') as fmeta:
        for row in pages.findall(fin):
            _page_id, page_name, date, page_body, _owner, user, last = row
            page_name = page_name.lower()
            user = process_user(user)
            timestamp = str(w2t(date))

            if page_name in knowpages:
                meta_operation = ''
            else:
                meta_operation = 'created'
                knowpages.add(page_name)

            # Both targets are written as bytes: a binary gzip stream rejects
            # text on Python 3, while on Python 2 ``bytes`` is ``str`` and
            # the body passes through untouched.
            data = process_bodypage(page_body)
            if not isinstance(data, bytes):
                data = data.encode('utf-8')

            if last == 'Y':  # current revision: goes in pages
                with open(os.path.join(pages_dir, page_name + '.txt'),
                          'wb') as fpage:
                    fpage.write(data)
            else:            # older revision: goes in the attic
                fattic = gzip.open(
                    os.path.join(attic_dir,
                                 page_name + '.' + timestamp + '.txt.gz'),
                    'wb')
                try:
                    fattic.write(data)
                finally:
                    fattic.close()

            fmeta.write('\t'.join([timestamp, '127.0.0.1', page_name,
                                   user, meta_operation]) + '\n')
def usage():
    """Print the command-line help text to standard output.

    The synopsis lists every option the option parser accepts (-h, -d, -u).
    """
    use = '''
wikka2doku [-h] [-d directory] [-u users] wikkadump.sql
Launch passing a sql dump of your wikka wiki database.
-d directory:
Creates pages, history and metadata in the
directory passed as parameter
-u users:
Change username depending on file passed as
parameter.
The format of this file must be:
old_username<space>new_username
'''
    # Call form with a single argument: same output on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(use)
def _load_user_list(path):
    """Fill USER_LIST from the ``old_username new_username`` lines of `path`.

    Blank lines are skipped.  Any other line that does not consist of exactly
    two whitespace-separated names is reported and ends the run with exit
    status 2.
    """
    with open(path) as handle:
        for number, line in enumerate(handle, 1):
            names = line.split()
            if not names:
                continue
            if len(names) != 2:
                print('%s:%d: expected "old_username new_username"'
                      % (path, number))
                sys.exit(2)
            old_user, new_user = names
            USER_LIST[old_user] = new_user


def main(argv=None):
    """Command-line entry point: parse the options and convert the dump.

    `argv` is the argument list without the program name; it defaults to
    ``sys.argv[1:]``.  Exits with status 2 on a usage error or when the dump
    file does not exist, and with status 0 after printing the help for -h.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(argv, "hd:u:", ['help',
                                                   'directory=',
                                                   'users='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    base_directory = '.'
    users_file = None
    # Options are handled before the positional check so that "-h" on its
    # own prints the help and exits successfully.
    for o, x in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-d", "--directory"):
            base_directory = x
        if o in ("-u", "--users"):
            users_file = x

    if not args:
        usage()
        sys.exit(2)
    if not os.path.isfile(args[0]):
        print('%s not found :(' % args[0])
        sys.exit(2)

    for p in ('attic', 'pages'):
        d = os.path.join(base_directory, p)
        if not os.path.isdir(d):
            os.makedirs(d)

    if users_file:
        _load_user_list(users_file)

    # The whole dump is read at once; the page pattern has to see complete
    # INSERT statements, which may span several lines.
    with open(args[0], 'r') as dump:
        fline = dump.read()
    makepages(fline, base_directory)


if __name__ == '__main__':
    main()