#!/usr/bin/env python
#
# wikka2doku.py
#
# This script helps you to import pages (with all history!) from Wikka Wiki
# to DokuWiki. You still need to edit some of the generated pages.
#
# Tested MySQL dump version: 10.11
#
# Wed Jan 31 02:24:30 CET 2007
# - Eriol (@mornie.org)
#
# Thanks to MancaUSoft for bug hunting and to tosky for contributing! ;)
# Standard library only, sorted alphabetically.
import getopt
import gzip
import os
import os.path
import re
import sys
import time

# Optional old_username -> new_username mapping, filled from the file
# passed with the -u option in __main__; empty means "no renaming".
USER_LIST = {}
|
def w2t(s):
    '''Convert a Wikka timestamp ("YYYY-MM-DD HH:MM:SS", local time)
    into the integer Unix epoch value DokuWiki uses for revisions.'''
    return int(time.mktime(time.strptime(s, '%Y-%m-%d %H:%M:%S')))
|
# Matches one row of an INSERT into `wikka_pages` in the SQL dump.
# Capture groups, in order: page id, page name, timestamp, page body,
# owner, user (or hostname), "latest revision" flag ('Y'/'N').
pages = re.compile(
    r'''INSERT\sINTO\s`wikka_pages`\sVALUES\s\( # INSERT INTO...
    (\d*),\'(\w*)\', # page id and page name
    \'(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2})\', # date and time
    \'(.*?)\', # page
    \'(\w*)\', # owner
    \'([-\d\.\w]*)\', # user or hostname
    \'(\w)\', # last
    ''',
    re.VERBOSE |
    re.IGNORECASE |
    re.DOTALL)
|
# Matches one row of an INSERT into `wikka_users` in the SQL dump.
# Capture groups, in order: user name, (hashed) password, e-mail address.
users = re.compile(
    r'''INSERT\sINTO\s`wikka_users`\sVALUES\s\( #INSERT INTO...
    \'(\w*)\', # user name
    \'(\w*)\', # password
    \'([\w.]+\@[\w.]+)\' # e-mail; was [\w*\.\w*]+ which also matched a literal '*'
    ''',
    re.VERBOSE |
    re.IGNORECASE |
    re.DOTALL)
def get_users(fin):
    '''Return the list of user names found in the SQL dump text *fin*.

    Rows are located with the module-level ``users`` regex; the password
    and e-mail captures are discarded.'''
    return [name for name, _password, _email in users.findall(fin)]
|
def trasftable(mo):
    '''re.sub callback turning a Wikka {{table ...}} action into DokuWiki
    table markup.

    Group 1 of *mo* is the column count, group 2 the ';'-separated flat
    cell list.  The first row becomes the heading (^...^), the remaining
    cells are chunked into body rows (|...|).'''
    ncols = int(mo.groups()[0])
    flat = mo.groups()[1].split(';')

    head = flat[:ncols]
    rest = flat[ncols:]
    # Re-chunk the flat cell list into rows of ncols cells each.
    rows = zip(*[rest[i::ncols] for i in range(ncols)])

    out = ['^' + '^'.join(head) + '^']
    for row in rows:
        out.append('|' + '|'.join(row) + '|')
    return '\n'.join(out) + '\n'
|
def process_bodypage(body):
    '''Translate one Wikka page body (raw text from the SQL dump) into
    DokuWiki markup and return the converted text.

    The replace/sub order matters: dump escapes are undone first, then
    list/quote markers, then the regex-based conversions.'''
    # [[target text]] -> captured as (target, text)
    link = re.compile(r'''\[\[([^\[\]]+?)\s([^\[\]]+?)\]\]''')
    # indented "-" or "~-" bullets (tab or spaces prefix)
    ulist = re.compile(r'''(\t| +)~?-''')
    # bare CamelCase words, unless already part of a [[...]] link
    autolink = re.compile(r'''\b(?!\[\[) # Not starting with [[
    ([A-Z][a-z]*[A-Z]+[a-z]+[a-zA-Z]*) # CamelCase
    (?!\]\]|\|)\b # Not ending with ]] or |
    ''', re.VERBOSE)
    # img="..." attributes
    images = re.compile(r'''img="(.*?)"''')
    # ##text## monospace spans
    monospaced = re.compile(r'''##(.*?)##''')
    # %%(lang)source%% code blocks: captures language and source
    code = re.compile(r'''%%\((.*?)\)(.*?)%%''')
    # ++text++ spans (mapped to <del> below, despite the name "bold")
    bold = re.compile(r'''\+\+(.*?)\+\+''')
    # {{table columns="N" ... cells="a;b;..."}} actions
    table = re.compile(r'''{{table[ ]+columns="(.*?)".*cells="(.*?)"}}''')

    # Undo SQL-dump escaping of newlines.
    body = body.replace('\\n', '\n')
    # List bullet markers -> DokuWiki "  *" style bullets.
    body = body.replace('~~-', ' *')
    body = body.replace('~-', ' *')
    # Quote-level markers ~& -> DokuWiki > chains (deepest first).
    body = body.replace('~~~&', '>>>')
    body = body.replace('~~&', '>>')
    body = body.replace('~&', '>')
    # Undo SQL-dump escaping of quotes.
    body = body.replace("\\'", "'")
    body = body.replace('\\"', '"')
    # Wikka's "" (no-parse) escape has no direct target here; dropped.
    body = body.replace('""', '')
    body = link.sub(r'[[\1|\2]]', body)
    body = autolink.sub(r'[[\1]]', body)
    body = ulist.sub(r'\1*', body)
    body = images.sub(r'{{\1}}', body)
    body = monospaced.sub(r"''\1''", body)
    body = code.sub(r'<code \1>\2</code>', body)
    body = bold.sub(r'<del>\1</del>', body)
    # Tables are rebuilt row by row by the trasftable() callback.
    body = table.sub(trasftable, body)

    return body
|
def process_user(user):
    '''Map *user* through the module-level USER_LIST rename table.

    Users without an entry are simply lowercased.'''
    return USER_LIST.get(user, user.lower())
|
def makepages(fin, base_directory='.'):
    '''Extract every page revision from the SQL dump text *fin* and write
    the DokuWiki data tree under *base_directory*:

      pages/<name>.txt           -- current revision (last flag == 'Y')
      attic/<name>.<ts>.txt.gz   -- older revisions, gzip-compressed
      changes.log                -- one tab-separated metadata line
                                    per revision (appended)

    The pages/ and attic/ sub-directories must already exist (the
    __main__ block creates them).'''
    knowpages = {}

    fmeta = open(os.path.join(base_directory, 'changes.log'), 'a')

    # Fixing a problematic Owner name instead of making the pages regex
    # more complex.
    fin = fin.replace('(Public)', 'Public')

    m = pages.findall(fin)

    for row in m:
        page_id, page_name, date, page_body, owner, user, last = row
        # DokuWiki page ids are lowercase.
        page_name = page_name.lower()

        user = process_user(user)

        # The first revision seen for a page is logged as 'created'.
        if page_name in knowpages:
            meta_operation = ''
        else:
            meta_operation = 'created'
            knowpages[page_name] = None

        converted_body = process_bodypage(page_body)

        if last == 'Y':  # current revision -> pages/
            # os.path.join instead of string concatenation, so the
            # result is correct on every platform.
            fpage = open(os.path.join(base_directory, 'pages',
                                      page_name + '.txt'), 'w')
            fpage.write(converted_body)
            fpage.close()
        else:  # older revision -> attic/, gzipped and timestamped
            attic_name = page_name + '.' + str(w2t(date)) + '.txt.gz'
            fattic = gzip.open(os.path.join(base_directory, 'attic',
                                            attic_name), 'wb')
            fattic.write(converted_body)
            fattic.close()

        # changes.log line: <timestamp> <ip> <page> <user> <operation>
        fmeta.write('\t'.join([str(w2t(date)), '127.0.0.1', page_name,
                               user, meta_operation]) + '\n')

    fmeta.close()
|
def usage():
    '''Print the command line usage help on stdout.'''
    use = '''
wikka2doku [-d directory] wikkadump.sql
Launch passing a sql dump of your wikka wiki database.

-d directory:
Creates pages, history and metadata in the
directory passed as parameter
-u users:
Change username depending on file passed as
parameter.
The format of this file must be:

old_username<space>new_username
'''
    # print as a function call: valid on both Python 2 and Python 3
    # (the original "print use" statement is Python-2-only).
    print(use)
|
if __name__ == '__main__':
    # Parse options; any unknown option prints usage and aborts.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:u:", ['help',
                                                           'directory=',
                                                           'users='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    if not args:
        usage()
        sys.exit(2)
    elif not os.path.isfile(args[0]):
        # %-formatting so the message prints identically on Python 2 and 3
        # (the original "print a, b" statement is Python-2-only).
        print('%s not found :(' % args[0])
        sys.exit(2)

    base_directory = '.'
    users_file = None

    for o, x in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-d", "--directory"):
            base_directory = x
        if o in ("-u", "--users"):
            users_file = x

    # Make sure the DokuWiki data sub-directories exist.
    for p in ('attic', 'pages'):
        d = os.path.join(base_directory, p)
        if not os.path.isdir(d):
            os.makedirs(d)

    # Optional "old_username new_username" mapping file (-u option).
    if users_file:
        fusers = open(users_file)
        try:
            for line in fusers:
                old_user, new_user = line.split()
                USER_LIST[old_user] = new_user
        finally:
            fusers.close()

    # open() instead of the removed-in-Python-3 file() builtin, and the
    # handle is now closed instead of leaked.
    fdump = open(args[0], 'r')
    try:
        fline = fdump.read()
    finally:
        fdump.close()

    makepages(fline, base_directory)