#!/usr/bin/python
import sys, re, HTMLParser
from path import path
# Header of the generated PO catalogue. The script's __main__ section prints
# this constant verbatim before the collected msgid/msgstr entries. It is a
# raw string, so each "\n" inside the quoted header lines below is emitted as
# the two characters backslash + n rather than as a real newline.
PO = r"""# LightPress wp-frontend language file
# Copyright (c) 2004-2005 L. Magnocavallo, J. Lavigne
# This file is distributed under the same license as the LightPress package.
# FIRST AUTHOR , YEAR.
#
# msgfmt -o locale/it/LC_MESSAGES/wp-frontend.mo locale/it/LC_MESSAGES/wp-frontend
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Lightpress 1.2\n"
"Report-Msgid-Bugs-To: devteam@lightpress.org\n"
"POT-Creation-Date: 2005-09-21 12:24+0200\n"
"PO-Revision-Date: 2005-09-21 12:39:00+2\n"
"Last-Translator: Ludovico Magnocavallo \n"
"Language-Team: LP Translations \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"""
class LPi18nParser(HTMLParser.HTMLParser):
    """Collect translatable strings from templates carrying i18n: attributes.

    Three attribute forms are recognised on a start tag:

    * ``i18n:translate="some text"`` - the attribute value is the msgid.
    * ``i18n:translate=""`` (or no value) - the raw source between the start
      tag and its matching end tag, inner markup included, is the msgid.
    * ``i18n:attributes="a; b"`` - the values of the listed attributes of the
      same tag are msgids.

    Results accumulate in ``self.i18n``, a dict mapping each msgid to a list
    of ``"file:line:ccolumn"`` reference strings.
    """

    # Whitespace runs adjacent to a tag are collapsed to one space so that a
    # msgid does not depend on how the template markup is indented.
    re_whitespace_left = re.compile(r'\s+<', re.S)
    re_whitespace_right = re.compile(r'>\s+', re.S)

    def __init__(self, folder):
        """Create a parser whose references are relative to *folder*."""
        # Explicit base-class call: on Python 2 HTMLParser is an old-style
        # class, so super() cannot be used.
        HTMLParser.HTMLParser.__init__(self)
        # Number of leading characters (folder plus one separator) cut from
        # every parsed path.  A trailing separator on *folder* is ignored so
        # the first character of the relative name is not cut as well.
        self._folder_len = len(str(folder).rstrip('/\\')) + 1
        self._file = ''
        self._lines = None
        # Stack of tags opened since (and including) the i18n:translate tag
        # whose content is being collected; empty outside such a tag.
        self._open_tags = []
        # (line, column) just past the start tag of the outermost collected
        # element, or None when nothing is being collected.
        self._pos = None
        self.i18n = {}

    def _addi18n(self, text):
        """Record *text* as a msgid found at the parser's current position.

        Empty or None text is ignored.
        """
        if not text:
            return
        lineno, offset = self.getpos()
        reference = "%s:%s:c%s" % (self._file, lineno, offset)
        self.i18n.setdefault(text, []).append(reference)

    def _content_start(self):
        """Return the (line, column) just past the start tag being handled."""
        # While handle_starttag runs, getpos() still points at the "<" of the
        # tag, so the tag's own source text is walked to find where it ends.
        lineno, offset = self.getpos()
        tag_text = self.get_starttag_text() or ''
        newlines = tag_text.count('\n')
        if newlines:
            lineno += newlines
            offset = len(tag_text) - (tag_text.rindex('\n') + 1)
        else:
            offset += len(tag_text)
        return lineno, offset

    def handle_starttag(self, tag, attrs):
        """Process the i18n: attributes of a start tag."""
        if self._open_tags:
            # Child of an element being collected: only track the nesting.
            self._open_tags.append(tag)
            return
        attrs = dict(attrs)
        collect_content = False
        for name, value in attrs.items():
            if not name.startswith('i18n:'):
                continue
            op = name[5:]
            if op == 'attributes':
                # The value may be None for a bare attribute.  HTMLParser
                # lower-cases attribute names, so the lookup does the same.
                for attr in (value or '').split(';'):
                    self._addi18n(attrs.get(attr.strip().lower()))
            elif op == 'translate':
                if value:
                    self._addi18n(value)
                else:
                    collect_content = True
        if collect_content:
            self._open_tags.append(tag)
            self._pos = self._content_start()

    def handle_endtag(self, tag):
        """Close a tracked tag; emit the msgid when the outermost one ends."""
        if tag not in self._open_tags:
            # Outside a collected element, or a stray end tag inside one.
            return
        # Drop children that were never closed (e.g. <br>) together with the
        # matching entry, so one unclosed tag cannot unbalance the stack.
        while self._open_tags.pop() != tag:
            pass
        if self._open_tags:
            return
        start, self._pos = self._pos, None
        end = self.getpos()
        # A self-closing tag is reported as start tag + end tag at the same
        # position, which puts the content start after the end: no content.
        if start is None or start >= end:
            return
        s_line, s_col = start
        e_line, e_col = end
        # getpos() lines are 1-based; the slice is a copy, so trimming it
        # leaves self._lines untouched.
        lines = self._lines[s_line - 1:e_line]
        if not lines:
            return
        if len(lines) == 1:
            lines[0] = lines[0][s_col:e_col]
        else:
            lines[0] = lines[0][s_col:]
            lines[-1] = lines[-1][:e_col]
        data = ''.join(lines).strip()
        data = self.re_whitespace_left.sub(' <', data)
        data = self.re_whitespace_right.sub('> ', data)
        self._addi18n(data)

    def parse(self, f):
        """Parse one template file and add its msgids to ``self.i18n``.

        *f* must provide ``lines()`` returning the file's lines with their
        newlines retained, and convert to its path with ``str()``.  A parse
        error is reported on stderr and ends processing of that file only.
        """
        sys.stderr.write("processing %s\n" % f)
        # Fall back to the full path when stripping the folder prefix leaves
        # nothing (the parser was created with the file itself as folder).
        self._file = str(f)[self._folder_len:] or str(f)
        self._lines = f.lines()
        self.reset()
        # reset() only clears HTMLParser's own state; the collection state
        # must not leak from an unbalanced previous file into this one.
        self._open_tags = []
        self._pos = None
        try:
            self.feed(''.join(self._lines))
            self.close()
        except HTMLParser.HTMLParseError as e:
            sys.stderr.write("%s\n" % e)
if __name__ == '__main__':
    # Usage: parser.py MASTERS-FOLDER
    # Scans a single template file, or every .htm/.html/.xml/.txt file under
    # a folder, and writes a PO catalogue to stdout (progress goes to stderr).
    if len(sys.argv) == 1:
        raise SystemExit("Usage: parser.py MASTERS-FOLDER")
    p = path(sys.argv[1])
    parser = LPi18nParser(sys.argv[1])
    if p.isfile():
        parser.parse(p)
    elif p.isdir():
        for f in p.walkfiles():
            if f.ext in ('.htm', '.html', '.xml', '.txt'):
                parser.parse(f)
    else:
        raise SystemExit("%s not a file nor a folder. Usage: parser.py MASTERS-FOLDER" % p)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(PO)
    for k in sorted(parser.i18n):
        print("# %s" % ', '.join(parser.i18n[k]))
        # Backslashes are escaped first so the backslashes introduced for
        # newlines and quotes are not doubled; an unescaped backslash in a
        # msgid would be read by gettext tools as the start of an escape.
        msgid = k.replace('\\', '\\\\').replace('\n', '\\n').replace('"', '\\"')
        print('msgid "%s"' % msgid)
        print('msgstr ""')
        print('')