#!/usr/bin/python
"""Extract translatable strings from LightPress templates into a PO skeleton.

Usage: parser.py MASTERS-FOLDER

Every template below MASTERS-FOLDER (or the single file given instead) is
scanned for ``i18n:translate`` and ``i18n:attributes`` markers.  Progress and
parse errors go to stderr; the resulting PO catalogue goes to stdout.

The code deliberately sticks to syntax accepted by both Python 2 and 3.
"""

import os
import re
import sys

try:
    import HTMLParser  # Python 2 module name
except ImportError:
    import html.parser as HTMLParser  # Python 3: same class, new module name

try:
    from path import path
except ImportError:
    try:
        # Newer releases of the path library expose the class as ``Path``.
        from path import Path as path
    except ImportError:
        # Only the command-line driver needs it; LPi18nParser itself accepts
        # any object offering ``lines()`` and ``str()``.
        path = None

PO = r"""# LightPress wp-frontend language file
# Copyright (c) 2004-2005 L. Magnocavallo, J. Lavigne
# This file is distributed under the same license as the LightPress package.
# FIRST AUTHOR , YEAR.
#
# msgfmt -o locale/it/LC_MESSAGES/wp-frontend.mo locale/it/LC_MESSAGES/wp-frontend
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: Lightpress 1.2\n"
"Report-Msgid-Bugs-To: devteam@lightpress.org\n"
"POT-Creation-Date: 2005-09-21 12:24+0200\n"
"PO-Revision-Date: 2005-09-21 12:39:00+2\n"
"Last-Translator: Ludovico Magnocavallo \n"
"Language-Team: LP Translations \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"""

# File extensions that are scanned when a folder is walked.
TEMPLATE_EXTENSIONS = ('.htm', '.html', '.xml', '.txt')

# HTMLParseError no longer exists in recent Python 3 releases; an empty tuple
# makes the ``except`` clause in parse() simply never match there.
_PARSE_ERRORS = getattr(HTMLParser, 'HTMLParseError', ())

# Characters stripped from the front of a name once the folder prefix is gone.
_SEPARATORS = os.sep + (os.altsep or '')


class LPi18nParser(HTMLParser.HTMLParser):
    """Collect translatable messages and where they were found.

    After one or more parse() calls, ``self.i18n`` maps each message text to
    a list of ``"file:line:ccolumn"`` location strings.
    """

    re_whitespace_left = re.compile(r'\s+<', re.S)
    re_whitespace_right = re.compile(r'>\s+', re.S)

    def __init__(self, folder):
        """Create a parser whose locations are relative to *folder*."""
        HTMLParser.HTMLParser.__init__(self)
        self._folder = str(folder)
        # Kept for backward compatibility; names are now made relative by
        # _relative_name() instead of a fixed-length slice.
        self._folder_len = len(folder) + 1
        self._file = ''
        self._lines = None
        self._open_tags = []  # i18n element being captured plus its children
        self._pos = None      # (line, column) where the captured content starts
        self.i18n = {}

    def _addi18n(self, text):
        """Record *text* at the parser's current position; ignore empty text."""
        if not text:
            return
        lineno, offset = self.getpos()
        location = "%s:%s:c%s" % (self._file, lineno, offset)
        self.i18n.setdefault(text, []).append(location)

    def _starttag_end(self):
        """Return the (line, column) just past the start tag being handled."""
        line, col = self.getpos()  # position of the tag's opening '<'
        text = self.get_starttag_text() or ''
        breaks = text.count('\n')
        if breaks:
            return line + breaks, len(text) - text.rfind('\n') - 1
        return line, col + len(text)

    def _relative_name(self, name):
        """Return *name* without the folder prefix given to the constructor.

        When the prefix is the file itself (a single file was passed on the
        command line) the base name is used, so locations are never empty.
        """
        prefix = self._folder
        if not prefix or not name.startswith(prefix):
            return name
        rest = name[len(prefix):].lstrip(_SEPARATORS)
        if rest:
            return rest
        return os.path.basename(name)

    def _extract(self, start, end):
        """Return the whitespace-normalised source text between two positions.

        *start* and *end* are (line, column) pairs as returned by getpos():
        lines are 1-based, columns 0-based.
        """
        (s_line, s_col), (e_line, e_col) = start, end
        lines = self._lines[s_line - 1:e_line]  # a copy; _lines is untouched
        if not lines:
            return ''
        if len(lines) == 1:
            lines[0] = lines[0][s_col:e_col]
        else:
            lines[-1] = lines[-1][:e_col]
            lines[0] = lines[0][s_col:]
        data = ''.join(lines).strip()
        # Collapse whitespace runs next to tags into a single space.
        data = self.re_whitespace_left.sub(' <', data)
        return self.re_whitespace_right.sub('> ', data)

    def handle_starttag(self, tag, attrs):
        """Track nesting inside an i18n element, or process i18n:* attributes."""
        if self._open_tags:
            # Child element of an i18n element: only nesting depth matters.
            self._open_tags.append(tag)
            return
        attrs = dict(attrs)
        for name, value in list(attrs.items()):
            if not name.startswith('i18n:'):
                continue
            op = name[5:]
            if op == 'attributes':
                # ';'-separated attribute names.  The parser lower-cases
                # attribute names, so normalise before looking them up; a
                # valueless i18n:attributes arrives as None.
                for attr in (value or '').split(';'):
                    self._addi18n(attrs.get(attr.strip().lower()))
            elif op == 'translate':
                if value:
                    # Explicit message given as the attribute value.
                    self._addi18n(value)
                else:
                    # The element content is the message.  Capture starts
                    # right after this tag so leading inline markup is kept.
                    self._open_tags.append(tag)
                    self._pos = self._starttag_end()

    def handle_data(self, data):
        """Fallback: remember where direct text of the i18n element begins."""
        if self._pos is None and len(self._open_tags) == 1:
            self._pos = self.getpos()

    def handle_endtag(self, tag):
        """Close nested elements; store the message when the i18n element ends."""
        if tag not in self._open_tags:
            # Not capturing, or a stray end tag that matches nothing open.
            return
        # Unwind to the matching element so unclosed void elements such as
        # <br> cannot leave the stack unbalanced.
        while self._open_tags.pop() != tag:
            pass
        if self._open_tags:
            return
        start, end = self._pos, self.getpos()
        self._pos = None
        if start is None or start >= end:
            # Nothing between the tags (e.g. a self-closing element).
            return
        self._addi18n(self._extract(start, end))

    def parse(self, f):
        """Scan one template and add its messages to ``self.i18n``.

        *f* must provide ``lines()`` returning the file's lines with their
        line endings, and ``str(f)`` must give its path.  Parse errors are
        reported on stderr and do not abort the run.
        """
        sys.stderr.write("processing %s\n" % f)
        self._file = self._relative_name(str(f))
        self._lines = f.lines()
        self.reset()
        # Drop capture state a previous, malformed file may have left behind.
        self._open_tags = []
        self._pos = None
        try:
            self.feed(''.join(self._lines))
            self.close()
        except _PARSE_ERRORS:
            sys.stderr.write("%s\n" % sys.exc_info()[1])


def po_escape(text):
    """Escape *text* for use inside a double-quoted PO string."""
    # The backslash must be handled first so later escapes are not doubled.
    return text.replace('\\', '\\\\').replace('\n', '\\n').replace('"', '\\"')


def format_catalog(i18n):
    """Return the PO catalogue text for an ``LPi18nParser.i18n`` mapping.

    The fixed ``PO`` header comes first, then one entry per message in sorted
    order: a ``# `` comment listing the locations, the msgid and an empty
    msgstr.
    """
    chunks = [PO, '\n']
    for msgid in sorted(i18n):
        chunks.append('# %s\n' % ', '.join(i18n[msgid]))
        chunks.append('msgid "%s"\n' % po_escape(msgid))
        chunks.append('msgstr ""\n\n')
    return ''.join(chunks)


def main(argv):
    """Command-line entry point; *argv* is ``sys.argv``."""
    if len(argv) == 1:
        raise SystemExit("Usage: parser.py MASTERS-FOLDER")
    if path is None:
        raise SystemExit("parser.py requires the 'path' module")
    p = path(argv[1])
    parser = LPi18nParser(argv[1])
    if p.isfile():
        parser.parse(p)
    elif p.isdir():
        # Sorted so the location lists are reproducible across runs.
        for f in sorted(p.walkfiles()):
            if f.ext in TEMPLATE_EXTENSIONS:
                parser.parse(f)
    else:
        raise SystemExit(
            "%s not a file nor a folder. Usage: parser.py MASTERS-FOLDER" % p)
    sys.stdout.write(format_catalog(parser.i18n))


if __name__ == '__main__':
    main(sys.argv)