summary refs log tree commit diff stats
path: root/ubuntu/maverick/kdesdk/debian/desktop-i18n/msgsplit
diff options
context:
space:
mode:
Diffstat (limited to 'ubuntu/maverick/kdesdk/debian/desktop-i18n/msgsplit')
-rw-r--r-- ubuntu/maverick/kdesdk/debian/desktop-i18n/msgsplit 168
1 files changed, 168 insertions, 0 deletions
diff --git a/ubuntu/maverick/kdesdk/debian/desktop-i18n/msgsplit b/ubuntu/maverick/kdesdk/debian/desktop-i18n/msgsplit
new file mode 100644
index 000000000..98789a47f
--- /dev/null
+++ b/ubuntu/maverick/kdesdk/debian/desktop-i18n/msgsplit
@@ -0,0 +1,168 @@
+#! /usr/bin/env python
+
+import sys, string, codecs, os
+
+# Line-length limit that splitit() uses to decide when a message has to be
+# wrapped onto several quoted lines.
+# TODO: currently the 78 chars are *without* the quotes, while for Gettext it is *with* the quotes
+# FIXME: it seems possible to get lines bigger than 80 characters.
+max_length = 78
+
+# Tags in front of which splitit() starts a new output line (a tag at the very
+# start of the remaining text does not cause a break).
+wrap_before = ['<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>', '<p>', '<br>', '<br/>',
+ '<ol>', '<ul>', '<li>', '<table>', '<th>', '<tr>', '<td>', '<center>',
+ '<blockquote>', '<pre>', '<hr>', '<hr/>']
+
+### TODO: try to support any charset, not only UTF-8 (so that it can be used outside KDE)
+
+def splitit( start, message, outfile ):
+ # print start+"\""+message+"\"" # DEBUG
+ if len(start):
+ if len(message) + len(start) < max_length and \
+ string.find(message, '\\n') == -1:
+ outstr = '%s"%s"\n' % (start, message)
+ outfile.write(outstr.encode('utf-8'))
+ return
+ outfile.write(start)
+ outfile.write(u'""\n')
+ index = 0
+ mlen = len(message)
+ last_brace = 0
+ last_space = 0
+ last_comma = 0
+ while index < mlen:
+ if message[index] == r'n' and (index > 0 and message[index-1] == '\\') \
+ and (index < 2 or message[index-2] != '\\'):
+ outstr = '"%s"\n' % message[:index+1]
+ outfile.write(outstr.encode('utf-8'))
+ message = message[index+1:]
+ mlen -= index + 1
+ index = 0
+ last_brace = 0
+ last_space = 0
+ last_comma = 0
+ continue
+ elif message[index] == u'>':
+ last_brace = index
+ elif message[index] == u' ':
+ last_space = index
+ elif message[index] == u',':
+ last_comma = index
+ elif message[index] == u'<':
+ for s in wrap_before:
+ if index > 0 and message[index:].startswith(s):
+ outstr = '"%s"\n' % message[:index]
+ outfile.write(outstr.encode('utf-8'))
+ message = message[index:]
+ mlen -= index
+ index = 0
+ last_brace = 0
+ last_space = 0
+ last_comma = 0
+ continue
+ if index > max_length:
+ if last_brace > 50:
+ index = last_brace
+ while index < mlen - 1 and message[index+1] == ' ':
+ index += 1
+ elif last_space != 0:
+ index = last_space
+ elif last_comma != 0:
+ index = last_comma
+ else:
+ while index > 0 and message[index] == u'\\':
+ index = index - 1
+ outstr = '"%s"\n' % message[:index+1]
+ outfile.write(outstr.encode('utf-8'))
+ message = message[index+1:]
+ mlen -= index + 1
+ index = 0
+ last_brace = 0
+ last_space = 0
+ last_comma = 0
+ continue
+ index += 1
+ if len(message):
+ outstr = '"%s"\n' % message
+ outfile.write(outstr.encode('utf-8'))
+
+if sys.hexversion >= 0x02030000:
+ # We have Python 2.3 or better
+ open_type="rU" # Open for read with "Universal Newline Support"
+else:
+ # We have a Python older than 2.3
+ open_type="r" # Normal open for read
+### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.
+for file in sys.argv[1:]:
+ orig_file = open(file, open_type)
+ new_file = open(file + ".new", 'w')
+
+ last=''
+ start=''
+ index=0
+ line=' '
+ while 1: # python 2.1 has no True ;)
+ line = orig_file.readline()
+ index += 1
+ if not line:
+ break
+ if line == '\n' or line[0] == '#':
+ splitit(start, last, new_file)
+ start = ''
+ last = ''
+ new_file.write(line)
+ continue
+ try:
+ line = string.strip(unicode(line, 'utf-8'))
+ except UnicodeError:
+ print file
+ if line[0] == '"' and line[-1:] == '"':
+ last += line[1:-1]
+ continue
+ # new message
+ splitit(start, last, new_file)
+ if line.startswith("msgid "):
+ start = "msgid "
+ last = string.lstrip(line[6:-1])[1:]
+ elif line.startswith("msgstr "):
+ start = "msgstr "
+ last = string.lstrip(line[7:-1])[1:]
+ elif line.startswith("msgctxt "):
+ start = "msgctxt "
+ last = string.lstrip(line[8:-1])[1:]
+ elif line.startswith("msgid_plural "):
+ start = "msgid_plural "
+ last = string.lstrip(line[13:-1])[1:]
+ elif line.startswith("msgstr["):
+ # For most languages, there will be only one digit
+ if line[8] == "]" and line[9] == " ":
+ if line[7].isdigit():
+ start = line[:10]
+ last = string.lstrip(line[10:-1])[1:]
+ else:
+ print file, "not-a-digit error for mgstr[] in line", index
+ orig_file.close()
+ new_file.close()
+ sys.exit(1)
+ else:
+ posdigit = 7 # The first digit is at position 7
+ while line[posdigit].isdigit():
+ posdigit += 1
+ if posdigit > 7 and line[posdigit] == "]" and line[posdigit+1] == " ":
+ posdigit += 2 # skip ] and the space
+ start = line[:posdigit]
+ last = string.lstrip(line[posdigit:-1])[1:]
+ else:
+ print file, "parse error after msgstr[ in line", index
+ orig_file.close()
+ new_file.close()
+ sys.exit(1)
+ else:
+ print file, "parsing error in line", index
+ orig_file.close()
+ new_file.close()
+ sys.exit(1)
+
+ splitit(start, last, new_file)
+ orig_file.close()
+ new_file.close()
+ os.rename(file + ".new", file)
+
+# kate: space-indent off; indent-width 8; replace-tabs off;