Mercurial > public > sg101
comparison gpp/legacy/phpbb.py @ 294:254db4cb6a86
Changes / scripts to import forums. Other tweaks and moving other import scripts to the legacy application.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Wed, 05 Jan 2011 04:09:35 +0000 |
parents | 64c188a9d31f |
children |
comparison
equal
deleted
inserted
replaced
293:c92fb89dbc7d | 294:254db4cb6a86 |
---|---|
46 pass | 46 pass |
47 return text # leave as is | 47 return text # leave as is |
48 return re.sub("&#?\w+;", fixup, text) | 48 return re.sub("&#?\w+;", fixup, text) |
49 | 49 |
50 | 50 |
51 def unphpbb(s): | 51 def unphpbb(s, encoding='latin-1'): |
52 """Converts BBCode from phpBB database data into 'pure' BBCode. | 52 """Converts BBCode from phpBB database data into 'pure' BBCode. |
53 | 53 |
54 phpBB doesn't store plain BBCode in its database. The BBCode tags have | 54 phpBB doesn't store plain BBCode in its database. The BBCode tags have |
55 "uids" added to them and the data has already been HTML entity'ized. | 55 "uids" added to them and the data has already been HTML entity'ized. |
56 This function removes the uid stuff and undoes the entity'ification and | 56 This function removes the uid stuff and undoes the entity'ification and |
57 returns the result as a unicode string. | 57 returns the result as a unicode string. |
58 | 58 |
59 If the input 's' is not already unicode, it will be decoded using the | |
60 supplied encoding. | |
61 | |
59 """ | 62 """ |
60 if not isinstance(s, unicode): | 63 if not isinstance(s, unicode): |
61 s = s.decode('utf-8', 'replace') | 64 s = s.decode(encoding, 'replace') |
62 for start, end in BBCODE_RES: | 65 for start, end in BBCODE_RES: |
63 s = re.sub(start, r'\1', s, re.MULTILINE) | 66 s = re.sub(start, r'\1', s, re.MULTILINE) |
64 s = re.sub(end, r'\1]', s, re.MULTILINE) | 67 s = re.sub(end, r'\1]', s, re.MULTILINE) |
65 return unescape(s) | 68 return unescape(s) |