# see ticket #12
"""Helpers for locating and normalising the HTML ``<meta>`` charset tag."""
import re

# Matches a whole <meta ...> tag that carries a ``charset=`` declaration, in
# either the HTML5 form (<meta charset="utf-8">) or the http-equiv form
# (<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />).
# The declared encoding is exposed as the named group ``charset``.
META_CHARSET_TAG = re.compile(
    r"""(<meta[^>]*charset=["']?(?P<charset>[^"'>]*)["']?[ ]?[/]?[>])""",
    re.IGNORECASE | re.DOTALL)

# Matches an opening <head> tag, with or without attributes.  The ``\s``
# requirement before attributes keeps it from matching tags such as <header>.
HEAD_TAG = re.compile(r'<head(?:\s[^>]*)?>', re.IGNORECASE)


def fix_meta_charset_position(s):
    """
    Move the meta tag with the charset definition to be the first child of
    the head tag.

    ``s`` is the document as text.  It is returned unchanged when it holds
    no charset meta tag, or when it has no ``<head>`` tag to move the
    declaration into (so the declaration is never silently dropped).

    If several charset meta tags are present, all of them are removed and
    only the first one is re-inserted, directly after the first opening
    ``<head>`` tag.
    """
    found = META_CHARSET_TAG.search(s)
    if found is None or HEAD_TAG.search(s) is None:
        return s

    tag = found.group()
    without_meta = META_CHARSET_TAG.sub('', s)
    # A callable replacement is used so that backslashes inside ``tag`` are
    # inserted literally rather than being parsed as template escapes.
    # ``count=1`` restricts the insertion to the first <head> tag.
    return HEAD_TAG.sub(lambda m: m.group() + tag, without_meta, count=1)


def force_charset(resp, default="utf8"):
    """
    Sets the charset of the response, to guarantee that
    ``resp.unicode_body`` won't raise AttributeError:

    1. If the charset is already set, leave it.
    2. If a charset declaration is found in the response body, use it.
    3. Otherwise use ``default``.

    ``resp`` only needs ``charset`` and ``body`` attributes; it is modified
    in place and also returned.  An empty body, or a declaration with an
    empty value (``charset=""``), falls back to ``default``.
    """
    if resp.charset:
        return resp

    body = resp.body
    # On Python 3 a bytes body cannot be searched with a text pattern.
    # latin-1 maps every byte to a code point and is ASCII-compatible, so
    # the decode cannot fail and the charset token survives intact.  The
    # second check leaves Python 2 ``str`` bodies (where bytes is str) alone.
    if isinstance(body, bytes) and not isinstance(body, str):
        body = body.decode('latin-1')

    match = META_CHARSET_TAG.search(body) if body else None
    charset = match.group('charset').strip() if match else ''
    resp.charset = charset or default
    return resp