from deliverance.util.charset import META_CHARSET_TAG
from nose.tools import assert_true, assert_false, assert_equals
# Fixture documents for META_CHARSET_TAG: test_regex passes each key to
# should_match together with its value as the expected charset.
# NOTE(review): every key below is an empty string, so the original markup
# appears to have been lost from this copy -- restore it from upstream.
docs = {
"""""": "UTF-8",
"""""": "ASCII",
"""""": "ISO-8859-1",
"""""": "UTF-8",
"""""": "UTF-8",
"""""": "UTF-8",
"""""": "UTF-8",
"""""": "UTF-8",
# it's not completely strict; these are OK too:
# NOTE(review): the entries the comment above introduces, the closing brace
# of `docs`, and the opening of the `bad_docs` list iterated by test_regex
# all appear to be missing at this point -- TODO confirm against upstream.
"""""", # shouldn't have nested quotes
"""""", # can have only one trailing quote!
""" """, # has to be in the meta tag
""" charset=UTF-8" >""", # really .. has to be in the meta tag
]
def test_regex():
    """Run every fixture document through the charset regex checks.

    Each key of ``docs`` must match, with its mapped value as the
    extracted charset; each entry of ``bad_docs`` must not match at all.
    """
    for good_doc, expected_charset in docs.items():
        should_match(good_doc, expected_charset)
    for rejected_doc in bad_docs:
        shouldnt_match(rejected_doc)
def shouldnt_match(doc):
    """Assert that META_CHARSET_TAG finds nothing in ``doc``.

    :param doc: document text to search.
    :raises AssertionError: if the regex matches anywhere in ``doc``.
    """
    match = META_CHARSET_TAG.search(doc)
    # Name the offending fixture: test_regex loops over many documents, so a
    # bare failure would not say which one matched unexpectedly.
    assert_false(
        match,
        "unexpected charset match %r in %r"
        % (match.group(0) if match else None, doc))
def should_match(doc, charset):
    """Assert that META_CHARSET_TAG extracts ``charset`` from ``doc``.

    :param doc: document text to search.
    :param charset: value expected in the match's ``charset`` named group.
    :raises AssertionError: if the regex does not match ``doc``, or if the
        ``charset`` group differs from ``charset``.
    """
    match = META_CHARSET_TAG.search(doc)
    # Include the fixture in each message: test_regex loops over many
    # documents, so a bare failure would not say which one broke.
    assert_true(match, "expected a charset match in %r" % (doc,))
    found = match.group('charset')
    assert_equals(
        found, charset,
        "expected charset %r but got %r in %r" % (charset, found, doc))