# Portal transform for images with captions
#
# We want to be able to support captions in images.
# The easiest way to do this is to define a Portal Transform which is
# applied to the HTML body text on output.
#
# The transform finds all the embedded images, and replaces them with
# an appropriate chunk of HTML to include the caption.
#
try:
try:
from Products.PortalTransforms.interfaces import ITransform
except ImportError:
from Products.PortalTransforms.z3.interfaces import ITransform
except ImportError:
ITransform = None
from Products.PortalTransforms.interfaces import itransform
from DocumentTemplate.DT_Util import html_quote
from DocumentTemplate.DT_Var import newline_to_br
from Products.CMFCore.utils import getToolByName
import re
from cgi import escape
from urlparse import urlsplit, urljoin, urlunsplit
from urllib import unquote_plus, quote_plus
from Acquisition import aq_base
from htmlentitydefs import name2codepoint
from Products.kupu.plone.config import UID_PATTERN
from zope.interface import implements
# Work on a private copy of the imported entity table so the additions below
# do not modify the table held by the htmlentitydefs module itself.
name2codepoint = name2codepoint.copy()
# Register the 'apos' entity name (code point of "'") in the copy.
name2codepoint['apos']=ord("'")
# Version-control keyword placeholder.
__revision__ = '$Id$'
# IMAGE_PATTERN matches an image tag on its own, or an image tag
# enclosed in a simple <p> or <div>. In the latter case we strip out
# the enclosing tag since we are going to insert our own.
# Building blocks for IMAGE_PATTERN.  The named groups defined here are the
# ones HTMLToCaptioned.convert() reads back from every match:
#   pat0 / pat1   - the image markup (optionally including its <a> wrapper)
#   atag0 / atag1 - the opening <a ...> tag when the image is wrapped in a link
#
# PATIMG: one <img> tag whose class attribute value mentions "captioned".
PATIMG = r'\<img[^>]+class\s*=[^=>]*captioned[^>]+\>'
# PATA: the image wrapped in a link (capturing the opening tag), or the bare image.
PATA = r'(?:(?P<atag0>\<a[^>]*\>)' + PATIMG + r'\</a\>)' + '|' + PATIMG
PAT0 = '(?P<pat0>' + PATA + ')'
# PAT1: either PAT0 enclosed in a simple <p>/<div> (the wrapper is consumed by
# the match but is outside the pat0 group), or the same thing unwrapped.  The
# second copy gets its group names renamed 0 -> 1 because a regular expression
# cannot define the same group name twice.
PAT1 = '<(?:p|div)[^>]*>' + PAT0 + '</(?:p|div)>' + '|' + PAT0.replace('0>', '1>')
IMAGE_PATTERN = re.compile(PAT1, re.IGNORECASE)
# Regex to match stupid IE attributes. In IE generated HTML an
# attribute may not be enclosed by quotes if it doesn't contain
# certain punctuation.
# Template for "=value" following an attribute name.  The value is captured in
# a group named after the attribute: either the text between double quotes, or
# (unquoted form) everything up to the next space, '/' or '>'.
ATTR_VALUE = r'=(?:"?)(?P<%s>(?<=")[^"]*|[^ \/>]*)'
ATTR_CLASS = ATTR_VALUE % 'class'
ATTR_WIDTH = ATTR_VALUE % 'width'
ATTR_HEIGHT = ATTR_VALUE % 'height'
ATTR_ALT = ATTR_VALUE % 'alt'
# Scans a tag from '<' to its closing '>' and records the attributes of
# interest as named groups: class, width, alt, height, plus 'src', which holds
# only the UID part of a src="resolveuid/<uid>..." reference.  Any other
# character is skipped by the final '.' alternative.
# NOTE(review): the name of the outer group was lost from this file; 'tag' is
# restored here and is not read by the code in this module -- confirm against
# the upstream source.
ATTR_PATTERN = re.compile(r'''
    (?P<tag>\<
     ( class%s
     | src\s*=\s*"resolveuid/(?P<src>([^/"#? ]*))
     | width%s
     | alt%s
     | height%s
     | .
     )*\>
    )''' % (ATTR_CLASS, ATTR_WIDTH, ATTR_ALT, ATTR_HEIGHT), re.VERBOSE | re.IGNORECASE | re.DOTALL)
# Matches a '/' followed by a run of characters up to a quote, space, '/' or
# '>'.  HTMLToCaptioned.convert() applies it right after the UID in a
# resolveuid src and treats group 1 as a sub-path of the target (for example
# an 'image_...' scale name).
SRC_TAIL = re.compile(r'/([^" \/>]+)')
# A class attribute whose quoted value contains "captioned", or any unquoted
# class value; group 1 is the value (including quotes when quoted).
CLASS_PATTERN = re.compile('\s*class\s*=\s*("[^"]*captioned[^"]*"|[^" \/>]+)')
# An alt attribute with a quoted or unquoted value; group 1 is the value.
ALT_PATTERN = re.compile('\\balt\s*=\s*("[^"]*"|[^" \/>]+)')
# NOTE(review): the first group starts with a bare ']*?', which looks like the
# remains of a stripped tag prefix (presumably something like '<img[^>') --
# confirm against the upstream source before relying on this pattern.
END_TAG_PATTERN = re.compile('(]*?)( */?>)')
# %-style template taking 'tag' and 'caption' keys.
# NOTE(review): only the two placeholders remain; any surrounding HTML markup
# appears to have been lost from this file -- confirm against upstream.
IMAGE_TEMPLATE = '''\
%(tag)s
%(caption)s
'''
class HTMLToCaptioned:
    """Transform which adds captions to images embedded in HTML.

    Every match of IMAGE_PATTERN whose src is a ``resolveuid/<uid>``
    reference that can be resolved is replaced by the output of the
    context's ``kupu_captioned_image`` template.  Afterwards every match of
    UID_PATTERN is rewritten to the URL of the object the UID resolves to.
    """
    if ITransform is not None:
        implements(ITransform)
    __implements__ = itransform
    __name__ = "html_to_captioned"
    inputs = ('text/html',)
    output = "text/x-html-captioned"

    def __init__(self, name=None):
        """Set up the configuration metadata; `name` overrides the transform name."""
        self.config_metadata = {
            'inputs' : ('list', 'Inputs', 'Input(s) MIME type. Change with care.'),
            }
        if name is not None:
            self.__name__ = name

    def name(self):
        """Return the name this transform is registered under."""
        return self.__name__

    def __getattr__(self, attr):
        """Fall back to ``self.config`` for 'inputs' / 'output'.

        Only reached when normal lookup fails; any other name raises
        AttributeError.
        """
        if attr == 'inputs':
            return self.config['inputs']
        if attr == 'output':
            return self.config['output']
        raise AttributeError(attr)

    def resolveuid(self, context, reference_catalog, uid):
        """Convert a uid to an object by looking it up in the reference catalog.
        If not found then tries to fallback to a possible hook (e.g. so you could
        resolve uids on another system).

        Returns the object, or None when neither lookup yields one.
        """
        target = reference_catalog.lookupObject(uid)
        if target is not None:
            return target
        hook = getattr(context, 'kupu_resolveuid_hook', None)
        if hook is not None:
            target = hook(uid)
        return target

    def convert(self, data, idata, filename=None, **kwargs):
        """convert the data, store the result in idata and return that

        optional argument filename may give the original file name of received data

        additional arguments given to engine's convert, convertTo or __call__ are
        passed back to the transform

        The object on which the translation was invoked is available as context
        (default: None).  Without a context, or when the context has no
        archetype_tool, the data is stored in idata unchanged.
        """
        context = kwargs.get('context', None)
        at_tool = None
        if context is not None:
            at_tool = getattr(context, 'archetype_tool', None)

        if context is None or at_tool is None:
            # No context to use for replacements, so don't bother trying.
            idata.setData(data)
            return idata

        # Only touch the context once we know it is usable; looking the
        # template up before the None check made the fallback above
        # unreachable.
        rc = at_tool.reference_catalog
        template = context.kupu_captioned_image

        def replaceImage(match):
            """Return the captioned markup for one IMAGE_PATTERN match."""
            tag = match.group('pat0') or match.group('pat1')
            attrs = ATTR_PATTERN.match(tag)
            atag = match.group('atag0') or match.group('atag1')
            src = attrs.group('src')
            if src is None:
                return match.group(0) # No change

            target = self.resolveuid(context, rc, src)
            if target is None:
                return match.group(0) # No change

            # Anything following the uid in the src (e.g. a scale name).
            m = SRC_TAIL.match(tag, attrs.end('src'))
            if m is not None:
                srctail = m.group(1)
            else:
                srctail = None

            subtarget = None
            d = attrs.groupdict()
            d['class'] = attrs.group('class')
            d['originalwidth'] = attrs.group('width')
            d['originalalt'] = attrs.group('alt')
            d['url_path'] = target.absolute_url_path()
            d['caption'] = newline_to_br(html_quote(target.Description()))
            d['image'] = d['fullimage'] = target
            d['tag'] = None
            d['isfullsize'] = True
            d['width'] = target.width

            if srctail:
                if isinstance(srctail, unicode):
                    srctail = srctail.encode('utf8') # restrictedTraverse doesn't accept unicode
                try:
                    subtarget = target.restrictedTraverse(srctail)
                except Exception:
                    # Best effort: fall back to plain attribute access.
                    subtarget = getattr(target, srctail, None)
                if subtarget is not None:
                    d['image'] = subtarget

                if srctail.startswith('image_'):
                    d['tag'] = target.getField('image').tag(target, scale=srctail[6:])
                elif subtarget:
                    d['tag'] = subtarget.tag()

            if d['tag'] is None:
                d['tag'] = target.tag()

            if subtarget is not None:
                d['isfullsize'] = subtarget.width == target.width and subtarget.height == target.height
                d['width'] = subtarget.width

            # strings that may contain non-ascii characters need to be decoded to unicode
            for key in ('caption', 'tag'):
                if isinstance(d[key], str):
                    d[key] = d[key].decode('utf8', 'replace')

            if atag is not None: # Must preserve original link, don't overwrite with a link to the image
                d['isfullsize'] = True
                # The match consumed the closing </a> as well, so put it back.
                d['tag'] = "%s%s</a>" % (atag, d['tag'])

            result = template(**d)
            if isinstance(result, str):
                result = result.decode('utf8')
            return result

        def replaceUids(match):
            """Return one UID_PATTERN match rewritten to its target's URL."""
            tag = match.group('tag')
            uid = match.group('uid')
            target = self.resolveuid(context, rc, uid)
            if target is None:
                return match.group(0)
            if getattr(aq_base(target), 'getRemoteUrl', None) is not None:
                url = target.getRemoteUrl()
            else:
                url = target.absolute_url_path()
            return tag + url

        if isinstance(data, str):
            # Transform for end user output should avoid erroring
            # if it can, so use 'replace' on decode.
            data = data.decode('utf8', 'replace')
        html = IMAGE_PATTERN.sub(replaceImage, data)

        # Replace urls that use UIDs with human friendly urls.
        html = UID_PATTERN.sub(replaceUids, html)
        if isinstance(html, unicode):
            html = html.encode('utf8') # Indexing requires a string result.
        idata.setData(html)
        return idata
def register():
    """Create the HTMLToCaptioned instance that initialize() registers."""
    transform = HTMLToCaptioned()
    return transform
def initialize():
    """Register the captioning transform with the portal_transforms tool.

    NOTE(review): ``portal`` is not bound anywhere in the visible part of
    this module, so this looks like it raises NameError unless the name is
    defined elsewhere -- confirm.
    """
    transforms_tool = getToolByName(portal, 'portal_transforms')
    transforms_tool.registerTransform(register())
# The ATTR_VALUE fragment specialised to capture its value in a group named 'href'.
ATTR_HREF = ATTR_VALUE % 'href'
# Matches the src of an <img> or the href of an <a>.  'prefix' is everything
# from '<' up to and including the optional opening quote; 'href' is the
# attribute value (quoted text, or an unquoted run up to space, '/' or '>').
LINK_PATTERN = re.compile(
    r'(?P<prefix>\<(?:img\s[^>]*src|a\s[^>]*href)\s*=\s*(?:"?))(?P<href>(?<=")[^"]*|[^ \/>]*)',
    re.IGNORECASE)
# portal_type names referenced by the migration code.
FRAGMENT_TYPE = 'CompositePack Fragments'
NAVIGATION_PAGE = 'Navigation Page'
# Matches, from the start of a link, either a complete <a ...>...</a> element
# or a single <img ...> tag; Migration.link_summary() uses it to extract the
# text shown for a link.
# NOTE(review): the tag names were lost from this file and are restored by
# analogy with LINK_PATTERN -- confirm against the upstream source.
SUMMARY_PATTERN = re.compile(r'(\<a[^>]*>.*?</a>)|(\<img[^>]*\>)', re.IGNORECASE|re.DOTALL)
class Migration:
FIELDS = ('portal_type', 'typename', 'fieldname',
'fieldlabel', 'position', 'action', 'commit_changes',
'image_tails', 'paths', 'pathuids', 'uids', 'found',
'batch_size',
)
def __init__(self, tool):
    """Look up the portal tools via `tool` and reset the run state."""
    self.tool = tool

    # Portal location details, all derived from the portal_url tool.
    url_tool = getToolByName(tool, 'portal_url')
    self.url_tool = url_tool
    portal = url_tool.getPortalObject()
    self.portal = portal
    base_path = url_tool.getPortalPath()
    self.portal_base = base_path
    self.portal_base_url = portal.absolute_url()
    self.prefix_length = len(base_path) + 1

    # Catalogs used while scanning.
    self.uid_catalog = getToolByName(tool, 'uid_catalog')
    self.reference_tool = getToolByName(tool, 'reference_catalog')
    self.portal_catalog = getToolByName(tool, 'portal_catalog')

    # Per-run state.
    self._continue = True
    self._firstoutput = False
    self.commit_changes = False
    self._objects = []
    self.image_tails = []
def initImageSizes(self):
self.image_tails = self.tool._getImageSizes()
def initFromRequest(self):
self.initImageSizes()
self.uids = None
self.found = 0
request = self.tool.REQUEST
rfg = request.form.get
fields = [f for f in rfg('fields',()) if f.get('selected',0)]
if fields:
f = fields[0]
self.portal_type = f.portal_type
self.typename = f.type.decode('utf-8')
self.fieldname = f.name.decode('utf-8')
self.fieldlabel = f.label.decode('utf-8')
else:
self.portal_type = rfg('portal_type', None)
self.fieldname = None
self.fieldlabel = None
self.typename = None
self.position = 0
self.action = rfg('button', None)
self.commit_changes = rfg('commit', False)
self.batch_size = 10
if self.commit_changes:
self.uids = rfg('uids', [])
pathuids = rfg('folderpaths', [])
self.paths = self.tool.convertUidsToPaths(pathuids)
self.pathuids = pathuids
def initCommit(self):
"""Reset counters for a commit pass"""
self.restoreState()
request = self.tool.REQUEST
rfg = request.form.get
self.commit_changes = True
self._firstoutput = True
self.found = 0
self.position = 0
self.batch_size = 5
self.uids = rfg('uids')
def saveState(self):
SESSION = self.tool.REQUEST.SESSION
SESSION['kupu_migrator'] = dict([(f, getattr(self, f, None)) for f in self.FIELDS])
def restoreState(self):
SESSION = self.tool.REQUEST.SESSION
state = SESSION['kupu_migrator']
for f in self.FIELDS:
setattr(self, f, state[f])
# def clearState(self):
# return
# SESSION = self.tool.REQUEST.SESSION
# if SESSION.has_key('kupu_migrator'):
# del SESSION['kupu_migrator']
def status(self):
s = [ '%s=%s' % (f,getattr(self, f, 'unset')) for f in
self.FIELDS ]
return '\n'.join(s)
def mkQuery(self):
query = {}
if self.portal_type:
query['portal_type'] = sanitize_portal_type(self.portal_type)
if self.paths:
query['path'] = self.paths
query['Language'] = 'all'
return query
def getInfo(self, saveState=True):
info = {}
if self._continue:
info['nexturi'] = self.tool.absolute_url_path()+'/kupu_migration.xml?button=continue'
if self.commit_changes and self._objects and self.position < getattr(self, '_total', -1):
info['delay'] = 5 # Avoid killing everyone else with conflict errors.
else:
info['nexturi'] = None
info['firstoutput'] = self._firstoutput
if hasattr(self, '_total'):
info['total'] = self._total
info['position'] = self.position
if self._total==0:
info['percent'] = '100%'
else:
info['percent'] = '%d%%' % ((100.*self.position)/self._total)
info['objects'] = self._objects
info['action'] = action = self.action
info['action_check'] = action=='check'
info['action_touid'] = action=='touid'
info['action_topath'] = action=='topath'
info['typename'] = self.typename
info['fieldlabel'] = self.fieldlabel
info['checkboxes'] = action != 'check' and not self.commit_changes
info['commit_changes'] = self.commit_changes
info['dryrun'] = not (self.action == 'check' or self.commit_changes)
info['found'] = self.found
if saveState:
self.saveState()
return info
def docontinue(self):
"""Scan selected documents looking for convertible links"""
uids = self.uids
if uids is None:
self.uids = uids = []
brains = self.portal_catalog.searchResults(self.mkQuery())
for b in brains:
uid = self.UIDfromBrain(b)
if uid:
uids.append(uid)
self._firstoutput = True
self._continue = True
return True
pos = self.position
self._total = total = len(uids)
uids = uids[pos:pos+self.batch_size]
self.position = pos + len(uids)
if not uids:
self._continue = False
return False # Done
self._objects = res = []
for uid in uids:
obj = self.reference_tool.lookupObject(uid)
if self.portal_type==FRAGMENT_TYPE and obj.portal_type!=FRAGMENT_TYPE:
try:
fldr = obj.cp_container.titles
except:
continue
else:
for o in fldr.objectValues([FRAGMENT_TYPE]):
objinfo = self.object_check(o)
if objinfo:
res.append(objinfo)
else:
objinfo = self.object_check(obj)
if objinfo:
res.append(objinfo)
self._continue = True
return True
def brain_check(self, brain):
object = brain.getObject()
return self.object_check(object)
def link_summary(self, data, start, link):
    """Summary information for a link.

    Takes the text SUMMARY_PATTERN matches at `start` in `data` (or, when
    it does not match, the 200 characters from `start`) and splits it
    around the first occurrence of `link`.

    Returns a two-element list [before, after]; `after` is '' when `link`
    does not occur in the text or is empty.
    """
    m = SUMMARY_PATTERN.match(data, start)
    if m:
        text = m.group(0)
    else:
        text = data[start:start+200]
    if not link:
        # str.split() raises ValueError for an empty separator.
        return [text, '']
    bits = text.split(link, 1)
    if len(bits)==1:
        bits.append('')
    return bits
def object_check(self, object):
"""Check the relative links within this object."""
def checklink(match):
matched = match.group(0)
newlink = link = decodeEntities(match.group('href'))
classification, uid, relpath, tail = self.classifyLink(link, base)
if self.action=='check':
if classification=='bad':
abslink = urljoin(base, link)
before, after = self.link_summary(data, match.start(), link)
summary = {'text':link, 'url':abslink,
'before': before,
'after': after, }
info.append(summary)
elif self.action=='touid':
if classification=='internal':
if uid and uid==objuid:
newlink = tail
elif uid:
newlink = 'resolveuid/%s%s' % (uid, tail)
else:
newlink = relpath+tail
elif self.action=='topath':
if classification=='internal':
newlink = relpath+tail
if newlink != link:
prefix = match.group('prefix')
newlink = html_quote(newlink).encode('ascii', 'xmlcharrefreplace')
changes.append((match.start()+len(prefix), match.end(), newlink))
return prefix + newlink
return matched
info = []
changes = []
try:
objuid = aq_base(object).UID
except:
return None # only archetypes objects
baseobj = object
if object.portal_type==FRAGMENT_TYPE:
baseobj = object.aq_parent.aq_parent.aq_parent
base = baseobj.absolute_url()
if getattr(baseobj.aq_explicit, 'isPrincipiaFolderish', 0):
base += '/'
field = object.getField(self.fieldname)
if field is None:
return None
content_type = field.getContentType(object)
if content_type != 'text/html':
# Don't attempt to modify non-html
return None
data = field.getEditAccessor(object)().decode('utf8')
__traceback_info__ = (object, data)
newdata = LINK_PATTERN.sub(checklink, data)
if data != newdata and self.commit_changes:
mutator = field.getMutator(object)
if mutator:
mutator(newdata.encode('utf8'), mimetype='text/html')
object.reindexObject() # Need to flag update
if info or changes:
self.found += 1
title = object.Title()
if not title:
title = object.getId()
if not title:
title = '