xpcom/analysis/deki.py

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
michael@0 5 """ deki.py - Access the wiki pages on a MindTouch Deki server via the API.
michael@0 6
michael@0 7 Here's what this code can do:
michael@0 8
michael@0 9 wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", username, password)
michael@0 10 page = wiki.get_page("Sheep")
michael@0 11 print page.title
michael@0 12 print page.doc.toxml()
michael@0 13
michael@0 14 page.title = "Bananas"
michael@0 15 page.save()
michael@0 16
michael@0 17 There are also some additional methods:
michael@0 18 wiki.create_page(path, content, title=, overwrite=)
michael@0 19 wiki.move_page(old, new)
michael@0 20 wiki.get_subpages(page)
michael@0 21
michael@0 22 This module does not try to mimic the MindTouch "Plug" API. It's meant to be
michael@0 23 higher-level than that.
michael@0 24 """
michael@0 25
michael@0 26 import sys
michael@0 27 import urllib2, cookielib, httplib
michael@0 28 import xml.dom.minidom as dom
michael@0 29 from urllib import quote as _urllib_quote
michael@0 30 from urllib import urlencode as _urlencode
michael@0 31 import urlparse
michael@0 32 from datetime import datetime
michael@0 33 import re
michael@0 34
michael@0 35 __all__ = ['Deki']
michael@0 36
michael@0 37
michael@0 38 # === Utils
michael@0 39
michael@0 40 def _check(fact):
michael@0 41 if not fact:
michael@0 42 raise AssertionError('check failed')
michael@0 43
michael@0 44 def _urlquote(s, *args):
michael@0 45 return _urllib_quote(s.encode('utf-8'), *args)
michael@0 46
michael@0 47 def _make_url(*dirs, **params):
michael@0 48 """ dirs must already be url-encoded, params must not """
michael@0 49 url = '/'.join(dirs)
michael@0 50 if params:
michael@0 51 url += '?' + _urlencode(params)
michael@0 52 return url
michael@0 53
class PutRequest(urllib2.Request):
    """ A urllib2 request that always reports the HTTP method PUT. """

    def get_method(self):
        # Fixed answer, regardless of whether the request carries data.
        method = "PUT"
        return method
michael@0 57
michael@0 58 # === Dream framework client code
michael@0 59
michael@0 60 # This handler causes python to "always be logged in" when it's talking to the
michael@0 61 # server. If you're just accessing public pages, it generates more requests
michael@0 62 # than are strictly needed, but this is the behavior you want for a bot.
michael@0 63 #
michael@0 64 # The users/authenticate request is sent twice: once without any basic auth and
michael@0 65 # once with. Dumb. Feel free to fix.
michael@0 66 #
michael@0 67 class _LoginHandler(urllib2.HTTPCookieProcessor):
michael@0 68 def __init__(self, server):
michael@0 69 policy = cookielib.DefaultCookiePolicy(rfc2965=True)
michael@0 70 cookiejar = cookielib.CookieJar(policy)
michael@0 71 urllib2.HTTPCookieProcessor.__init__(self, cookiejar)
michael@0 72 self.server = server
michael@0 73
michael@0 74 def http_request(self, req):
michael@0 75 #print "DEBUG- Requesting " + req.get_full_url()
michael@0 76 s = self.server
michael@0 77 req = urllib2.HTTPCookieProcessor.http_request(self, req)
michael@0 78 if ('Cookie' not in req.unredirected_hdrs
michael@0 79 and req.get_full_url() != s.base + 'users/authenticate'):
michael@0 80 s.login()
michael@0 81 # Retry - should have a new cookie.
michael@0 82 req = urllib2.HTTPCookieProcessor.http_request(self, req)
michael@0 83 _check('Cookie' in req.unredirected_hdrs)
michael@0 84 return req
michael@0 85
class DreamClient:
    """ Small HTTP client for an API rooted at a base URI.

    All `url` arguments taken by the methods are relative to that base.
    """

    def __init__(self, base, user, password):
        """
        base - The base URI of the Deki API, with trailing slash.
               Typically, 'http://wiki.example.org/@api/deki/'.
        user, password - Your Deki login information.
        """
        self.base = base
        passwords = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passwords.add_password(None, self.base, user, password)
        self._opener = urllib2.build_opener(
            urllib2.HTTPBasicAuthHandler(passwords),
            _LoginHandler(self))

    def login(self):
        """ Request users/authenticate and discard the response. """
        self._opener.open(self.base + 'users/authenticate').close()

    def open(self, url):
        """ Open `url` and return the response object (caller closes it). """
        absolute = self.base + url
        return self._opener.open(absolute)

    def _handleResponse(self, req):
        """Helper method shared between post() and put().

        Sends `req` and returns the parsed DOM document when the response's
        Content-Type contains '/xml' or '+xml'; otherwise the body is read
        and thrown away and None is returned. The response is always closed.
        """
        resp = self._opener.open(req)
        try:
            content_type = resp.headers.get('Content-Type', '(none)')
            looks_like_xml = '/xml' in content_type or '+xml' in content_type
            if not looks_like_xml:
                # Consume the body even though nothing is done with it.
                resp.read()
                return None
            return dom.parse(resp)
        finally:
            resp.close()

    def post(self, url, data, type):
        """ POST `data` to `url` with Content-Type `type`.

        Returns what _handleResponse() returns: a DOM document or None.
        """
        headers = {'Content-Type': type}
        request = urllib2.Request(self.base + url, data, headers)
        return self._handleResponse(request)

    def put(self, url, data, type):
        """ PUT `data` to `url` with Content-Type `type`.

        Returns what _handleResponse() returns: a DOM document or None.
        """
        headers = {'Content-Type': type}
        request = PutRequest(self.base + url, data, headers)
        return self._handleResponse(request)

    def get_xml(self, url):
        """ Fetch `url` and return the response parsed as a DOM document. """
        resp = self.open(url)
        try:
            document = dom.parse(resp)
        finally:
            resp.close()
        return document
michael@0 139
michael@0 140
michael@0 141 # === DOM
michael@0 142
michael@0 143 def _text_of(node):
michael@0 144 if node.nodeType == node.ELEMENT_NODE:
michael@0 145 return u''.join(_text_of(n) for n in node.childNodes)
michael@0 146 elif node.nodeType == node.TEXT_NODE:
michael@0 147 return node.nodeValue
michael@0 148 else:
michael@0 149 return u''
michael@0 150
michael@0 151 def _the_element_by_name(doc, tagName):
michael@0 152 elts = doc.getElementsByTagName(tagName)
michael@0 153 if len(elts) != 1:
michael@0 154 raise ValueError("Expected exactly one <%s> tag, got %d." % (tagName, len(elts)))
michael@0 155 return elts[0]
michael@0 156
michael@0 157 def _first_element(node):
michael@0 158 n = node.firstChild
michael@0 159 while n is not None:
michael@0 160 if n.nodeType == n.ELEMENT_NODE:
michael@0 161 return n
michael@0 162 n = node.nextSibling
michael@0 163 return None
michael@0 164
michael@0 165 def _find_elements(node, path):
michael@0 166 if u'/' in path:
michael@0 167 [first, rest] = path.split(u'/', 1)
michael@0 168 for child in _find_elements(node, first):
michael@0 169 for desc in _find_elements(child, rest):
michael@0 170 yield desc
michael@0 171 else:
michael@0 172 for n in node.childNodes:
michael@0 173 if n.nodeType == node.ELEMENT_NODE and n.nodeName == path:
michael@0 174 yield n
michael@0 175
michael@0 176
michael@0 177 # === Deki
michael@0 178
michael@0 179 def _format_page_id(id):
michael@0 180 if isinstance(id, int):
michael@0 181 return str(id)
michael@0 182 elif id is Deki.HOME:
michael@0 183 return 'home'
michael@0 184 elif isinstance(id, basestring):
michael@0 185 # Double-encoded, per the Deki API reference.
michael@0 186 return '=' + _urlquote(_urlquote(id, ''))
michael@0 187
class Deki(DreamClient):
    """ Client for the wiki pages of a MindTouch Deki server. """

    # Sentinel page id; _format_page_id() maps it to 'home'.
    HOME = object()

    def get_page(self, page_id):
        """ Get the content of a page from the wiki.

        The page_id argument must be one of:
        an int - The page id (an arbitrary number assigned by Deki)
        a str - The page name (not the title, the full path that shows up in the URL)
        Deki.HOME - Refers to the main page of the wiki.

        Returns a Page object.
        """
        p = Page(self)
        p._load(page_id)
        return p

    def create_page(self, path, content, title=None, overwrite=False):
        """ Create a new wiki page.

        Parameters:
        path - str - The page id.
        content - str - The XML content to put in the new page.
        The document element must be a <body>.
        title - str - The page title.
        Defaults to the last path-segment of path.
        overwrite - bool - Whether to overwrite an existing page. If false,
        and the page already exists, the method will throw an error.

        Raises AssertionError if the document element of `content` is not
        a <body>.
        """
        if title is None:
            title = path.split('/')[-1]
        doc = dom.parseString(content)
        _check(doc.documentElement.tagName == 'body')
        p = Page(self)
        p._create(path, title, doc, overwrite)

    def attach_file(self, page, name, data, mimetype, description=None):
        """Create or update a file attachment.

        Parameters:
        page - str - the page ID this file is related to
        name - str - the name of the file
        data - str - the file data
        mimetype - str - the MIME type of the file
        description - str - a description of the file

        Raises AssertionError if the server does not answer with an XML
        document whose root element is <file>.
        """
        params = {}
        if description is not None:
            params['description'] = description

        url = _make_url('pages', _format_page_id(page),
                        'files', _format_page_id(name), **params)

        reply = self.put(url, data, mimetype)
        # put() hands back None for a non-XML response; report that as a
        # failed check instead of an AttributeError on None.
        _check(reply is not None)
        _check(reply.documentElement.nodeName == u'file')

    def get_subpages(self, page_id):
        """ Return the ids of all subpages of the given page.

        This is a generator: it yields the text of each
        page/subpages/page.subpage/path element of the server's reply.
        """
        doc = self.get_xml(_make_url("pages", _format_page_id(page_id),
                                     "files,subpages"))
        for elt in _find_elements(doc, u'page/subpages/page.subpage/path'):
            yield _text_of(elt)

    def move_page(self, page_id, new_title, redirects=True):
        """ Move an existing page to a new location.

        A page cannot be moved to a destination that already exists, is a
        descendant, or has a protected title (ex. Special:xxx, User:,
        Template:).

        When a page is moved, subpages under the specified page are also moved.
        For each moved page, the system automatically creates an alias page
        that redirects from the old to the new destination.

        Parameters:
        page_id - the page to move (same forms as for get_page)
        new_title - str - sent to the server as the `to` parameter
        redirects - bool - sent to the server as redirects=1 or redirects=0
        """
        self.post(_make_url("pages", _format_page_id(page_id), "move",
                            to=new_title,
                            redirects="1" if redirects else "0"),
                  "", "text/plain")
michael@0 267
class Page:
    """ A Deki wiki page.

    To obtain a page, call wiki.get_page(id).
    Attributes:
    title : unicode - The page title.
    doc : Document - The content of the page as a DOM Document.
    The root element of this document is a <body>.
    path : unicode - The path. Use this to detect redirects, as otherwise
    page.save() will overwrite the redirect with a copy of the content!
    deki : Deki - The Deki object from which the page was loaded.
    page_id : str/int/Deki.HOME - The page id used to load the page.
    load_time : datetime - The time the page was loaded,
    according to the clock on the client machine.
    Methods:
    save() - Save the modified document back to the server.
    Only the page.title and the contents of page.doc are saved.
    """

    def __init__(self, deki):
        self.deki = deki

    def _create(self, path, title, doc, overwrite):
        """ Fill in the attributes for a new page and save it right away.

        load_time is faked because save() sends it as `edittime` together
        with abort=modified: a far-future time when `overwrite` is true, a
        far-past time otherwise.
        NOTE(review): presumably the server rejects the save when the page
        changed after `edittime` - confirm against the Deki API reference.
        """
        self.title = title
        self.doc = doc
        self.page_id = path
        if overwrite:
            self.load_time = datetime(2500, 1, 1)
        else:
            self.load_time = datetime(1900, 1, 1)
        self.path = path
        self.save()

    def _load(self, page_id):
        """ Load title, path and content of the page from the server.

        page_id - an int, a page name or Deki.HOME; see Deki.get_page().

        Raises AssertionError if the content reply is not a <content>
        element whose first element child is a <body>.
        """
        load_time = datetime.utcnow()

        # Getting the title is a whole separate query!
        url = 'pages/%s/info' % _format_page_id(page_id)
        doc = self.deki.get_xml(url)
        title = _text_of(_the_element_by_name(doc, 'title'))
        path = _text_of(_the_element_by_name(doc, 'path'))

        # If you prefer to sling regexes, you can request format=raw instead.
        # The result is an XML document with one big fat text node in the body.
        url = _make_url('pages', _format_page_id(page_id), 'contents',
                        format='xhtml', mode='edit')
        doc = self.deki.get_xml(url)

        content = doc.documentElement
        _check(content.tagName == u'content')
        body = _first_element(content)
        _check(body is not None)
        _check(body.tagName == u'body')

        # Make <body> the document element in place of <content>.
        doc.removeChild(content)
        doc.appendChild(body)

        self.page_id = page_id
        self.load_time = load_time
        self.title = title
        self.path = path
        self.doc = doc

    def save(self):
        """ Save the modified document back to the server.

        Only the page.title and the contents of page.doc are saved.

        Raises AssertionError if the server does not answer with an XML
        <edit> document whose status attribute is 'success'.
        """
        # NOTE(review): _make_url() url-encodes the params itself, so the
        # values quoted here end up encoded twice - confirm that the server
        # expects this before changing it.
        p = {'edittime': _urlquote(self.load_time.strftime('%Y%m%d%H%M%S')),
             'abort': 'modified'}

        if self.title is not None:
            p['title'] = _urlquote(self.title)

        url = _make_url('pages', _format_page_id(self.page_id), 'contents', **p)

        # Send the children of <body>, not the <body> element itself.
        body = self.doc.documentElement
        bodyInnerXML = ''.join(n.toxml('utf-8') for n in body.childNodes)

        reply = self.deki.post(url, bodyInnerXML, 'text/plain; charset=utf-8')
        # post() hands back None for a non-XML response; report that as a
        # failed check instead of an AttributeError on None.
        _check(reply is not None)
        _check(reply.documentElement.nodeName == u'edit')
        _check(reply.documentElement.getAttribute(u'status') == u'success')

mercurial