Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 2 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 3 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 4 | |
michael@0 | 5 | """ deki.py - Access the wiki pages on a MindTouch Deki server via the API. |
michael@0 | 6 | |
michael@0 | 7 | Here's what this code can do: |
michael@0 | 8 | |
michael@0 | 9 | wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", username, password) |
michael@0 | 10 | page = wiki.get_page("Sheep") |
michael@0 | 11 | print page.title |
michael@0 | 12 | print page.doc.toxml() |
michael@0 | 13 | |
michael@0 | 14 | page.title = "Bananas" |
michael@0 | 15 | page.save() |
michael@0 | 16 | |
michael@0 | 17 | There are also some additional methods: |
michael@0 | 18 | wiki.create_page(path, content, title=, overwrite=) |
michael@0 | 19 | wiki.move_page(old, new) |
michael@0 | 20 | wiki.get_subpages(page) |
michael@0 | 21 | |
michael@0 | 22 | This module does not try to mimic the MindTouch "Plug" API. It's meant to be |
michael@0 | 23 | higher-level than that. |
michael@0 | 24 | """ |
michael@0 | 25 | |
michael@0 | 26 | import sys |
michael@0 | 27 | import urllib2, cookielib, httplib |
michael@0 | 28 | import xml.dom.minidom as dom |
michael@0 | 29 | from urllib import quote as _urllib_quote |
michael@0 | 30 | from urllib import urlencode as _urlencode |
michael@0 | 31 | import urlparse |
michael@0 | 32 | from datetime import datetime |
michael@0 | 33 | import re |
michael@0 | 34 | |
michael@0 | 35 | __all__ = ['Deki'] |
michael@0 | 36 | |
michael@0 | 37 | |
michael@0 | 38 | # === Utils |
michael@0 | 39 | |
michael@0 | 40 | def _check(fact): |
michael@0 | 41 | if not fact: |
michael@0 | 42 | raise AssertionError('check failed') |
michael@0 | 43 | |
michael@0 | 44 | def _urlquote(s, *args): |
michael@0 | 45 | return _urllib_quote(s.encode('utf-8'), *args) |
michael@0 | 46 | |
michael@0 | 47 | def _make_url(*dirs, **params): |
michael@0 | 48 | """ dirs must already be url-encoded, params must not """ |
michael@0 | 49 | url = '/'.join(dirs) |
michael@0 | 50 | if params: |
michael@0 | 51 | url += '?' + _urlencode(params) |
michael@0 | 52 | return url |
michael@0 | 53 | |
class PutRequest(urllib2.Request):
    """ A urllib2 request that is always sent with the HTTP PUT method. """

    def get_method(self):
        """ Report "PUT" whether or not the request carries data. """
        return "PUT"
michael@0 | 57 | |
michael@0 | 58 | # === Dream framework client code |
michael@0 | 59 | |
michael@0 | 60 | # This handler causes python to "always be logged in" when it's talking to the |
michael@0 | 61 | # server. If you're just accessing public pages, it generates more requests |
michael@0 | 62 | # than are strictly needed, but this is the behavior you want for a bot. |
michael@0 | 63 | # |
michael@0 | 64 | # The users/authenticate request is sent twice: once without any basic auth and |
michael@0 | 65 | # once with. Dumb. Feel free to fix. |
michael@0 | 66 | # |
michael@0 | 67 | class _LoginHandler(urllib2.HTTPCookieProcessor): |
michael@0 | 68 | def __init__(self, server): |
michael@0 | 69 | policy = cookielib.DefaultCookiePolicy(rfc2965=True) |
michael@0 | 70 | cookiejar = cookielib.CookieJar(policy) |
michael@0 | 71 | urllib2.HTTPCookieProcessor.__init__(self, cookiejar) |
michael@0 | 72 | self.server = server |
michael@0 | 73 | |
michael@0 | 74 | def http_request(self, req): |
michael@0 | 75 | #print "DEBUG- Requesting " + req.get_full_url() |
michael@0 | 76 | s = self.server |
michael@0 | 77 | req = urllib2.HTTPCookieProcessor.http_request(self, req) |
michael@0 | 78 | if ('Cookie' not in req.unredirected_hdrs |
michael@0 | 79 | and req.get_full_url() != s.base + 'users/authenticate'): |
michael@0 | 80 | s.login() |
michael@0 | 81 | # Retry - should have a new cookie. |
michael@0 | 82 | req = urllib2.HTTPCookieProcessor.http_request(self, req) |
michael@0 | 83 | _check('Cookie' in req.unredirected_hdrs) |
michael@0 | 84 | return req |
michael@0 | 85 | |
class DreamClient:
    """ Small HTTP client for a Dream-framework server.

    All `url` arguments are relative to `base`. Requests go through an opener
    that carries basic-auth credentials and a _LoginHandler.
    """

    def __init__(self, base, user, password):
        """
        base - The base URI of the Deki API, with trailing slash.
               Typically, 'http://wiki.example.org/@api/deki/'.
        user, password - Your Deki login information.
        """
        self.base = base
        passwords = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passwords.add_password(None, self.base, user, password)
        self._opener = urllib2.build_opener(
            urllib2.HTTPBasicAuthHandler(passwords),
            _LoginHandler(self))

    def login(self):
        """ Request users/authenticate and discard the response. """
        self._opener.open(self.base + 'users/authenticate').close()

    def open(self, url):
        """ Open `url` (relative to base) and return the response object. """
        return self._opener.open(self.base + url)

    def _handleResponse(self, req):
        """ Helper method shared between post() and put().

        Sends `req` and returns the parsed DOM document when the response
        Content-Type mentions '/xml' or '+xml'; otherwise reads the body,
        throws it away and returns None. The response is always closed.
        """
        response = self._opener.open(req)
        try:
            content_type = response.headers.get('Content-Type', '(none)')
            is_xml = '/xml' in content_type or '+xml' in content_type
            if not is_xml:
                # Not XML: consume the body, but there is nothing to return.
                response.read()
                return None
            return dom.parse(response)
        finally:
            response.close()

    def post(self, url, data, type):
        """ POST `data` with Content-Type `type`; see _handleResponse. """
        headers = {'Content-Type': type}
        return self._handleResponse(
            urllib2.Request(self.base + url, data, headers))

    def put(self, url, data, type):
        """ PUT `data` with Content-Type `type`; see _handleResponse. """
        headers = {'Content-Type': type}
        return self._handleResponse(
            PutRequest(self.base + url, data, headers))

    def get_xml(self, url):
        """ GET `url` and return the response parsed as a DOM document. """
        response = self.open(url)
        try:
            return dom.parse(response)
        finally:
            response.close()
michael@0 | 139 | |
michael@0 | 140 | |
michael@0 | 141 | # === DOM |
michael@0 | 142 | |
michael@0 | 143 | def _text_of(node): |
michael@0 | 144 | if node.nodeType == node.ELEMENT_NODE: |
michael@0 | 145 | return u''.join(_text_of(n) for n in node.childNodes) |
michael@0 | 146 | elif node.nodeType == node.TEXT_NODE: |
michael@0 | 147 | return node.nodeValue |
michael@0 | 148 | else: |
michael@0 | 149 | return u'' |
michael@0 | 150 | |
michael@0 | 151 | def _the_element_by_name(doc, tagName): |
michael@0 | 152 | elts = doc.getElementsByTagName(tagName) |
michael@0 | 153 | if len(elts) != 1: |
michael@0 | 154 | raise ValueError("Expected exactly one <%s> tag, got %d." % (tagName, len(elts))) |
michael@0 | 155 | return elts[0] |
michael@0 | 156 | |
michael@0 | 157 | def _first_element(node): |
michael@0 | 158 | n = node.firstChild |
michael@0 | 159 | while n is not None: |
michael@0 | 160 | if n.nodeType == n.ELEMENT_NODE: |
michael@0 | 161 | return n |
michael@0 | 162 | n = node.nextSibling |
michael@0 | 163 | return None |
michael@0 | 164 | |
michael@0 | 165 | def _find_elements(node, path): |
michael@0 | 166 | if u'/' in path: |
michael@0 | 167 | [first, rest] = path.split(u'/', 1) |
michael@0 | 168 | for child in _find_elements(node, first): |
michael@0 | 169 | for desc in _find_elements(child, rest): |
michael@0 | 170 | yield desc |
michael@0 | 171 | else: |
michael@0 | 172 | for n in node.childNodes: |
michael@0 | 173 | if n.nodeType == node.ELEMENT_NODE and n.nodeName == path: |
michael@0 | 174 | yield n |
michael@0 | 175 | |
michael@0 | 176 | |
michael@0 | 177 | # === Deki |
michael@0 | 178 | |
michael@0 | 179 | def _format_page_id(id): |
michael@0 | 180 | if isinstance(id, int): |
michael@0 | 181 | return str(id) |
michael@0 | 182 | elif id is Deki.HOME: |
michael@0 | 183 | return 'home' |
michael@0 | 184 | elif isinstance(id, basestring): |
michael@0 | 185 | # Double-encoded, per the Deki API reference. |
michael@0 | 186 | return '=' + _urlquote(_urlquote(id, '')) |
michael@0 | 187 | |
class Deki(DreamClient):
    """ Access to the wiki pages of a MindTouch Deki server.

    Constructed like DreamClient: Deki(base, user, password).
    """

    # Unique marker object; pass it as a page id to mean the main page.
    HOME = object()

    def get_page(self, page_id):
        """ Get the content of a page from the wiki.

        The page_id argument must be one of:
            an int - The page id (an arbitrary number assigned by Deki)
            a str - The page name (not the title, the full path that shows up in the URL)
            Deki.HOME - Refers to the main page of the wiki.

        Returns a Page object.
        """
        p = Page(self)
        p._load(page_id)
        return p

    def create_page(self, path, content, title=None, overwrite=False):
        """ Create a new wiki page.

        Parameters:
            path - str - The page id.
            content - str - The XML content to put in the new page.
                The document element must be a <body>.
            title - str - The page title. Optional.
                Defaults to the last path-segment of path.
            overwrite - bool - Whether to overwrite an existing page. If false,
                and the page already exists, the method will throw an error.

        Raises AssertionError if the document element of content is not
        <body>.
        """
        if title is None:
            title = path.split('/')[-1]
        doc = dom.parseString(content)
        _check(doc.documentElement.tagName == 'body')
        p = Page(self)
        p._create(path, title, doc, overwrite)

    def attach_file(self, page, name, data, mimetype, description=None):
        """Create or update a file attachment.

        Parameters:
            page - str - the page ID this file is related to
            name - str - the name of the file
            data - str - the file data
            mimetype - str - the MIME type of the file
            description - str - a description of the file

        Raises AssertionError if the server does not answer with a <file>
        XML document.
        """
        p = {}
        if description is not None:
            p['description'] = description

        url = _make_url('pages', _format_page_id(page),
                        'files', _format_page_id(name), **p)

        r = self.put(url, data, mimetype)
        # put() returns None for a non-XML reply; fail with the same error
        # as any other unexpected reply rather than an AttributeError.
        _check(r is not None)
        _check(r.documentElement.nodeName == u'file')

    def get_subpages(self, page_id):
        """ Return the ids of all subpages of the given page.

        This is a generator: the request is only made once iteration
        starts, and each item is the text of one subpage's <path> element.
        """
        doc = self.get_xml(_make_url("pages", _format_page_id(page_id),
                                     "files,subpages"))
        for elt in _find_elements(doc, u'page/subpages/page.subpage/path'):
            yield _text_of(elt)

    def move_page(self, page_id, new_title, redirects=True):
        """ Move an existing page to a new location.

        Parameters:
            page_id - the page to move (int, str or Deki.HOME).
            new_title - str - sent to the server as the `to` parameter.
            redirects - bool - sent to the server as redirects=1 or 0.

        A page cannot be moved to a destination that already exists, is a
        descendant, or has a protected title (ex.  Special:xxx, User:,
        Template:).

        When a page is moved, subpages under the specified page are also moved.
        For each moved page, the system automatically creates an alias page
        that redirects from the old to the new destination.
        """
        self.post(_make_url("pages", _format_page_id(page_id), "move",
                            to=new_title,
                            redirects="1" if redirects else "0"),
                  "", "text/plain")
michael@0 | 267 | |
class Page:
    """ A Deki wiki page.

    To obtain a page, call wiki.get_page(id).
    Attributes:
        title : unicode - The page title.
        doc : Document - The content of the page as a DOM Document.
          The root element of this document is a <body>.
        path : unicode - The path.  Use this to detect redirects, as otherwise
          page.save() will overwrite the redirect with a copy of the content!
        deki : Deki - The Deki object from which the page was loaded.
        page_id : str/int/Deki.HOME - The page id used to load the page.
        load_time : datetime - The time the page was loaded,
          according to the clock on the client machine.
    Methods:
        save() - Save the modified document back to the server.
            Only the page.title and the contents of page.doc are saved.
    """

    def __init__(self, deki):
        self.deki = deki

    def _create(self, path, title, doc, overwrite):
        """ Fill in this page from the given values and save it at once.

        save() sends load_time as `edittime` together with abort=modified.
        A load_time far in the future (overwrite) or far in the past (no
        overwrite) is used to steer that check -- presumably the server
        refuses the save when the page changed after `edittime`.
        """
        self.title = title
        self.doc = doc
        self.page_id = path
        if overwrite:
            self.load_time = datetime(2500, 1, 1)
        else:
            self.load_time = datetime(1900, 1, 1)
        self.path = path
        self.save()

    def _load(self, page_id):
        """ Fetch the page from the server and fill in this object.

        page_id - an int, a str or Deki.HOME; see Deki.get_page().

        Raises ValueError or AssertionError if the server's replies do not
        have the expected shape.
        """
        # Taken before the requests, so an edit made on the server while we
        # are still loading is not older than load_time.
        load_time = datetime.utcnow()
        page_ref = _format_page_id(page_id)

        # Getting the title is a whole separate query!
        info = self.deki.get_xml(_make_url('pages', page_ref, 'info'))
        title = _text_of(_the_element_by_name(info, 'title'))
        path = _text_of(_the_element_by_name(info, 'path'))

        # If you prefer to sling regexes, you can request format=raw instead.
        # The result is an XML document with one big fat text node in the body.
        url = _make_url('pages', page_ref, 'contents',
                        format='xhtml', mode='edit')
        doc = self.deki.get_xml(url)

        content = doc.documentElement
        _check(content.tagName == u'content')
        body = _first_element(content)
        _check(body is not None)
        _check(body.tagName == u'body')

        # Make <body> the document element in place of <content>.
        doc.removeChild(content)
        doc.appendChild(body)

        self.page_id = page_id
        self.load_time = load_time
        self.title = title
        self.path = path
        self.doc = doc

    def save(self):
        """ Save the title and the contents of self.doc back to the server.

        Raises AssertionError if the server does not answer with an <edit>
        document whose status is 'success'.
        """
        # _make_url() url-encodes these values itself, so they must be
        # passed raw; quoting them here as well would double-encode them.
        p = {'edittime': self.load_time.strftime('%Y%m%d%H%M%S'),
             'abort': 'modified'}

        if self.title is not None:
            title = self.title
            # Hand urlencode UTF-8 bytes: it cannot cope with non-ASCII
            # unicode text on Python 2.
            if not isinstance(title, bytes):
                title = title.encode('utf-8')
            p['title'] = title

        url = _make_url('pages', _format_page_id(self.page_id), 'contents', **p)

        # Only the children of <body> are sent, not the <body> tag itself.
        body = self.doc.documentElement
        bodyInnerXML = ''.join(n.toxml('utf-8') for n in body.childNodes)

        reply = self.deki.post(url, bodyInnerXML, 'text/plain; charset=utf-8')
        # post() returns None for a non-XML reply; report that as a failed
        # check rather than an AttributeError.
        _check(reply is not None)
        _check(reply.documentElement.nodeName == u'edit')
        _check(reply.documentElement.getAttribute(u'status') == u'success')
michael@0 | 344 | reply = self.deki.post(url, bodyInnerXML, 'text/plain; charset=utf-8') |
michael@0 | 345 | _check(reply.documentElement.nodeName == u'edit') |
michael@0 | 346 | _check(reply.documentElement.getAttribute(u'status') == u'success') |