# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

""" deki.py - Access the wiki pages on a MindTouch Deki server via the API.

Here's what this code can do:

  wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", username, password)
  page = wiki.get_page("Sheep")
  print page.title
  print page.doc.toxml()

  page.title = "Bananas"
  page.save()

There are also some additional methods:
  wiki.create_page(path, content, title=, overwrite=)
  wiki.attach_file(page, name, data, mimetype, description=)
  wiki.move_page(old, new)
  wiki.get_subpages(page)

This module does not try to mimic the MindTouch "Plug" API.  It's meant to be
higher-level than that.
"""

import sys
import xml.dom.minidom as dom
from datetime import datetime
import re

try:
    # Python 2 module names (what this file was originally written against).
    import urllib2
    import cookielib
    import httplib
    from urllib import quote as _urllib_quote
    from urllib import urlencode as _urlencode
    import urlparse
except ImportError:
    # The same APIs under their Python 3 names, bound to the Python 2 names so
    # the rest of the module is unchanged.
    # NOTE(review): only importing and the pure helpers (URL building, DOM
    # utilities) have been exercised on Python 3; the network paths should be
    # confirmed against a real server before relying on them there.
    import urllib.request as urllib2
    import http.cookiejar as cookielib
    import http.client as httplib
    from urllib.parse import quote as _urllib_quote
    from urllib.parse import urlencode as _urlencode
    import urllib.parse as urlparse

try:
    basestring
except NameError:
    # Python 3 has no basestring; str is the only text type there.
    basestring = str

__all__ = ['Deki']


# === Utils

def _check(fact):
    """ Raise AssertionError('check failed') unless `fact` is true.

    Unlike the `assert` statement, this is not stripped under `python -O`.
    """
    if not fact:
        raise AssertionError('check failed')


def _urlquote(s, *args):
    """ Percent-encode the text `s` after encoding it as UTF-8.

    Extra positional arguments (e.g. the `safe` character set) are passed
    straight through to the standard library's quote().
    """
    return _urllib_quote(s.encode('utf-8'), *args)


def _make_url(*dirs, **params):
    """ Join path segments with '/' and append `params` as a query string.

    dirs must already be url-encoded, params must not.
    """
    url = '/'.join(dirs)
    if params:
        url += '?' + _urlencode(params)
    return url


class PutRequest(urllib2.Request):
    """ A urllib2 Request that is always sent with the PUT method. """

    def get_method(self):
        return "PUT"


# === Dream framework client code

# This handler causes python to "always be logged in" when it's talking to the
# server.  If you're just accessing public pages, it generates more requests
# than are strictly needed, but this is the behavior you want for a bot.
#
# The users/authenticate request is sent twice: once without any basic auth and
# once with.  Dumb.  Feel free to fix.
#
class _LoginHandler(urllib2.HTTPCookieProcessor):
    """ Cookie processor that logs in before any request lacking a cookie. """

    def __init__(self, server):
        """ server - the DreamClient whose login() is called when needed. """
        policy = cookielib.DefaultCookiePolicy(rfc2965=True)
        cookiejar = cookielib.CookieJar(policy)
        urllib2.HTTPCookieProcessor.__init__(self, cookiejar)
        self.server = server

    def http_request(self, req):
        """ Attach cookies to `req`, logging in first if there are none. """
        s = self.server
        req = urllib2.HTTPCookieProcessor.http_request(self, req)
        # The authenticate request itself is exempt; otherwise login() would
        # re-enter this branch for its own request.
        if ('Cookie' not in req.unredirected_hdrs
                and req.get_full_url() != s.base + 'users/authenticate'):
            s.login()
            # Retry - should have a new cookie.
            req = urllib2.HTTPCookieProcessor.http_request(self, req)
            _check('Cookie' in req.unredirected_hdrs)
        return req


class DreamClient:
    """ Minimal HTTP client for a Dream/Deki API endpoint.

    All `url` arguments taken by the methods are relative to `base`.
    """

    def __init__(self, base, user, password):
        """
        base - The base URI of the Deki API, with trailing slash.
               Typically, 'http://wiki.example.org/@api/deki/'.
        user, password - Your Deki login information.
        """
        self.base = base
        pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
        pm.add_password(None, self.base, user, password)
        ah = urllib2.HTTPBasicAuthHandler(pm)
        lh = _LoginHandler(self)
        self._opener = urllib2.build_opener(ah, lh)

    def login(self):
        """ Request users/authenticate and discard the response. """
        response = self._opener.open(self.base + 'users/authenticate')
        response.close()

    def open(self, url):
        """ Open `base + url` and return the response object (caller closes). """
        return self._opener.open(self.base + url)

    def _handleResponse(self, req):
        """ Helper method shared between post() and put().

        Sends `req` and returns the parsed DOM Document when the response's
        Content-Type looks like XML, otherwise None.  The response is always
        closed.
        """
        resp = self._opener.open(req)
        try:
            ct = resp.headers.get('Content-Type', '(none)')
            if '/xml' in ct or '+xml' in ct:
                return dom.parse(resp)
            # Not XML: read and discard the body.
            resp.read()
            return None
        finally:
            resp.close()

    def post(self, url, data, type):
        """ POST `data` with Content-Type `type`; see _handleResponse(). """
        req = urllib2.Request(self.base + url, data, {'Content-Type': type})
        return self._handleResponse(req)

    def put(self, url, data, type):
        """ PUT `data` with Content-Type `type`; see _handleResponse(). """
        req = PutRequest(self.base + url, data, {'Content-Type': type})
        return self._handleResponse(req)

    def get_xml(self, url):
        """ GET `base + url` and return the response parsed as a DOM Document. """
        resp = self.open(url)
        try:
            return dom.parse(resp)
        finally:
            resp.close()


# === DOM

def _text_of(node):
    """ Return the concatenated text of `node` and all its descendants.

    Only element and text nodes contribute; anything else yields u''.
    """
    if node.nodeType == node.ELEMENT_NODE:
        return u''.join(_text_of(n) for n in node.childNodes)
    elif node.nodeType == node.TEXT_NODE:
        return node.nodeValue
    else:
        return u''


def _the_element_by_name(doc, tagName):
    """ Return the single <tagName> element under `doc`.

    Raises ValueError if there is not exactly one such element.
    """
    elts = doc.getElementsByTagName(tagName)
    if len(elts) != 1:
        raise ValueError("Expected exactly one <%s> tag, got %d."
                         % (tagName, len(elts)))
    return elts[0]


def _first_element(node):
    """ Return the first child of `node` that is an element, or None. """
    n = node.firstChild
    while n is not None:
        if n.nodeType == n.ELEMENT_NODE:
            return n
        # Step along the *children*.  (This used to read node.nextSibling,
        # which jumped to the parent's sibling and so either gave up or
        # looped forever when the first child was e.g. whitespace text.)
        n = n.nextSibling
    return None


def _find_elements(node, path):
    """ Yield the elements reached from `node` by following `path`.

    `path` is a '/'-separated list of element names; each step matches
    direct child elements by nodeName.
    """
    if u'/' in path:
        [first, rest] = path.split(u'/', 1)
        for child in _find_elements(node, first):
            for desc in _find_elements(child, rest):
                yield desc
    else:
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE and n.nodeName == path:
                yield n


# === Deki

def _format_page_id(id):
    """ Turn a page id into the URL path segment that identifies the page.

    id - an int (numeric page id), Deki.HOME, or a string (page path).

    Raises TypeError for any other kind of id, rather than silently
    returning None and producing a URL such as 'pages/None/info'.
    """
    if isinstance(id, int):
        return str(id)
    elif id is Deki.HOME:
        return 'home'
    elif isinstance(id, basestring):
        # Double-encoded, per the Deki API reference.
        return '=' + _urlquote(_urlquote(id, ''))
    raise TypeError("page id must be an int, a string or Deki.HOME, got %r"
                    % (id,))


class Deki(DreamClient):
    """ High-level access to the pages of a Deki wiki. """

    # Sentinel page id referring to the wiki's main page.
    HOME = object()

    def get_page(self, page_id):
        """ Get the content of a page from the wiki.

        The page_id argument must be one of:
          an int - The page id (an arbitrary number assigned by Deki)
          a str - The page name (not the title, the full path that shows up in the URL)
          Deki.HOME - Refers to the main page of the wiki.

        Returns a Page object.
        """
        p = Page(self)
        p._load(page_id)
        return p

    def create_page(self, path, content, title=None, overwrite=False):
        """ Create a new wiki page.

        Parameters:
          path - str - The page id.
          content - str - The XML content to put in the new page.
            The document element must be a <body>.
          title - str - The page title.
            Defaults to the last path-segment of path.
          overwrite - bool - Whether to overwrite an existing page. If false,
            and the page already exists, the method will throw an error.
        """
        if title is None:
            title = path.split('/')[-1]
        doc = dom.parseString(content)
        _check(doc.documentElement.tagName == 'body')
        p = Page(self)
        p._create(path, title, doc, overwrite)

    def attach_file(self, page, name, data, mimetype, description=None):
        """ Create or update a file attachment.

        Parameters:
          page - str - the page ID this file is related to
          name - str - the name of the file
          data - str - the file data
          mimetype - str - the MIME type of the file
          description - str - a description of the file
        """
        p = {}
        if description is not None:
            p['description'] = description

        # The file name is encoded the same way as a page path.
        url = _make_url('pages', _format_page_id(page),
                        'files', _format_page_id(name), **p)

        r = self.put(url, data, mimetype)
        _check(r.documentElement.nodeName == u'file')

    def get_subpages(self, page_id):
        """ Return the ids of all subpages of the given page. """
        doc = self.get_xml(_make_url("pages", _format_page_id(page_id),
                                     "files,subpages"))
        for elt in _find_elements(doc, u'page/subpages/page.subpage/path'):
            yield _text_of(elt)

    def move_page(self, page_id, new_title, redirects=True):
        """ Move an existing page to a new location.

        A page cannot be moved to a destination that already exists, is a
        descendant, or has a protected title (ex.  Special:xxx, User:,
        Template:).

        When a page is moved, subpages under the specified page are also moved.
        For each moved page, the system automatically creates an alias page
        that redirects from the old to the new destination.

        Parameters:
          page_id - the page to move (see get_page for accepted ids)
          new_title - sent to the server as the `to` query parameter
          redirects - bool - sent as redirects=1 (true) or redirects=0 (false)
        """
        self.post(_make_url("pages", _format_page_id(page_id), "move",
                            to=new_title,
                            redirects="1" if redirects else "0"),
                  b"", "text/plain")


class Page:
    """ A Deki wiki page.

    To obtain a page, call wiki.get_page(id).
    Attributes:
      title : unicode - The page title.
      doc : Document - The content of the page as a DOM Document.
        The root element of this document is a <body>.
      path : unicode - The path.  Use this to detect redirects, as otherwise
        page.save() will overwrite the redirect with a copy of the content!
      deki : Deki - The Deki object from which the page was loaded.
      page_id : int/str/Deki.HOME - The page id used to load the page.
      load_time : datetime - The time the page was loaded,
        according to the clock on the client machine.
    Methods:
      save() - Save the modified document back to the server.
        Only the page.title and the contents of page.doc are saved.
    """

    def __init__(self, deki):
        self.deki = deki

    def _create(self, path, title, doc, overwrite):
        """ Fill in this page from the given values and save it at once. """
        self.title = title
        self.doc = doc
        self.page_id = path
        # save() sends load_time as 'edittime' together with abort=modified.
        # NOTE(review): presumably a far-future time makes the server accept
        # the write over an existing page and a far-past time makes it refuse
        # - confirm against the Deki API reference.
        if overwrite:
            self.load_time = datetime(2500, 1, 1)
        else:
            self.load_time = datetime(1900, 1, 1)
        self.path = path
        self.save()

    def _load(self, page_id):
        """ Fetch title, path and content from the server.

        page_id - an int, a page path string, or Deki.HOME.
        """
        load_time = datetime.utcnow()

        # Getting the title is a whole separate query!
        url = 'pages/%s/info' % _format_page_id(page_id)
        doc = self.deki.get_xml(url)
        title = _text_of(_the_element_by_name(doc, 'title'))
        path = _text_of(_the_element_by_name(doc, 'path'))

        # If you prefer to sling regexes, you can request format=raw instead.
        # The result is an XML document with one big fat text node in the body.
        url = _make_url('pages', _format_page_id(page_id), 'contents',
                        format='xhtml', mode='edit')
        doc = self.deki.get_xml(url)

        content = doc.documentElement
        _check(content.tagName == u'content')
        body = _first_element(content)
        _check(body is not None)
        _check(body.tagName == u'body')

        # Re-root the document at <body>, dropping the <content> wrapper.
        doc.removeChild(content)
        doc.appendChild(body)

        self.page_id = page_id
        self.load_time = load_time
        self.title = title
        self.path = path
        self.doc = doc

    def save(self):
        """ Post the title and the contents of self.doc back to the server.

        Raises AssertionError if the reply is not <edit status="success">.
        """
        # NOTE(review): these values are quoted here and then urlencoded
        # again by _make_url, so a title with special characters is
        # double-encoded on the wire.  Left as-is; verify against the server
        # before changing.
        p = {'edittime': _urlquote(self.load_time.strftime('%Y%m%d%H%M%S')),
             'abort': 'modified'}

        if self.title is not None:
            p['title'] = _urlquote(self.title)

        url = _make_url('pages', _format_page_id(self.page_id), 'contents', **p)

        # Only the children of <body> are sent, not the <body> tag itself.
        body = self.doc.documentElement
        bodyInnerXML = b''.join(n.toxml('utf-8') for n in body.childNodes)

        reply = self.deki.post(url, bodyInnerXML, 'text/plain; charset=utf-8')
        _check(reply.documentElement.nodeName == u'edit')
        _check(reply.documentElement.getAttribute(u'status') == u'success')