xpcom/analysis/deki.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xpcom/analysis/deki.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,346 @@
     1.4 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.5 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.7 +
     1.8 +""" deki.py - Access the wiki pages on a MindTouch Deki server via the API.
     1.9 +
    1.10 +Here's what this code can do:
    1.11 +
    1.12 +  wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", username, password)
    1.13 +  page = wiki.get_page("Sheep")
    1.14 +  print page.title
    1.15 +  print page.doc.toxml()
    1.16 +
    1.17 +  page.title = "Bananas"
    1.18 +  page.save()
    1.19 +
    1.20 +There are also some additional methods:
    1.21 +  wiki.create_page(path, content, title=, overwrite=)
    1.22 +  wiki.move_page(old, new)
    1.23 +  wiki.get_subpages(page)
    1.24 +
    1.25 +This module does not try to mimic the MindTouch "Plug" API.  It's meant to be
    1.26 +higher-level than that.
    1.27 +"""
    1.28 +
    1.29 +import sys
    1.30 +import urllib2, cookielib, httplib
    1.31 +import xml.dom.minidom as dom
    1.32 +from urllib import quote as _urllib_quote
    1.33 +from urllib import urlencode as _urlencode
    1.34 +import urlparse
    1.35 +from datetime import datetime
    1.36 +import re
    1.37 +
    1.38 +__all__ = ['Deki']
    1.39 +
    1.40 +
    1.41 +# === Utils
    1.42 +
    1.43 +def _check(fact):
    1.44 +    if not fact:
    1.45 +        raise AssertionError('check failed')
    1.46 +
    1.47 +def _urlquote(s, *args):
    1.48 +    return _urllib_quote(s.encode('utf-8'), *args)
    1.49 +
    1.50 +def _make_url(*dirs, **params):
    1.51 +    """ dirs must already be url-encoded, params must not """
    1.52 +    url = '/'.join(dirs)
    1.53 +    if params:
    1.54 +        url += '?' + _urlencode(params)
    1.55 +    return url
    1.56 +
    1.57 +class PutRequest(urllib2.Request):
    1.58 +    def get_method(self):
    1.59 +        return "PUT"
    1.60 +
    1.61 +# === Dream framework client code
    1.62 +
    1.63 +# This handler causes python to "always be logged in" when it's talking to the
    1.64 +# server.  If you're just accessing public pages, it generates more requests
    1.65 +# than are strictly needed, but this is the behavior you want for a bot.
    1.66 +#
    1.67 +# The users/authenticate request is sent twice: once without any basic auth and
    1.68 +# once with.  Dumb.  Feel free to fix.
    1.69 +#
    1.70 +class _LoginHandler(urllib2.HTTPCookieProcessor):
    1.71 +    def __init__(self, server):
    1.72 +        policy = cookielib.DefaultCookiePolicy(rfc2965=True)
    1.73 +        cookiejar = cookielib.CookieJar(policy)
    1.74 +        urllib2.HTTPCookieProcessor.__init__(self, cookiejar)
    1.75 +        self.server = server
    1.76 +
    1.77 +    def http_request(self, req):
    1.78 +        #print "DEBUG- Requesting " + req.get_full_url()
    1.79 +        s = self.server
    1.80 +        req = urllib2.HTTPCookieProcessor.http_request(self, req)
    1.81 +        if ('Cookie' not in req.unredirected_hdrs
    1.82 +              and req.get_full_url() != s.base + 'users/authenticate'):
    1.83 +            s.login()
    1.84 +            # Retry - should have a new cookie.
    1.85 +            req = urllib2.HTTPCookieProcessor.http_request(self, req)
    1.86 +            _check('Cookie' in req.unredirected_hdrs)
    1.87 +        return req
    1.88 +
    1.89 +class DreamClient:
    1.90 +    def __init__(self, base, user, password):
    1.91 +        """ 
    1.92 +        base - The base URI of the Deki API, with trailing slash.
    1.93 +               Typically, 'http://wiki.example.org/@api/deki/'.
    1.94 +        user, password - Your Deki login information.
    1.95 +        """
    1.96 +        self.base = base
    1.97 +        pm = urllib2.HTTPPasswordMgrWithDefaultRealm()
    1.98 +        pm.add_password(None, self.base, user, password)
    1.99 +        ah = urllib2.HTTPBasicAuthHandler(pm)
   1.100 +        lh = _LoginHandler(self)
   1.101 +        self._opener = urllib2.build_opener(ah, lh)
   1.102 +
   1.103 +    def login(self):
   1.104 +        response = self._opener.open(self.base + 'users/authenticate')
   1.105 +        response.close()
   1.106 +
   1.107 +    def open(self, url):
   1.108 +        return self._opener.open(self.base + url)
   1.109 +
   1.110 +    def _handleResponse(self, req):
   1.111 +        """Helper method shared between post() and put()"""
   1.112 +        resp = self._opener.open(req)
   1.113 +        try:
   1.114 +            ct = resp.headers.get('Content-Type', '(none)')
   1.115 +            if '/xml' in ct or '+xml' in ct:
   1.116 +                return dom.parse(resp)
   1.117 +            else:
   1.118 +                #print "DEBUG- Content-Type:", ct
   1.119 +                crud = resp.read()
   1.120 +                #print 'DEBUG- crud:\n---\n%s\n---' % re.sub(r'(?m)^', '    ', crud)
   1.121 +                return None
   1.122 +        finally:
   1.123 +            resp.close()
   1.124 +
   1.125 +
   1.126 +    def post(self, url, data, type):
   1.127 +        #print "DEBUG- posting to:", self.base + url
   1.128 +        req = urllib2.Request(self.base + url, data, {'Content-Type': type})
   1.129 +        return self._handleResponse(req)
   1.130 +
   1.131 +    def put(self, url, data, type):
   1.132 +        #print "DEBUG- putting to:", self.base + url
   1.133 +        req = PutRequest(self.base + url, data, {'Content-Type': type})
   1.134 +        return self._handleResponse(req)
   1.135 +
   1.136 +    def get_xml(self, url):
   1.137 +        resp = self.open(url)
   1.138 +        try:
   1.139 +            return dom.parse(resp)
   1.140 +        finally:
   1.141 +            resp.close()
   1.142 +
   1.143 +
   1.144 +# === DOM
   1.145 +
   1.146 +def _text_of(node):
   1.147 +    if node.nodeType == node.ELEMENT_NODE:
   1.148 +        return u''.join(_text_of(n) for n in node.childNodes)
   1.149 +    elif node.nodeType == node.TEXT_NODE:
   1.150 +        return node.nodeValue
   1.151 +    else:
   1.152 +        return u''
   1.153 +
   1.154 +def _the_element_by_name(doc, tagName):
   1.155 +    elts = doc.getElementsByTagName(tagName)
   1.156 +    if len(elts) != 1:
   1.157 +        raise ValueError("Expected exactly one <%s> tag, got %d." % (tagName, len(elts)))
   1.158 +    return elts[0]
   1.159 +
   1.160 +def _first_element(node):
   1.161 +    n = node.firstChild
   1.162 +    while n is not None:
   1.163 +        if n.nodeType == n.ELEMENT_NODE:
   1.164 +            return n
   1.165 +        n = node.nextSibling
   1.166 +    return None
   1.167 +
   1.168 +def _find_elements(node, path):
   1.169 +    if u'/' in path:
   1.170 +        [first, rest] = path.split(u'/', 1)
   1.171 +        for child in _find_elements(node, first):
   1.172 +            for desc in _find_elements(child, rest):
   1.173 +                yield desc
   1.174 +    else:
   1.175 +        for n in node.childNodes:
   1.176 +            if n.nodeType == node.ELEMENT_NODE and n.nodeName == path:
   1.177 +                yield n
   1.178 +
   1.179 +
   1.180 +# === Deki
   1.181 +
   1.182 +def _format_page_id(id):
   1.183 +    if isinstance(id, int):
   1.184 +        return str(id)
   1.185 +    elif id is Deki.HOME:
   1.186 +        return 'home'
   1.187 +    elif isinstance(id, basestring):
   1.188 +        # Double-encoded, per the Deki API reference.
   1.189 +        return '=' + _urlquote(_urlquote(id, ''))
   1.190 +
   1.191 +class Deki(DreamClient):
   1.192 +    HOME = object()
   1.193 +
   1.194 +    def get_page(self, page_id):
   1.195 +        """ Get the content of a page from the wiki.
   1.196 +
   1.197 +        The page_id argument must be one of:
   1.198 +          an int - The page id (an arbitrary number assigned by Deki)
   1.199 +          a str - The page name (not the title, the full path that shows up in the URL)
   1.200 +          Deki.HOME - Refers to the main page of the wiki.
   1.201 +
   1.202 +        Returns a Page object.
   1.203 +        """
   1.204 +        p = Page(self)
   1.205 +        p._load(page_id)
   1.206 +        return p
   1.207 +
   1.208 +    def create_page(self, path, content, title=None, overwrite=False):
   1.209 +        """ Create a new wiki page.
   1.210 +
   1.211 +        Parameters:
   1.212 +          path - str - The page id.
   1.213 +          content - str - The XML content to put in the new page.
   1.214 +            The document element must be a <body>.
   1.215 +          title - str - The page title.  Keyword argument only.
   1.216 +            Defaults to the last path-segment of path.
   1.217 +          overwrite - bool - Whether to overwrite an existing page. If false,
   1.218 +            and the page already exists, the method will throw an error.
   1.219 +        """
   1.220 +        if title is None:
   1.221 +            title = path.split('/')[-1]
   1.222 +        doc = dom.parseString(content)
   1.223 +        _check(doc.documentElement.tagName == 'body')
   1.224 +        p = Page(self)
   1.225 +        p._create(path, title, doc, overwrite)
   1.226 +
   1.227 +    def attach_file(self, page, name, data, mimetype, description=None):
   1.228 +        """Create or update a file attachment.
   1.229 +
   1.230 +        Parameters:
   1.231 +          page - str - the page ID this file is related to
   1.232 +          name - str - the name of the file
   1.233 +          data - str - the file data
   1.234 +          mimetype - str - the MIME type of the file
   1.235 +          description - str - a description of the file
   1.236 +        """
   1.237 +
   1.238 +        p = {}
   1.239 +        if description is not None:
   1.240 +            p['description'] = description
   1.241 +
   1.242 +        url = _make_url('pages', _format_page_id(page),
   1.243 +                        'files', _format_page_id(name), **p)
   1.244 +
   1.245 +        r = self.put(url, data, mimetype)
   1.246 +        _check(r.documentElement.nodeName == u'file')
   1.247 +
   1.248 +    def get_subpages(self, page_id):
   1.249 +        """ Return the ids of all subpages of the given page. """
   1.250 +        doc = self.get_xml(_make_url("pages", _format_page_id(page_id),
   1.251 +                                     "files,subpages"))
   1.252 +        for elt in _find_elements(doc, u'page/subpages/page.subpage/path'):
   1.253 +            yield _text_of(elt)
   1.254 +
   1.255 +    def move_page(self, page_id, new_title, redirects=True):
   1.256 +        """ Move an existing page to a new location.
   1.257 +
   1.258 +        A page cannot be moved to a destination that already exists, is a
   1.259 +        descendant, or has a protected title (ex.  Special:xxx, User:,
   1.260 +        Template:).
   1.261 +
   1.262 +        When a page is moved, subpages under the specified page are also moved.
   1.263 +        For each moved page, the system automatically creates an alias page
   1.264 +        that redirects from the old to the new destination.
   1.265 +        """
   1.266 +        self.post(_make_url("pages", _format_page_id(page_id), "move",
   1.267 +                            to=new_title,
   1.268 +                            redirects=redirects and "1" or "0"),
   1.269 +                  "", "text/plain")
   1.270 +
   1.271 +class Page:
   1.272 +    """ A Deki wiki page.
   1.273 +
   1.274 +    To obtain a page, call wiki.get_page(id).
   1.275 +    Attributes:
   1.276 +        title : unicode - The page title.
   1.277 +        doc : Document - The content of the page as a DOM Document.
   1.278 +          The root element of this document is a <body>.
   1.279 +        path : unicode - The path.  Use this to detect redirects, as otherwise
   1.280 +          page.save() will overwrite the redirect with a copy of the content!
   1.281 +        deki : Deki - The Deki object from which the page was loaded.
   1.282 +        page_id : str/id/Deki.HOME - The page id used to load the page.
   1.283 +        load_time : datetime - The time the page was loaded,
   1.284 +          according to the clock on the client machine.
   1.285 +    Methods:
   1.286 +        save() - Save the modified document back to the server.
   1.287 +          Only the page.title and the contents of page.doc are saved.
   1.288 +    """
   1.289 +
   1.290 +    def __init__(self, deki):
   1.291 +        self.deki = deki
   1.292 +
   1.293 +    def _create(self, path, title, doc, overwrite):
   1.294 +        self.title = title
   1.295 +        self.doc = doc
   1.296 +        self.page_id = path
   1.297 +        if overwrite:
   1.298 +            self.load_time = datetime(2500, 1, 1)
   1.299 +        else:
   1.300 +            self.load_time = datetime(1900, 1, 1)
   1.301 +        self.path = path
   1.302 +        self.save()
   1.303 +
   1.304 +    def _load(self, page_id):
   1.305 +        """ page_id - See comment near the definition of `HOME`. """
   1.306 +        load_time = datetime.utcnow()
   1.307 +
   1.308 +        # Getting the title is a whole separate query!
   1.309 +        url = 'pages/%s/info' % _format_page_id(page_id)
   1.310 +        doc = self.deki.get_xml(url)
   1.311 +        title = _text_of(_the_element_by_name(doc, 'title'))
   1.312 +        path = _text_of(_the_element_by_name(doc, 'path'))
   1.313 +
   1.314 +        # If you prefer to sling regexes, you can request format=raw instead.
   1.315 +        # The result is an XML document with one big fat text node in the body.
   1.316 +        url = _make_url('pages', _format_page_id(page_id), 'contents',
   1.317 +                        format='xhtml', mode='edit')
   1.318 +        doc = self.deki.get_xml(url)
   1.319 +
   1.320 +        content = doc.documentElement
   1.321 +        _check(content.tagName == u'content')
   1.322 +        body = _first_element(content)
   1.323 +        _check(body is not None)
   1.324 +        _check(body.tagName == u'body')
   1.325 +
   1.326 +        doc.removeChild(content)
   1.327 +        doc.appendChild(body)
   1.328 +
   1.329 +        self.page_id = page_id
   1.330 +        self.load_time = load_time
   1.331 +        self.title = title
   1.332 +        self.path = path
   1.333 +        self.doc = doc
   1.334 +
   1.335 +    def save(self):
   1.336 +        p = {'edittime': _urlquote(self.load_time.strftime('%Y%m%d%H%M%S')),
   1.337 +             'abort': 'modified'}
   1.338 +
   1.339 +        if self.title is not None:
   1.340 +            p['title'] = _urlquote(self.title)
   1.341 +
   1.342 +        url = _make_url('pages', _format_page_id(self.page_id), 'contents', **p)
   1.343 +
   1.344 +        body = self.doc.documentElement
   1.345 +        bodyInnerXML = ''.join(n.toxml('utf-8') for n in body.childNodes)
   1.346 +
   1.347 +        reply = self.deki.post(url, bodyInnerXML, 'text/plain; charset=utf-8')
   1.348 +        _check(reply.documentElement.nodeName == u'edit')
   1.349 +        _check(reply.documentElement.getAttribute(u'status') == u'success')

mercurial