eff_bdecode.py

   1 # http://effbot.org/zone/bencode.htm
   2 # http://effbot.org/zone/copyright.htm
   3 #
   4 # Copyright (C) 1995-2013 by Fredrik Lundh
   5 #
   6 # By obtaining, using, and/or copying this software and/or its associated
   7 # documentation, you agree that you have read, understood, and will comply with
   8 # the following terms and conditions:
   9 #
  10 # Permission to use, copy, modify, and distribute this software and its
  11 # associated documentation for any purpose and without fee is hereby granted,
  12 # provided that the above copyright notice appears in all copies, and that both
  13 # that copyright notice and this permission notice appear in supporting
  14 # documentation, and that the name of Secret Labs AB or the author not be used
  15 # in advertising or publicity pertaining to distribution of the software
  16 # without specific, written prior permission.
  17 #
  18 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
  19 # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN
  20 # NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL,
  21 # INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  22 # LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
  23 # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  24 # PERFORMANCE OF THIS SOFTWARE.
  25
  26 from functools import partial
  27 import re
  28
  29
  30 def tokenize(text, match=re.compile(b"([idel])|(\\d+):|(-?\\d+)").match):
  31     i = 0
  32     while i < len(text):
  33         m = match(text, i)
  34         s = m.group(m.lastindex)
  35         i = m.end()
  36         if m.lastindex == 2:
  37             yield "s"
  38             yield text[i:i+int(s)]
  39             i = i + int(s)
  40         else:
  41             yield s.decode('ascii')
  42
  43
  44 def decode_item(next_, token):
  45     if token == "i":
  46         # integer: "i" value "e"
  47         data = int(next_())
  48         if next_() != "e":
  49             raise ValueError
  50     elif token == "s":
  51         # string: "s" value (virtual tokens)
  52         data = next_()
  53     elif token == "l" or token == "d":
  54         # container: "l" (or "d") values "e"
  55         data = []
  56         tok = next_()
  57         while tok != "e":
  58             data.append(decode_item(next_, tok))
  59             try:
  60                 tok = next_()
  61             except StopIteration:
  62                 break
  63         if token == "d":
  64             data = dict(zip(data[0::2], data[1::2]))
  65     else:
  66         raise ValueError
  67     return data
  68
  69
  70 def decode(text):
  71     try:
  72         src = tokenize(text)
  73         data = decode_item(partial(next, src), next(src))
  74         for token in src:  # look for more tokens
  75             raise SyntaxError("trailing junk")
  76     except (AttributeError, ValueError, StopIteration):
  77         raise SyntaxError("syntax error")
  78     return data