Use encoding (default is UTF-8) and Unicode

[sqlconvert.git] / mysql2sql / process_tokens.py
diff --git a/mysql2sql/process_tokens.py b/mysql2sql/process_tokens.py

index ddab2bf649eac2e69e67eef48e7b1edbf07b0dea..1e74ac9b69a5a416bb6cf8b0418219095b73b357 100644 (file)
--- a/mysql2sql/process_tokens.py
+++ b/mysql2sql/process_tokens.py
@@ -1,5 +1,6 @@
  
  from sqlparse import parse
+from sqlparse.compat import PY3
  from sqlparse.tokens import Name, Error, Punctuation, Comment, Newline, \
      Whitespace
  
@@ -25,19 +26,24 @@ def find_error(token_list):
      return False
  
  
+if PY3:
+    xrange = range
+
+
  class StatementGrouper(object):
      """Collect lines and reparse until the last statement is complete"""
  
-    def __init__(self):
+    def __init__(self, encoding=None):
          self.lines = []
          self.statements = []
+        self.encoding = encoding
  
      def process_line(self, line):
          self.lines.append(line)
          self.process_lines()
  
      def process_lines(self):
-        statements = parse('\n'.join(self.lines))
+        statements = parse(''.join(self.lines), encoding=self.encoding)
          last_stmt = statements[-1]
          for i in xrange(len(last_stmt.tokens) - 1, 0, -1):
              token = last_stmt.tokens[i]
@@ -59,7 +65,7 @@ class StatementGrouper(object):
      def close(self):
          if not self.lines:
              return
-        tokens = parse('\n'.join(self.lines))
+        tokens = parse(''.join(self.lines), encoding=self.encoding)
          for token in tokens:
              if (token.ttype not in (Comment.Single, Comment.Multiline,
                                      Newline, Whitespace)):