"""
Simple, straightforward robot
- Written by Oleg BroytMann. Copyright (C) 2000-2007 PhiloSoft Design.
+ Written by Oleg BroytMann. Copyright (C) 2000-2008 PhiloSoft Design.
"""
import sys, os
try:
content_type = headers["Content-Type"]
try:
+ # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
content_type, charset = content_type.split(';', 1)
content_type = content_type.strip()
- charset = charset.split('=')[1].strip()
+ charset = charset.split('=')[1].strip().split(',')[0]
self.log(" HTTP charset : %s" % charset)
except (ValueError, IndexError):
charset = None
if (not self.charset) and (http_equiv == "content-type"):
try:
- # extract charset from "text/html; foo; charset=UTF-8; bar;"
- self.charset = content.lower().split('charset=')[1].split(';')[0]
+ # extract charset from "text/html; foo; charset=UTF-8, bar; baz;"
+ self.charset = content.lower().split('charset=')[1].split(';')[0].split(',')[0]
self.meta_charset = 1 # Remember that the charset was retrieved from
# META tag, not from the Content-Type header
except IndexError: