From 201fb4e2412ffceb6cd56ae30b0e943fa3609ae9 Mon Sep 17 00:00:00 2001
From: blitzmann <holmes.ryan.90@gmail.com>
Date: Fri, 23 Feb 2018 00:00:53 -0500
Subject: [PATCH] Remove all encoding bits from fit importing. Might not work
 100% of the time, but as 2.0 gets introduced to the community, we'll get
 reports of possible problems. #1410

---
 service/port.py | 65 ++++++-------------------------------------------
 1 file changed, 7 insertions(+), 58 deletions(-)

diff --git a/service/port.py b/service/port.py
index db7dc0964..b0db7018d 100644
--- a/service/port.py
+++ b/service/port.py
@@ -264,7 +264,7 @@ class Port(object):
         fits are processed as well as when fits are being saved.
         returns
         """
-        defcodepage = locale.getpreferredencoding()
+
         sFit = svcFit.getInstance()
 
         fit_list = []
@@ -276,63 +276,15 @@ class Port(object):
                     PortProcessing.notify(iportuser, IPortUser.PROCESS_IMPORT | IPortUser.ID_UPDATE, msg)
                     # wx.CallAfter(callback, 1, msg)
 
-                with open(path, "r") as file_:
+                with open(path, "r", encoding='utf-8') as file_:
                     srcString = file_.read()
 
                 if len(srcString) == 0:  # ignore blank files
                     pyfalog.debug("File is blank.")
                     continue
 
-                codec_found = None
-                # If file had ANSI encoding, decode it to unicode using detection
-                # of BOM header or if there is no header try default
-                # codepage then fallback to utf-16, cp1252
-
-                if isinstance(srcString, str):
-                    savebom = None
-
-                    encoding_map = (
-                        ('\xef\xbb\xbf', 'utf-8'),
-                        ('\xff\xfe\0\0', 'utf-32'),
-                        ('\0\0\xfe\xff', 'UTF-32BE'),
-                        ('\xff\xfe', 'utf-16'),
-                        ('\xfe\xff', 'UTF-16BE'))
-
-                    for bom, encoding in encoding_map:
-                        if srcString.startswith(bom):
-                            codec_found = encoding
-                            savebom = bom
-
-                    if codec_found is None:
-                        pyfalog.info("Unicode BOM not found in file {0}.", path)
-                        attempt_codecs = (defcodepage, "utf-8", "utf-16", "cp1252")
-
-                        for page in attempt_codecs:
-                            try:
-                                pyfalog.info("Attempting to decode file {0} using {1} page.", path, page)
-                                srcString = str(srcString, page)
-                                codec_found = page
-                                pyfalog.info("File {0} decoded using {1} page.", path, page)
-                            except UnicodeDecodeError:
-                                pyfalog.info("Error unicode decoding {0} from page {1}, trying next codec", path, page)
-                            else:
-                                break
-                    else:
-                        pyfalog.info("Unicode BOM detected in {0}, using {1} page.", path, codec_found)
-                        srcString = str(srcString[len(savebom):], codec_found)
-
-                else:
-                    # nasty hack to detect other transparent utf-16 loading
-                    if srcString[0] == '<' and 'utf-16' in srcString[:128].lower():
-                        codec_found = "utf-16"
-                    else:
-                        codec_found = "utf-8"
-
-                if codec_found is None:
-                    return False, "Proper codec could not be established for %s" % path
-
                 try:
-                    _, fitsImport = Port.importAuto(srcString, path, iportuser=iportuser, encoding=codec_found)
+                    _, fitsImport = Port.importAuto(srcString, path, iportuser=iportuser)
                     fit_list += fitsImport
                 except xml.parsers.expat.ExpatError:
                     pyfalog.warning("Malformed XML in:\n{0}", path)
@@ -477,7 +429,7 @@ class Port(object):
         return json.dumps(fit)
 
     @classmethod
-    def importAuto(cls, string, path=None, activeFit=None, iportuser=None, encoding=None):
+    def importAuto(cls, string, path=None, activeFit=None, iportuser=None):
         # type: (basestring, basestring, object, IPortUser, basestring) -> object
         # Get first line and strip space symbols of it to avoid possible detection errors
         firstLine = re.split("[\n\r]+", string.strip(), maxsplit=1)[0]
@@ -485,10 +437,7 @@ class Port(object):
 
         # If XML-style start of tag encountered, detect as XML
         if re.search(RE_XML_START, firstLine):
-            if encoding:
-                return "XML", cls.importXml(string, iportuser, encoding)
-            else:
-                return "XML", cls.importXml(string, iportuser)
+            return "XML", cls.importXml(string, iportuser)
 
         # If JSON-style start, parse as CREST/JSON
         if firstLine[0] == '{':
@@ -1013,10 +962,10 @@ class Port(object):
         return fits
 
     @staticmethod
-    def importXml(text, iportuser=None, encoding="utf-8"):
+    def importXml(text, iportuser=None):
         # type: (basestring, IPortUser, basestring) -> list[eos.saveddata.fit.Fit]
         sMkt = Market.getInstance()
-        doc = xml.dom.minidom.parseString(text.encode(encoding))
+        doc = xml.dom.minidom.parseString(text)
         # NOTE:
         #   When L_MARK is included at this point,
         #   Decided to be localized data