Implement support for loading utf-16 XML.

Such XML is generated by software like jEveAssets' "owned ships -> fitting" export tool. Without UTF-16 detection, pyfa fails to recognize these files as XML and tries to import them as DNA fits, with all the ensuing hilarity; hence this commit also adds debug logging to the DNA import code.
2015-04-22 05:05:48 -07:00
parent e5430cac84
commit 557f32ab91
3 changed files with 42 additions and 12 deletions
--- a/pyfa.py
+++ b/pyfa.py
@@ -78,6 +78,10 @@ if __name__ == "__main__":
        config.saveInRoot = True
    config.defPaths()

+    # Basic logging
+    import logging
+    logging.basicConfig()
+
    # Import everything
    import wx
    import os
--- a/service/fit.py
+++ b/service/fit.py
@@ -20,6 +20,7 @@
 import locale
 import copy
 import threading
+import logging
 import wx
 from codecs import open

@@ -35,6 +36,7 @@ from service.fleet import Fleet
 from service.settings import SettingsProvider
 from service.port import Port

+logger = logging.getLogger("pyfa.service.fit")

 class FitBackupThread(threading.Thread):
    def __init__(self, path, callback):
@@ -789,15 +791,28 @@ class Fit(object):

            file = open(path, "r")
            srcString = file.read()
+            codec_found = None
            # If file had ANSI encoding, convert it to unicode using system
-            # default codepage, or use fallback cp1252 on any encoding errors
+            # default codepage, or use fallbacks UTF-16, then cp1252 on any
+            # encoding errors
            if isinstance(srcString, str):
-                try:
-                    srcString = unicode(srcString, defcodepage)
-                except UnicodeDecodeError:
-                    srcString = unicode(srcString, "cp1252")
+                attempt_codecs = (defcodepage, "utf-16", "cp1252")
+                for page in attempt_codecs:
+                    try:
+                        srcString = unicode(srcString, page)
+                        codec_found = page
+                    except UnicodeDecodeError:
+                        logger.warn("Error unicode decoding %s from page %s, trying next codec", path, page)
+                    else:
+                        break
+            else:
+                # nasty hack to detect other transparent utf-16 loading
+                if srcString[0] == '<' and 'utf-16' in srcString[:128].lower():
+                    codec_found = "utf-16"
+                else:
+                    codec_found = "utf-8"

-            _, fitsImport = Port.importAuto(srcString, path, callback=callback)
+            _, fitsImport = Port.importAuto(srcString, path, callback=callback, encoding=codec_found)
            fits += fitsImport

        IDs = []
--- a/service/port.py
+++ b/service/port.py
@@ -24,6 +24,9 @@ import xml.dom
 from eos.types import State, Slot, Module, Cargo, Fit, Ship, Drone, Implant, Booster
 import service
 import wx
+import logging
+
+logger = logging.getLogger("pyfa.service.port")

 try:
    from collections import OrderedDict
@@ -36,14 +39,14 @@ class Port(object):
    """Service which houses all import/export format functions"""

    @classmethod
-    def importAuto(cls, string, path=None, activeFit=None, callback=None):
+    def importAuto(cls, string, path=None, activeFit=None, callback=None, encoding=None):
        # Get first line and strip space symbols of it to avoid possible detection errors
        firstLine = re.split("[\n\r]+", string.strip(), maxsplit=1)[0]
        firstLine = firstLine.strip()

        # If XML-style start of tag encountered, detect as XML
        if re.match("<", firstLine):
-            return "XML", cls.importXml(string, callback)
+            return "XML", cls.importXml(string, callback, encoding)

        # If we've got source file name which is used to describe ship name
        # and first line contains something like [setup name], detect as eft config file
@@ -66,8 +69,16 @@ class Port(object):
        info = string.split(":")

        f = Fit()
-        f.ship = Ship(sMkt.getItem(int(info[0])))
-        f.name = "{0} - DNA Imported".format(f.ship.item.name)
+        try:
+            f.ship = Ship(sMkt.getItem(int(info[0])))
+            f.name = "{0} - DNA Imported".format(f.ship.item.name)
+        except UnicodeEncodeError as e:
+            def logtransform(s):
+                if len(s) > 10:
+                    return s[:10] + "..."
+                return s
+            logger.exception("Couldn't import ship data %r", [ logtransform(s) for s in info ])
+            return None

        for itemInfo in info[1:]:
            if itemInfo:
@@ -358,10 +369,10 @@ class Port(object):
        return fits

    @staticmethod
-    def importXml(text, callback=None):
+    def importXml(text, callback=None, encoding="utf-8"):
        sMkt = service.Market.getInstance()

-        doc = xml.dom.minidom.parseString(text.encode("utf-8"))
+        doc = xml.dom.minidom.parseString(text.encode(encoding))
        fittings = doc.getElementsByTagName("fittings").item(0)
        fittings = fittings.getElementsByTagName("fitting")
        fits = []