Use bs4.UnicodeDammit to handle different encodings for file imports (fits only for now; this could later be expanded to other file import types)

This commit is contained in:
blitzmann
2018-03-03 14:21:55 -05:00
parent d5ca14ca52
commit 124bb027ab
2 changed files with 7 additions and 2 deletions

View File

@@ -6,4 +6,5 @@ requests >= 2.0.0
sqlalchemy >= 1.0.5
markdown2
packaging
roman
roman
beautifulsoup4

View File

@@ -25,6 +25,8 @@ import collections
import json
import threading
import locale
from bs4 import UnicodeDammit
from codecs import open
@@ -276,8 +278,10 @@ class Port(object):
PortProcessing.notify(iportuser, IPortUser.PROCESS_IMPORT | IPortUser.ID_UPDATE, msg)
# wx.CallAfter(callback, 1, msg)
with open(path, "r", encoding='utf-8') as file_:
with open(path, "rb") as file_:
srcString = file_.read()
dammit = UnicodeDammit(srcString)
srcString = dammit.unicode_markup
if len(srcString) == 0: # ignore blank files
pyfalog.debug("File is blank.")