From 124bb027abc7a9a1d975b5c6930d0825b64f6d20 Mon Sep 17 00:00:00 2001 From: blitzmann Date: Sat, 3 Mar 2018 14:21:55 -0500 Subject: [PATCH] Use `bs4.UnicodeDammit` to handle different encodings for file imports (fits only for now, can look into expanding for other file import types) --- requirements.txt | 3 ++- service/port.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9dd1702ca..2b38e8685 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ requests >= 2.0.0 sqlalchemy >= 1.0.5 markdown2 packaging -roman \ No newline at end of file +roman +beautifulsoup4 \ No newline at end of file diff --git a/service/port.py b/service/port.py index aa84cb59a..7307697a5 100644 --- a/service/port.py +++ b/service/port.py @@ -25,6 +25,8 @@ import collections import json import threading import locale +from bs4 import UnicodeDammit + from codecs import open @@ -276,8 +278,10 @@ class Port(object): PortProcessing.notify(iportuser, IPortUser.PROCESS_IMPORT | IPortUser.ID_UPDATE, msg) # wx.CallAfter(callback, 1, msg) - with open(path, "r", encoding='utf-8') as file_: + with open(path, "rb") as file_: srcString = file_.read() + dammit = UnicodeDammit(srcString) + srcString = dammit.unicode_markup if len(srcString) == 0: # ignore blank files pyfalog.debug("File is blank.")