Add functionality to identify and clean up problematic entries in notes

This commit is contained in:
2025-05-20 12:39:43 +02:00
parent d937ae212c
commit 621e85c747
3 changed files with 238 additions and 10 deletions

View File

@@ -6,8 +6,16 @@ class Note {
late final String displayDate;
String content;
String? snippet;
bool isProblematic;
String problemReason;
Note({required this.date, required this.content, this.snippet}) {
Note({
required this.date,
required this.content,
this.snippet,
this.isProblematic = false,
this.problemReason = '',
}) {
final dtUtc = DateFormat('yyyy-MM-dd HH:mm:ss').parse(date, true);
final dtLocal = dtUtc.toLocal();
displayDate = DateFormat('yyyy-MM-dd HH:mm:ss').format(dtLocal);
@@ -152,3 +160,36 @@ Future<List<Note>> searchNotes(String query) async {
)
.toList();
}
/// Finds notes whose content is dominated by their first character.
///
/// Only notes whose content is at least `minLength` characters long are
/// examined. For each of them the query counts how often the note's *first*
/// character occurs in the content and keeps the note when that share is
/// greater than `maxCharPercentage`. No other character is measured, so a
/// note dominated by a character that is not its first one is not reported.
///
/// Returns the matching notes ordered by `date` descending, each with
/// [Note.isProblematic] set to `true` and [Note.problemReason] describing the
/// offending character and its share of the content.
Future<List<Note>> findProblematicEntries() async {
  // A note is suspicious when one character makes up more than 70% of it.
  const maxCharPercentage = 0.7;
  // Short notes are skipped; only content of at least this length is checked.
  const minLength = 10;

  // Occurrences of the first character are derived without iterating:
  // removing every occurrence with replace() and comparing lengths yields the
  // count. The cast to float forces real (non-integer) division.
  final List<Map<String, dynamic>> results = await DB.db.rawQuery('''
    WITH char_counts AS (
      SELECT
        id,
        date,
        content,
        substr(content, 1, 1) as char,
        (length(content) - length(replace(content, substr(content, 1, 1), ''))) as char_count,
        length(content) as total_length,
        cast(length(content) - length(replace(content, substr(content, 1, 1), '')) as float) / length(content) as percentage
      FROM notes
      WHERE length(content) >= ?
    )
    SELECT *
    FROM char_counts
    WHERE percentage > ?
    ORDER BY date DESC
  ''', [minLength, maxCharPercentage]);

  return results.map((row) {
    final char = row['char'];
    // Cast explicitly instead of doing arithmetic on a dynamic value.
    final percent = (row['percentage'] as num) * 100;
    return Note(
      date: row['date'] as String,
      content: row['content'] as String,
      isProblematic: true,
      problemReason:
          'Character "$char" makes up ${percent.toStringAsFixed(1)}% of the content',
    );
  }).toList();
}