Add script to find and report duplicate entries in translation files

pull/1060/head
Simon Neutert 1 week ago
parent 5a22ca563d
commit 9de69e335b

@ -0,0 +1,74 @@
import os
def remove_colon_and_strip_from_str(line):
"""
Removes the colon from the line and strips the line.
"""
return line.replace(":", "").strip()
def print_result_for_file(file_path, file_name, duplicate_entries):
"""
Prints the result for a given file.
"""
print(f"-" * 50)
print(f"Processing file: {file_name} \n")
# keep entries with more than one occurence
if len(duplicate_entries) > 0:
for key, value in duplicate_entries.items():
# prints debug info for each duplicate found
print(f"{file_path}: \"{key}\" appears in lines {", ".join(map(str, value))}")
else:
# prints debug info, if no duplicates found 🥳
print(f"No duplicates found!")
def find_duplicates(directory, debug=False):
"""
Reads all .txt files in a given directory and tries to detect duplicate entries.
"""
try:
language_lookup_table = {}
files = [f for f in os.listdir(directory) if f.endswith('.txt')]
files.sort()
if not files:
print("No .txt files found in the directory.")
return
for file_name in files:
# if file_name contains "-help" skip it for now!
if "-help" in file_name:
continue
file_path = os.path.join(directory, file_name)
try:
with open(file_path, 'r', encoding='utf-8') as file:
lookup_table = {}
for line_number, line in enumerate(file, start=1):
stripped_line = line.strip()
stripped_keywords = stripped_line.split(":")
stripped_keywords = list(map(remove_colon_and_strip_from_str, stripped_keywords))
trimmed_keywords = list(map(str.strip, stripped_keywords))
for tk in trimmed_keywords:
if tk == "" or tk.isdigit():
continue
if tk in lookup_table:
lookup_table[tk].append(line_number)
else:
lookup_table[tk] = [line_number]
duplicate_entries = {k: v for k, v in lookup_table.items() if len(v) > 1}
print_result_for_file(file_path, file_name, duplicate_entries)
language_lookup_table[file_name] = duplicate_entries
except Exception as e:
print(f"An error occurred while processing the file: {e}")
return language_lookup_table
except Exception as e:
print(f"An error occurred: {e}")
return None
# Example usage from the root directory:
# python ./lib/duplicate_translations.py
if __name__ == "__main__":
directory_path = "share/translations/"
find_duplicates(directory_path, debug=True)
Loading…
Cancel
Save