Merge pull request #1060 from simonneutert/german-translation-dups

Adds python script to find and report duplicate entries in translation files
Add script to find and report duplicate entries in translation files
1 changed files with 74 additions and 0 deletions
--- a/lib/duplicate_translations.py
+++ b/lib/duplicate_translations.py
@ -0,0 +1,74 @@
 import os
 def remove_colon_and_strip_from_str(line):
    """
    Removes the colon from the line and strips the line.
    """
    return line.replace(":", "").strip()
 def print_result_for_file(file_path, file_name, duplicate_entries):
    """
    Prints the result for a given file.
    """
    print(f"-" * 50)
    print(f"Processing file: {file_name} \n")
    # keep entries with more than one occurence
    if len(duplicate_entries) > 0:
        for key, value in duplicate_entries.items():
            # prints debug info for each duplicate found
            print(f"{file_path}: \"{key}\" appears in lines {", ".join(map(str, value))}")
    else:
        # prints debug info, if no duplicates found 🥳
        print(f"No duplicates found!")
 def find_duplicates(directory, debug=False):
    """
    Reads all .txt files in a given directory and tries to detect duplicate entries.
    """
    try:
        language_lookup_table = {}
        files = [f for f in os.listdir(directory) if f.endswith('.txt')]
        files.sort()        
        if not files:
            print("No .txt files found in the directory.")
            return
        for file_name in files:
            # if file_name contains "-help" skip it for now!
            if "-help" in file_name:
                continue
            file_path = os.path.join(directory, file_name)
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    lookup_table = {}
                    for line_number, line in enumerate(file, start=1):
                        stripped_line = line.strip()
                        stripped_keywords = stripped_line.split(":")
                        stripped_keywords = list(map(remove_colon_and_strip_from_str, stripped_keywords))
                        trimmed_keywords = list(map(str.strip, stripped_keywords))
                        for tk in trimmed_keywords:
                            if tk == "" or tk.isdigit():
                                continue
                            if tk in lookup_table:
                                lookup_table[tk].append(line_number)
                            else:
                                lookup_table[tk] = [line_number]
                    duplicate_entries = {k: v for k, v in lookup_table.items() if len(v) > 1}
                    print_result_for_file(file_path, file_name, duplicate_entries)
                    language_lookup_table[file_name] = duplicate_entries
            except Exception as e:
                print(f"An error occurred while processing the file: {e}")
        return language_lookup_table
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
 # Example usage from the root directory:
 #   python ./lib/duplicate_translations.py
 if __name__ == "__main__":
    directory_path = "share/translations/"
    find_duplicates(directory_path, debug=True)
Author	SHA1	Message	Date
Igor Chubin	b8a9d77432	Merge pull request #1060 from simonneutert/german-translation-dups Adds python script to find and report duplicate entries in translation files	1 week ago
Simon Neutert	9de69e335b	Add script to find and report duplicate entries in translation files	1 week ago