diff --git a/lib/duplicate_translations.py b/lib/duplicate_translations.py new file mode 100644 index 0000000..a0ebe39 --- /dev/null +++ b/lib/duplicate_translations.py @@ -0,0 +1,74 @@ +import os + +def remove_colon_and_strip_from_str(line): + """ + Removes the colon from the line and strips the line. + """ + return line.replace(":", "").strip() + +def print_result_for_file(file_path, file_name, duplicate_entries): + """ + Prints the result for a given file. + """ + print(f"-" * 50) + print(f"Processing file: {file_name} \n") + # keep entries with more than one occurence + if len(duplicate_entries) > 0: + for key, value in duplicate_entries.items(): + # prints debug info for each duplicate found + print(f"{file_path}: \"{key}\" appears in lines {", ".join(map(str, value))}") + else: + # prints debug info, if no duplicates found 🥳 + print(f"No duplicates found!") + +def find_duplicates(directory, debug=False): + """ + Reads all .txt files in a given directory and tries to detect duplicate entries. + """ + try: + language_lookup_table = {} + files = [f for f in os.listdir(directory) if f.endswith('.txt')] + files.sort() + + if not files: + print("No .txt files found in the directory.") + return + + for file_name in files: + # if file_name contains "-help" skip it for now! + if "-help" in file_name: + continue + + file_path = os.path.join(directory, file_name) + try: + with open(file_path, 'r', encoding='utf-8') as file: + lookup_table = {} + for line_number, line in enumerate(file, start=1): + stripped_line = line.strip() + stripped_keywords = stripped_line.split(":") + stripped_keywords = list(map(remove_colon_and_strip_from_str, stripped_keywords)) + trimmed_keywords = list(map(str.strip, stripped_keywords)) + + for tk in trimmed_keywords: + if tk == "" or tk.isdigit(): + continue + if tk in lookup_table: + lookup_table[tk].append(line_number) + else: + lookup_table[tk] = [line_number] + duplicate_entries = {k: v for k, v in lookup_table.items() if len(v) > 1} + print_result_for_file(file_path, file_name, duplicate_entries) + language_lookup_table[file_name] = duplicate_entries + except Exception as e: + print(f"An error occurred while processing the file: {e}") + + return language_lookup_table + except Exception as e: + print(f"An error occurred: {e}") + return None + +# Example usage from the root directory: +# python ./lib/duplicate_translations.py +if __name__ == "__main__": + directory_path = "share/translations/" + find_duplicates(directory_path, debug=True)