mirror of https://github.com/chubin/wttr.in
Compare commits
2 Commits
5a22ca563d
...
b8a9d77432
Author | SHA1 | Date |
---|---|---|
Igor Chubin | b8a9d77432 | 1 week ago |
Simon Neutert | 9de69e335b | 1 week ago |
@@ -0,0 +1,74 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
def remove_colon_and_strip_from_str(line):
    """
    Return ``line`` with every ":" deleted and the surrounding whitespace trimmed.
    """
    # Splitting on ":" and gluing the pieces back together drops all colons.
    without_colons = "".join(line.split(":"))
    return without_colons.strip()
|
||||||
|
|
||||||
|
def print_result_for_file(file_path, file_name, duplicate_entries):
    """
    Print the duplicate report for a single file.

    Args:
        file_path: Path printed as the prefix of every duplicate line.
        file_name: Name printed in the "Processing file" header.
        duplicate_entries: Mapping of keyword -> list of line numbers the
            keyword occurs on. Every entry of the mapping is printed, so the
            caller is expected to pass only keywords with more than one
            occurrence.
    """
    print("-" * 50)
    print(f"Processing file: {file_name} \n")

    if not duplicate_entries:
        # nothing to report for this file
        print("No duplicates found!")
        return

    for key, line_numbers in duplicate_entries.items():
        # Join outside of the f-string: reusing the enclosing quote character
        # inside a replacement field is a SyntaxError before Python 3.12.
        joined_lines = ", ".join(map(str, line_numbers))
        print(f'{file_path}: "{key}" appears in lines {joined_lines}')
|
||||||
|
|
||||||
|
def find_duplicates(directory, debug=False):
    """
    Read all .txt files in a directory and detect duplicate entries per file.

    Every line is split on ":" and each non-empty, non-numeric field is
    treated as a keyword. A keyword counts as a duplicate when it was seen
    more than once within the same file; a field repeated on a single line
    is recorded once per occurrence. Files whose name contains "-help" are
    skipped.

    Args:
        directory: Directory that is listed (non-recursively) for .txt files.
        debug: When true, print a report for every processed file via
            print_result_for_file. Error messages are printed regardless.

    Returns:
        A dict mapping file name -> {keyword: [line numbers]} that holds only
        the duplicated keywords of every file that could be read, or None if
        the directory contains no .txt files or cannot be listed.
    """
    try:
        files = sorted(f for f in os.listdir(directory) if f.endswith(".txt"))
    except OSError as e:
        print(f"An error occurred: {e}")
        return None

    if not files:
        print("No .txt files found in the directory.")
        return None

    language_lookup_table = {}
    for file_name in files:
        # if file_name contains "-help" skip it for now!
        if "-help" in file_name:
            continue

        file_path = os.path.join(directory, file_name)
        try:
            keyword_lines = _collect_keyword_lines(file_path)
        except (OSError, UnicodeError) as e:
            # An unreadable file is reported and left out of the result; the
            # remaining files are still processed.
            print(f"An error occurred while processing the file: {e}")
            continue

        # keep entries with more than one occurrence
        duplicate_entries = {
            keyword: line_numbers
            for keyword, line_numbers in keyword_lines.items()
            if len(line_numbers) > 1
        }
        if debug:
            print_result_for_file(file_path, file_name, duplicate_entries)
        language_lookup_table[file_name] = duplicate_entries

    return language_lookup_table


def _collect_keyword_lines(file_path):
    """Map every keyword found in ``file_path`` to the line numbers it occurs on."""
    keyword_lines = {}
    with open(file_path, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, start=1):
            for field in line.split(":"):
                keyword = field.strip()
                # empty fields and purely numeric fields are not keywords
                if not keyword or keyword.isdigit():
                    continue
                keyword_lines.setdefault(keyword, []).append(line_number)
    return keyword_lines
|
||||||
|
|
||||||
|
# Run this script from the repository root so that the relative
# translations path below resolves:
#     python ./lib/duplicate_translations.py
if __name__ == "__main__":
    find_duplicates("share/translations/", debug=True)
|
Loading…
Reference in new issue