mirror of https://github.com/chubin/wttr.in
Compare commits
No commits in common. 'b8a9d7743205bdc227f2a88b026a6a629f0803ea' and '5a22ca563d984e3f74087cf87bc9b61769f9e39f' have entirely different histories.
b8a9d77432
...
5a22ca563d
@ -1,74 +0,0 @@
|
|||||||
import os
|
|
||||||
|
|
||||||
def remove_colon_and_strip_from_str(line):
|
|
||||||
"""
|
|
||||||
Removes the colon from the line and strips the line.
|
|
||||||
"""
|
|
||||||
return line.replace(":", "").strip()
|
|
||||||
|
|
||||||
def print_result_for_file(file_path, file_name, duplicate_entries):
|
|
||||||
"""
|
|
||||||
Prints the result for a given file.
|
|
||||||
"""
|
|
||||||
print(f"-" * 50)
|
|
||||||
print(f"Processing file: {file_name} \n")
|
|
||||||
# keep entries with more than one occurence
|
|
||||||
if len(duplicate_entries) > 0:
|
|
||||||
for key, value in duplicate_entries.items():
|
|
||||||
# prints debug info for each duplicate found
|
|
||||||
print(f"{file_path}: \"{key}\" appears in lines {", ".join(map(str, value))}")
|
|
||||||
else:
|
|
||||||
# prints debug info, if no duplicates found 🥳
|
|
||||||
print(f"No duplicates found!")
|
|
||||||
|
|
||||||
def find_duplicates(directory, debug=False):
|
|
||||||
"""
|
|
||||||
Reads all .txt files in a given directory and tries to detect duplicate entries.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
language_lookup_table = {}
|
|
||||||
files = [f for f in os.listdir(directory) if f.endswith('.txt')]
|
|
||||||
files.sort()
|
|
||||||
|
|
||||||
if not files:
|
|
||||||
print("No .txt files found in the directory.")
|
|
||||||
return
|
|
||||||
|
|
||||||
for file_name in files:
|
|
||||||
# if file_name contains "-help" skip it for now!
|
|
||||||
if "-help" in file_name:
|
|
||||||
continue
|
|
||||||
|
|
||||||
file_path = os.path.join(directory, file_name)
|
|
||||||
try:
|
|
||||||
with open(file_path, 'r', encoding='utf-8') as file:
|
|
||||||
lookup_table = {}
|
|
||||||
for line_number, line in enumerate(file, start=1):
|
|
||||||
stripped_line = line.strip()
|
|
||||||
stripped_keywords = stripped_line.split(":")
|
|
||||||
stripped_keywords = list(map(remove_colon_and_strip_from_str, stripped_keywords))
|
|
||||||
trimmed_keywords = list(map(str.strip, stripped_keywords))
|
|
||||||
|
|
||||||
for tk in trimmed_keywords:
|
|
||||||
if tk == "" or tk.isdigit():
|
|
||||||
continue
|
|
||||||
if tk in lookup_table:
|
|
||||||
lookup_table[tk].append(line_number)
|
|
||||||
else:
|
|
||||||
lookup_table[tk] = [line_number]
|
|
||||||
duplicate_entries = {k: v for k, v in lookup_table.items() if len(v) > 1}
|
|
||||||
print_result_for_file(file_path, file_name, duplicate_entries)
|
|
||||||
language_lookup_table[file_name] = duplicate_entries
|
|
||||||
except Exception as e:
|
|
||||||
print(f"An error occurred while processing the file: {e}")
|
|
||||||
|
|
||||||
return language_lookup_table
|
|
||||||
except Exception as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Example usage from the root directory:
|
|
||||||
# python ./lib/duplicate_translations.py
|
|
||||||
if __name__ == "__main__":
|
|
||||||
directory_path = "share/translations/"
|
|
||||||
find_duplicates(directory_path, debug=True)
|
|
Loading…
Reference in new issue