mirror of https://github.com/chubin/wttr.in
Compare commits
2 Commits
5a22ca563d
...
b8a9d77432
Author | SHA1 | Date |
---|---|---|
![]() |
b8a9d77432 | 1 week ago |
![]() |
9de69e335b | 1 week ago |
@ -0,0 +1,74 @@
|
||||
import os
|
||||
|
||||
def remove_colon_and_strip_from_str(line):
|
||||
"""
|
||||
Removes the colon from the line and strips the line.
|
||||
"""
|
||||
return line.replace(":", "").strip()
|
||||
|
||||
def print_result_for_file(file_path, file_name, duplicate_entries):
|
||||
"""
|
||||
Prints the result for a given file.
|
||||
"""
|
||||
print(f"-" * 50)
|
||||
print(f"Processing file: {file_name} \n")
|
||||
# keep entries with more than one occurence
|
||||
if len(duplicate_entries) > 0:
|
||||
for key, value in duplicate_entries.items():
|
||||
# prints debug info for each duplicate found
|
||||
print(f"{file_path}: \"{key}\" appears in lines {", ".join(map(str, value))}")
|
||||
else:
|
||||
# prints debug info, if no duplicates found 🥳
|
||||
print(f"No duplicates found!")
|
||||
|
||||
def find_duplicates(directory, debug=False):
|
||||
"""
|
||||
Reads all .txt files in a given directory and tries to detect duplicate entries.
|
||||
"""
|
||||
try:
|
||||
language_lookup_table = {}
|
||||
files = [f for f in os.listdir(directory) if f.endswith('.txt')]
|
||||
files.sort()
|
||||
|
||||
if not files:
|
||||
print("No .txt files found in the directory.")
|
||||
return
|
||||
|
||||
for file_name in files:
|
||||
# if file_name contains "-help" skip it for now!
|
||||
if "-help" in file_name:
|
||||
continue
|
||||
|
||||
file_path = os.path.join(directory, file_name)
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
lookup_table = {}
|
||||
for line_number, line in enumerate(file, start=1):
|
||||
stripped_line = line.strip()
|
||||
stripped_keywords = stripped_line.split(":")
|
||||
stripped_keywords = list(map(remove_colon_and_strip_from_str, stripped_keywords))
|
||||
trimmed_keywords = list(map(str.strip, stripped_keywords))
|
||||
|
||||
for tk in trimmed_keywords:
|
||||
if tk == "" or tk.isdigit():
|
||||
continue
|
||||
if tk in lookup_table:
|
||||
lookup_table[tk].append(line_number)
|
||||
else:
|
||||
lookup_table[tk] = [line_number]
|
||||
duplicate_entries = {k: v for k, v in lookup_table.items() if len(v) > 1}
|
||||
print_result_for_file(file_path, file_name, duplicate_entries)
|
||||
language_lookup_table[file_name] = duplicate_entries
|
||||
except Exception as e:
|
||||
print(f"An error occurred while processing the file: {e}")
|
||||
|
||||
return language_lookup_table
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
return None
|
||||
|
||||
# Example usage from the root directory:
|
||||
# python ./lib/duplicate_translations.py
|
||||
if __name__ == "__main__":
|
||||
directory_path = "share/translations/"
|
||||
find_duplicates(directory_path, debug=True)
|
Loading…
Reference in new issue