diff options
Diffstat (limited to 'executables/remove_duplicates.py')
-rw-r--r-- | executables/remove_duplicates.py | 29 |
1 files changed, 29 insertions, 0 deletions
"""Remove duplicate lines from a text file, rewriting the file in place."""

import os
import sys


def remove_duplicate_lines(filepath, *, keep_order=False):
    """Rewrite *filepath* so that every distinct line appears exactly once.

    Lines are compared including their line terminator. A final line that
    lacks a trailing newline is given one before comparison, so it is
    recognised as a duplicate of an identical terminated line and cannot be
    fused with its neighbour when the result is written back.

    Args:
        filepath: Path of the text file to deduplicate; it is overwritten.
        keep_order: When false (the default), the unique lines are written
            in sorted order. When true, each line keeps the position of its
            first occurrence.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
    """
    with open(filepath, 'r') as file:
        lines = file.readlines()

    # Only the last line can be missing its terminator. Without this,
    # "a" and "a\n" count as different lines and, once sorted, are written
    # back as the single fused line "aa\n".
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'

    if keep_order:
        # dict keys are unique and preserve insertion order.
        unique_lines = list(dict.fromkeys(lines))
    else:
        # A set has no defined order; the explicit sort is what makes the
        # output deterministic.
        unique_lines = sorted(set(lines))

    with open(filepath, 'w') as file:
        file.writelines(unique_lines)


if __name__ == "__main__":
    # The file to process is the first command-line argument.
    if len(sys.argv) < 2:
        print("Usage: python remove_duplicates.py <path_to_file>")
        sys.exit(1)

    file_to_process = sys.argv[1]

    print(f"Processing file: {file_to_process}")
    remove_duplicate_lines(file_to_process)
\ No newline at end of file |