import os
import sys


def remove_duplicate_lines(filepath, *, sort=True):
    """Rewrite the file at *filepath* in place with duplicate lines removed.

    The whole file is read into memory, de-duplicated, and written back
    over the original.

    Args:
        filepath: Path of the text file to de-duplicate. It is opened with
            the platform's default text encoding.
        sort: When true (the default), the surviving lines are written in
            ascending lexicographic order. When false, each line keeps the
            position of its first occurrence.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
    """
    with open(filepath, "r") as file:
        lines = file.readlines()

    # Only the last line can be missing its terminator. Without this fix-up
    # an unterminated "a" and a terminated "a\n" would count as different
    # lines, and once reordered the unterminated one would be glued onto
    # whatever line is written after it.
    if lines and not lines[-1].endswith("\n"):
        lines[-1] += "\n"

    if sort:
        # A set drops duplicates but has arbitrary order; sorted() is what
        # makes the output deterministic.
        unique_lines = sorted(set(lines))
    else:
        # dict keys are unique and keep insertion order, so this retains
        # the first occurrence of every line in its original position.
        unique_lines = list(dict.fromkeys(lines))

    with open(filepath, "w") as file:
        file.writelines(unique_lines)


if __name__ == "__main__":
    # The file to process is the first command line argument.
    if len(sys.argv) < 2:
        print("Usage: python remove_duplicates.py <filepath>", file=sys.stderr)
        sys.exit(1)

    file_to_process = sys.argv[1]
    print(f"Processing file: {file_to_process}")
    remove_duplicate_lines(file_to_process)