blob: 2480fce9a0a8597e6d78b98d5f462803ab16c240 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
import os
import sys # Add this import
def remove_duplicate_lines(filepath):
    """Remove duplicate lines from the file at *filepath*, rewriting it in place.

    The surviving lines are written back in sorted order: deduplication goes
    through a set, which discards the original ordering anyway, so sorting
    gives a deterministic result.

    Args:
        filepath: Path to a text file, opened with the platform default
            encoding.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(filepath, 'r') as file:
        lines = file.readlines()
    # sorted(set(...)) dedupes and orders in one idiomatic step.
    # NOTE(review): a final line lacking a trailing newline compares unequal
    # to the same text with one, so such a pair survives -- confirm acceptable.
    unique_lines = sorted(set(lines))
    with open(filepath, 'w') as file:
        file.writelines(unique_lines)
def main():
    """Script entry point: dedupe the file named on the command line.

    Expects exactly one positional argument (the target file path); prints
    a usage message and exits with status 1 when it is missing.
    """
    if len(sys.argv) < 2:
        print("Usage: python remove_duplicates.py <path_to_file>")
        sys.exit(1)
    file_to_process = sys.argv[1]
    print(f"Processing file: {file_to_process}")
    remove_duplicate_lines(file_to_process)


if __name__ == "__main__":
    main()
|