99 lines
2.5 KiB
Python
99 lines
2.5 KiB
Python
|
|
#!/usr/bin/python
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
function: This script resolves the locales' BEP inconsistency.
It scans a specific path and replaces each duplicate file
in that path with a hard link. This avoids language packs that
differ on each build due to concurrent compilation.
|
|||
|
|
"""
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import time
|
|||
|
|
|
|||
|
|
# Maps a file size in bytes to the list of file paths found with that size.
# Filled by search_all_inode() and read by deal_files().
all_file = {}
|
|||
|
|
|
|||
|
|
def cmp_file(f1, f2):
    """Compare two files byte by byte.

    Args:
        f1: path of the first file.
        f2: path of the second file.

    Returns:
        True if both files hold exactly the same bytes, False otherwise.

    Raises:
        OSError: if either file cannot be stat'ed or opened for reading.
    """
    st1 = os.stat(f1)
    st2 = os.stat(f2)

    # Two names for the very same file are trivially identical.
    if (st1.st_dev, st1.st_ino) == (st2.st_dev, st2.st_ino):
        return True

    # Files of different length can never be byte-identical, so there is
    # no need to read either of them.
    if st1.st_size != st2.st_size:
        return False

    bufsize = 8 * 1024
    with open(f1, 'rb') as fp1, open(f2, 'rb') as fp2:
        while True:
            b1 = fp1.read(bufsize)
            b2 = fp2.read(bufsize)
            if b1 != b2:
                return False
            if not b1:
                # b1 == b2 == b'' here: both files reached EOF together.
                return True
|
|||
|
|
|
|||
|
|
|
|||
|
|
def search_all_inode(dir_path):
    """Recursively scan dir_path and group the regular files by size.

    The path of every regular file found is appended to the module-level
    ``all_file`` dict, under a key equal to the file size in bytes.
    Symbolic links and special files (FIFOs, sockets, devices) are ignored.

    Args:
        dir_path: directory to scan.

    Raises:
        OSError: if a directory cannot be listed or a file cannot be
            stat'ed.
    """
    # os.listdir() returns entries in arbitrary order. Sorting keeps the
    # order of each group stable between runs, and deal_one() keeps the
    # first path of a set of duplicates as the link source.
    for fi in sorted(os.listdir(dir_path)):
        fi_d = os.path.join(dir_path, fi)

        # Never follow symbolic links: a dangling one makes os.stat() fail,
        # a link to a parent directory makes the recursion loop, and a link
        # to a file would later be replaced by a hard link.
        if os.path.islink(fi_d):
            continue

        if os.path.isdir(fi_d):
            search_all_inode(fi_d)
        elif os.path.isfile(fi_d):
            size = os.stat(fi_d).st_size
            all_file.setdefault(size, []).append(fi_d)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def deal_one(file_paths):
    """Replace duplicate files among file_paths with hard links.

    Every pair of paths is examined in list order. When two paths refer to
    different inodes but hold identical content (as decided by cmp_file),
    the later path is replaced by a hard link to the earlier one.

    Args:
        file_paths: list of file paths to de-duplicate.

    Raises:
        OSError: if a file cannot be stat'ed, read, linked or renamed.
    """
    for i, file1 in enumerate(file_paths):
        # Only paths after position i are relinked during this pass, so
        # file1's identity is stable and one stat per pass is enough.
        st1 = os.stat(file1)

        for file2 in file_paths[i + 1:]:
            # Stat again on every pass: file2 may have been relinked while
            # an earlier path was the reference.
            st2 = os.stat(file2)

            # Hard links cannot span filesystems, and inode numbers are
            # only comparable within one device.
            if st1.st_dev != st2.st_dev:
                continue

            # Same inode: the two paths are already linked together.
            if st1.st_ino == st2.st_ino:
                continue

            if cmp_file(file1, file2):
                print('deal same file:', file1, '==', file2)
                _replace_with_hard_link(file1, file2)


def _replace_with_hard_link(src, dst):
    """Replace dst with a hard link to src without ever losing dst."""
    # Create the link under a temporary name first, then rename it over
    # dst. If linking fails, dst is still intact; removing dst first would
    # lose the file for good on a failed os.link().
    tmp = '%s.%d.tmp' % (dst, os.getpid())
    os.link(src, tmp)
    try:
        os.rename(tmp, dst)
    except OSError:
        os.remove(tmp)
        raise
|
|||
|
|
|
|||
|
|
|
|||
|
|
def deal_files():
    """Run deal_one() on every group of same-sized files in all_file."""
    for same_size_paths in all_file.values():
        # A group with a single path cannot contain duplicates.
        if len(same_size_paths) <= 1:
            continue
        deal_one(same_size_paths)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def usage():
    """Write the command-line help text to standard output."""
    help_text = """
rm_same_file: Replace the same file with a hard link.

rm_same_file.py [target path]

"""
    print(help_text)
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Exactly one argument is expected: the directory to de-duplicate.
    if len(sys.argv) != 2:
        usage()
        # A malformed command line is an error; exit non-zero so that the
        # calling build script can detect it.
        sys.exit(1)

    search_all_inode(sys.argv[1])
    deal_files()
|