##################################################
# Search for duplicated files without            #
# meaningless file extension                     #
# DO NOT EDIT BY HAND!!!                         #
##################################################
#IMPORT MODULES FOR SCRIPT
import hashlib, os, locale, fileinput, platform, logging
from itertools import *
from operator import itemgetter
# Start-up state: a module-level MD5 object and the locale's default encoding.
md = hashlib.md5()
encoding = locale.getdefaultlocale()[1]

# Results go to logfile.log as bare messages; the file is truncated each run.
logging.basicConfig(
    filename='logfile.log',
    filemode="w",
    level=logging.DEBUG,
    format='%(message)s',
)

# Ask the user where to search and which file extension to match.
Dir = raw_input("Enter destination for begin use \\ : ")
extension = raw_input("Enter extension for file's : ")
# Hard-coded alternatives to the two prompts above:
#Dir = "C:\\script"
#extension = "py"

# Working lists shared by the rest of the script.
_File, _File_H = [], []                   # matching file paths / their MD5 digests
_duplicated, _duplicated_sravnqva = [], []  # digests seen while scanning / digests seen more than once
subdirlist = []                           # only used by the disabled search_file variant
one, two = [], []                         # duplicate file paths / their MD5 digests
#SEARCH WITH OS.LISTDIR (DISABLED: NOT WORKING AT THE MOMENT)
# def search_file(Dir):
# for file in os.listdir(Dir):
# if os.path.isfile(file):
# yield os.path.join(Dir,file)
# else:
# subdirlist.append(os.path.join(Dir, file))
# try:
# for subdir in subdirlist:
# search_file(subdir)
# except:
# print "Somting wrong"
# Recursive file search built on os.walk.
def search_os_walk(Dir, ext=None):
    """Yield the full path of every file below ``Dir`` ending in ``"." + ext``.

    Parameters:
        Dir: root directory; it is walked recursively with ``os.walk``.
        ext: file extension without the leading dot (e.g. ``"py"``).  When
            omitted, the module-level ``extension`` value is used, which
            keeps existing one-argument calls working.

    Yields:
        ``os.path.join(path, filename)`` for each matching file.

    Directories that cannot be listed are skipped silently, because
    ``os.walk`` ignores listing errors unless an ``onerror`` callback is
    supplied.  The previous ``try/except IOError`` around the ``yield`` could
    never trigger and has been removed.
    """
    if ext is None:
        ext = extension
    suffix = "." + ext
    for path, dirlist, filelist in os.walk(Dir):
        for fn in filelist:
            if fn.endswith(suffix):
                yield os.path.join(path, fn)
# Compute the MD5 digest of a file's contents.
def md_5(filePath, chunk_size=8192):
    """Return the hexadecimal MD5 digest of the file at ``filePath``.

    The file is read in binary mode, ``chunk_size`` bytes at a time, so large
    files are never loaded into memory at once.

    Parameters:
        filePath: path of the file to hash.
        chunk_size: number of bytes per read (default 8192).

    Returns:
        The digest as a hex string, or ``None`` when the file cannot be
        opened or read.  Callers must treat ``None`` as "no digest".
    """
    digest = hashlib.md5()
    try:
        with open(filePath, 'rb') as fh:
            while True:
                data = fh.read(chunk_size)
                if not data:
                    break
                digest.update(data)
    except IOError as e:
        # A failed read used to be retried forever inside the loop, and a
        # failed open was swallowed silently.  Report it once and give up on
        # this file instead of returning a digest of partial content.
        print("Not allowed %s" % e)
        return None
    return digest.hexdigest()
# Collect every matching file together with its MD5 digest.  Each file is
# hashed exactly once; the digests are reused for the report below instead of
# re-reading every duplicate from disk.
for file in search_os_walk(Dir):
    _File.append(file)
    _File_H.append(md_5(file))

# Record which digests occur more than once.  Files that could not be read
# have no digest (md_5 returned None) and are skipped, so that two unreadable
# files are never reported as duplicates of each other.  A set makes the
# "seen before?" test O(1) instead of scanning a growing list.
_seen = set()
for z in _File_H:
    if z is None:
        continue
    if z in _seen:
        _duplicated_sravnqva.append(z)
    _seen.add(z)
    _duplicated.append(z)
print("*" * 80)

# Keep only the (path, digest) pairs whose digest was seen more than once.
_repeated = set(_duplicated_sravnqva)
_pairs = [(f, z) for f, z in zip(_File, _File_H) if z in _repeated]
_Dupliated = [f for f, z in _pairs]
one.extend(_Dupliated)
two.extend(z for f, z in _pairs)

# groupby only merges adjacent items, so sort by digest first; the path is a
# secondary key so that the order inside each group is deterministic.
d = dict(zip(one, two))
di = sorted(d.items(), key=itemgetter(1, 0))
for k, g in groupby(di, key=itemgetter(1)):
    # To log one path per line instead of one list per group, iterate over
    # the group and call logging.info for each path.
    info = [entry[0] for entry in g]
    print("Duplicated  %s" % (info,))
    logging.info(str(info))
# NOTE(review): the original indentation was lost; this separator is assumed
# to run once after the report rather than once per group - confirm.
print("\n")

# Optionally echo the log file that was just written.
answer = raw_input("Do you whant to view txt file with result (y/n) :")
if answer.strip().lower() == 'y':
    # The with-block closes the log file once it has been printed.
    with open('logfile.log') as File:
        for line in File:
            print(line)
25.03.2015 г.
python script for search duplicated file
Абонамент за:
Коментари за публикацията
(
Atom
)
Няма коментари :
Публикуване на коментар