25.03.2015 г.

Python script for finding duplicated files

##################################################
#   Search for duplicated files without,         #
#   meaningless file extension                   #
#     DO NOT EDIT BY HAND!!!                     #
##################################################

#IMPORT MODULES FOR SCRIPT
import hashlib, os, locale, fileinput, platform, logging
from itertools import *
from operator import itemgetter
# Module-level MD5 object (md_5 below builds its own hasher for every file).
md = hashlib.md5()
# Encoding name reported by the default locale.
encoding = locale.getdefaultlocale()[1]
# Results are logged as bare messages to logfile.log; mode "w" starts the
# log afresh on every run.
logging.basicConfig(
    filename='logfile.log',
    filemode="w",
    level=logging.DEBUG,
    format='%(message)s',
)

# Ask the user where to search and which file extension to look for.
Dir = raw_input("Enter destination for begin use \\ : ")
extension = raw_input("Enter extension for file's : ")
# Hard-coded alternatives to the two prompts above:
# Dir = "C:\\script"
# extension = "py"

# Path of every matching file, and the MD5 digest of each one.
_File, _File_H = [], []
# Digests collected during the scan, and the digests met more than once.
_duplicated, _duplicated_sravnqva = [], []
# Only needed by the commented-out search_file function; may be removed
# if that function is never used.
subdirlist = []
# Paths of the duplicated files, and the MD5 digest of each of them.
one, two = [], []

 #SEARCH WITH OS.LISTDIR - NOT WORKING AT THE MOMENT
 # def search_file(Dir):
 
 #  for file in os.listdir(Dir):
 #   if os.path.isfile(file):
 #    yield os.path.join(Dir,file)
 #   else:
 #    subdirlist.append(os.path.join(Dir, file))
 #    try:
 #     for subdir in subdirlist:
 #      search_file(subdir)
 #    except:
 #     print "Somting wrong"

 #SEARCH WITH OS.WALK
def search_os_walk(Dir, ext=None):
    """Yield the full path of every file below ``Dir`` that has the extension.

    Parameters:
        Dir: directory whose tree is walked with ``os.walk``.
        ext: extension to match, without the leading dot. When omitted, the
            module-level ``extension`` value is used.

    Yields:
        ``os.path.join(path, name)`` for each file whose name ends with
        ``"." + ext``.

    Directories that cannot be listed are reported with a "Not allowed"
    message and skipped; the walk then continues.
    """
    if ext is None:
        # Keep the original behaviour: match the module-level extension.
        ext = extension
    suffix = "." + ext

    def report_error(err):
        # os.walk() ignores listing errors unless it is given a callback,
        # so this is the only place an access problem can be reported.
        print("Not allowed %s" % err)

    for path, _dirlist, filelist in os.walk(Dir, onerror=report_error):
        for fn in filelist:
            if fn.endswith(suffix):
                yield os.path.join(path, fn)


 #FUNCTION USED TO COMPUTE THE MD5 HASH OF A FILE
def md_5(filePath):
    """Return the hexadecimal MD5 digest of the file at ``filePath``.

    The file is opened in binary mode and read in 8192-byte chunks, so a
    large file is never held in memory all at once.

    Parameters:
        filePath: path of the file to hash.

    Returns:
        The digest as a hex string, or ``None`` when the file cannot be
        opened or read (a notice is printed in that case).
    """
    m = hashlib.md5()
    try:
        with open(filePath, 'rb') as fh:
            while True:
                data = fh.read(8192)
                if not data:
                    # An empty read means end of file.
                    break
                m.update(data)
    except IOError as e:
        print("Not allowerd Basi %s" % e)
        return None
    return m.hexdigest()


 #SEARCH FOR FILES AND APPEND THEM TO THE REQUIRED LISTS (SEE ABOVE FOR THEIR DESCRIPTIONS)

# Hash every matching file exactly once and note which digests repeat.
# Sets give constant-time membership tests; the module-level lists are
# still filled so their contents stay available after the run.
seen_hashes = set()
repeated_hashes = set()
for found_path in search_os_walk(Dir):
    digest = md_5(found_path)
    _File.append(found_path)
    _File_H.append(digest)
    if digest is None:
        # No digest was produced for this file, so it cannot be compared;
        # never report such files as duplicates of each other.
        continue
    if digest in seen_hashes:
        repeated_hashes.add(digest)
        _duplicated_sravnqva.append(digest)
    seen_hashes.add(digest)
    _duplicated.append(digest)

print("*" * 80)

# Keep only the files whose digest occurs more than once, reusing the
# digests computed above instead of hashing the files a second time.
for found_path, digest in zip(_File, _File_H):
    if digest in repeated_hashes:
        one.append(found_path)
        two.append(digest)

# Group the duplicated paths by digest.
groups = {}
for found_path, digest in zip(one, two):
    groups.setdefault(digest, []).append(found_path)

# Report one group per digest, in sorted digest order, on screen and in
# the log file.
for digest in sorted(groups):
    info = groups[digest]
    print("Duplicated  %s" % (info,))
    logging.info(str(info))
    print("\n")

answer = raw_input("Do you whant to view txt file with result (y/n) :")
if answer.lower() == 'y':
    # The with-block closes the log file once it has been printed.
    with open('logfile.log') as log_file:
        for line in log_file:
            print(line)

Няма коментари :

Публикуване на коментар