22.08.2016 г.

Search for duplicated files (duplicates are only listed, never deleted!)


##################################################
# Search for duplicated files by content;        #
# the file extension does not matter.            #
# DO NOT EDIT BY HAND! Change only the Dir param #
##################################################
#IMPORT MODULES FOR SCRIPT
import hashlib, os
from itertools import *
from operator import itemgetter
# Module-level MD5 object. Nothing in the visible code reads it: md_5() below
# creates its own hasher for every file.
md = hashlib.md5()

# Alternative root (disabled): scan the current working directory instead.
#Directory = os.getcwd()
# Root directory to scan, followed by the shared lists the script below fills in.
Dir = "C:\\script\\"
_File = []   # path of every file found under Dir
_File_H = [] # MD5 hex digest of each file, parallel to _File
_duplicated = [] # every digest encountered while looking for repeats
_duplicated_sravnqva = [] # digests encountered more than once; used to filter the (path, digest) pairs
subdirlist = [] # not used in the visible code; the original note says it is only needed by a search_file function

#SEARCH WITH OS.WALK
def search_os_walk(Dir):
    """Yield the full path of every file found under ``Dir``.

    The tree is traversed with ``os.walk``. Directories that cannot be
    listed are reported on stdout as ``Not allowed <error>`` and skipped.

    Args:
        Dir: Root directory to scan.

    Yields:
        The directory path joined with each file name, one file at a time.
    """
    def _report(err):
        # os.walk ignores listing errors unless an onerror callback is given;
        # a try/except around the yield below would never see them.
        print("Not allowed %s" % (err,))

    for path, dirlist, filelist in os.walk(Dir, onerror=_report):
        for fn in filelist:
            yield os.path.join(path, fn)


#FUNCTION USED TO COMPUTE THE MD5 HASH OF A FILE
def md_5(filePath):
    """Return the MD5 hex digest of the file at ``filePath``.

    The file is read in binary mode in 8192-byte chunks, so large files are
    never loaded into memory at once.

    Args:
        filePath: Path of the file to hash.

    Returns:
        The hexadecimal MD5 digest as a string, or ``None`` when the file
        cannot be opened or read (the error is printed to stdout).
    """
    try:
        with open(filePath, 'rb') as fh:
            m = hashlib.md5()
            while True:
                try:
                    data = fh.read(8192)
                except IOError as e:
                    # Stop here: retrying inside ``while True`` would loop
                    # forever on a persistent read error, and a digest of
                    # only part of the file would be misleading.
                    print("Not allowerd Basi %s" % (e,))
                    return None
                if not data:
                    break
                m.update(data)
            return m.hexdigest()
    except IOError as e:
        print("Not allowerd %s" % (e,))
        return None


#SEARCH FOR FILES AND APPEND THEM TO THE REQUIRED LISTS (SEE THE DESCRIPTIONS ABOVE)

# Collect every file under Dir together with its MD5 digest. _File and
# _File_H are parallel lists: _File_H[i] is the digest of _File[i].
for file_path in search_os_walk(Dir):
    _File.append(file_path)
    _File_H.append(md_5(file_path))

# Find the digests that occur more than once. A set gives O(1) membership
# tests; the module-level lists are still filled in as before.
_seen_digests = set()
for digest in _File_H:
    if digest is None:
        # md_5 yields None for a file it could not open; such files must
        # not be reported as duplicates of one another.
        continue
    if digest in _seen_digests:
        _duplicated_sravnqva.append(digest)
    _seen_digests.add(digest)
    _duplicated.append(digest)

print("*" * 80)

# Group the duplicated paths by digest, reusing the digests computed above
# instead of hashing every duplicate file a second time.
_repeated_digests = set(_duplicated_sravnqva)
_Dupliated = []  # paths of all files whose content occurs more than once
d = {}           # duplicated path -> digest
_groups = {}     # digest -> list of paths sharing that digest
for file_path, digest in zip(_File, _File_H):
    if digest in _repeated_digests:
        _Dupliated.append(file_path)
        d[file_path] = digest
        _groups.setdefault(digest, []).append(file_path)

# One output line per group of identical files, ordered by digest.
for digest in sorted(_groups):
    # To show each group as a numbered column instead, use:
    #     for i, z in enumerate(_groups[digest]):
    #         print("%s %s" % (i, z))
    print("Duplicated  %s" % (_groups[digest],))

#raw_input()

Няма коментари :

Публикуване на коментар