16.12.2017 г.

Read an Excel file with Python, validate its rows and return a defaultdict

import os
import xlrd
from collections import defaultdict
import sys
from pprint import pprint
import re

'''
Example spreadsheet rows (columns: name, number, e-mail):

Some name1     2345        someemal1@gmail.com
Some name12    23456        someemal2@gmail.com
Some name1    0            someemal1@gmail.com
Some name3    0889345        someemal1@gmail.com
'''

# Path of the workbook to process; checkFilename() validates its extension.
filename = "somefile.xls"
# Filled by ReadXLS.checkResult(): maps an upper-cased e-mail address to the
# list of (number, name) tuples found for it.
result = defaultdict(list)


def checkFilename(filename):
    '''Validate that *filename* names an ``.xls`` workbook.

    The extension is compared case-insensitively and must include the dot,
    so ``REPORT.XLS`` is accepted while ``fooxls`` is not.

    Returns:
        The unchanged *filename* when it is valid.

    Raises:
        SystemExit: with the message ``'Not valid filename'`` when the name
            does not end in ``.xls``. Previously the message was *returned*
            (the ``sys.exit()`` after it was unreachable), so the caller
            went on to treat the error text as a file path.
    '''
    if filename.lower().endswith('.xls'):
        return filename
    # sys.exit(str) writes the message to stderr and exits with status 1.
    sys.exit('Not valid filename')


class ReadXLS:
    '''Read the first sheet of a workbook and collect its valid rows.

    The first three columns of every row are treated as name, number and
    e-mail address (in that order). Valid rows are appended to the
    module-level ``result`` mapping, keyed by the upper-cased e-mail.
    '''

    # The dot before the top-level domain is escaped: with a bare ``.`` any
    # character matched there, so strings such as ``a@bcd`` were accepted.
    _EMAIL_RE = re.compile(r'[\w.-]+@[\w.-]+\.\w+')

    def __init__(self, checkFilename):
        '''Remember the path of the workbook to read.

        The parameter keeps its historical name ``checkFilename``; it is the
        file path (normally the value returned by ``checkFilename()``).
        '''
        self.filename = checkFilename

    def realReadFilename(self):
        '''Yield each row of the first sheet as a list of cell values.

        This is a generator, so the workbook is only opened when iteration
        starts. If the workbook cannot be opened, or it has no sheet at
        index 0, the error is printed and the process exits with status 1.
        '''
        try:
            workbook = xlrd.open_workbook(self.filename)
        except Exception as exc:
            # Not a bare ``except``: KeyboardInterrupt/SystemExit must
            # propagate, and the real cause is shown to the user.
            print("some error...", exc)
            sys.exit(1)
        try:
            sheet = workbook.sheet_by_index(0)
        except IndexError as ie:
            print(ie)
            sys.exit(1)
        for rowx in range(sheet.nrows):
            yield sheet.row_values(rowx)

    @staticmethod
    def _cell_text(value):
        '''Return a cell value as text, without a trailing ``.0`` on whole numbers.'''
        # Numeric cells arrive as floats (e.g. 2345.0); render whole numbers
        # the way they appear in the sheet.
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    @classmethod
    def _clean_row(cls, line):
        '''Validate one row; return ``(key, (number, name))`` or ``None``.

        A row is rejected when it has fewer than three cells, when any of
        the first three cells is ``None`` or ``''``, or when the third cell
        does not look like an e-mail address.
        '''
        if len(line) < 3:
            return None
        name, number, email = line[0], line[1], line[2]
        if any(cell in (None, '') for cell in (name, number, email)):
            return None
        email = cls._cell_text(email)
        if not cls._EMAIL_RE.search(email):
            return None
        # Non-string cells are converted first so .strip() cannot fail.
        number = cls._cell_text(number).strip().replace(' ', '')
        return email.upper(), (number, cls._cell_text(name).strip())

    def checkResult(self):
        '''Append every valid row of the workbook to the global ``result``.'''
        for line in self.realReadFilename():
            cleaned = self._clean_row(line)
            if cleaned is None:
                continue
            key, entry = cleaned
            result[key].append(entry)

    def __repr__(self):
        '''Return the workbook path as the object's representation.'''
        return "{}".format(self.filename)


if __name__ == "__main__":
    # Validate the configured path, load the accepted rows into ``result``
    # and pretty-print the grouped data.
    reader = ReadXLS(checkFilename(filename))
    reader.checkResult()
    pprint(result)

Няма коментари :

Публикуване на коментар