diff --git a/README.md b/README.md index 56ba914..d772a4e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# 照片分类器 +# 照片分类器-python3 1. 对指定目录及子目录下的照片进行分类, 先按 年/月 分目录, 文件名重命名为"年-月-时间戳" 2. 每个照片文件的md5校验码存储到oracle新加坡云的mysql数据库'photo_classifier', 用于检查照片是否重复, 重复的会跳过 3. 如果照片经过修改, 丢失EXIF信息, 将被跳过 diff --git a/src/photo_classifier.py b/src/photo_classifier.py index 863c457..d598fa4 100644 --- a/src/photo_classifier.py +++ b/src/photo_classifier.py @@ -15,30 +15,65 @@ import shutil import hashlib import pymysql import datetime +import pytz +from win32com.propsys import propsys, pscon class Classifier(): + mode = 'prod' # 开发模式(dev)还是产品模式(prod) + IMAGE_EXTENTIONS = ['jpg', 'jpeg', 'bmp', 'png'] + VIDEO_EXTENTIONS = ['mp4'] + TEST_TABLE = 'TEST_PHOTO' + TABLE = 'PHOTO' + PHOTO_NO_DATE_KEYS = ['EXIF ExifVersion'] + PHOTO_DATE_KEYS = ['Image DateTime', 'EXIF DateTimeOriginal'] + PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS - def __init__(self, input_folder, output_folder): + def __init__(self, input_folder, photo_output, video_output, image_output): self.input = input_folder - self.output = output_folder + self.photo_output = photo_output + self.video_output = video_output + self.image_output = image_output self.processed_count = 0 - self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier') - self.photo_info_keys = ['Image DateTime', 'EXIF DateTimeOriginal', 'EXIF ExifVersion'] - self.key_without_date = 'EXIF ExifVersion' + self.table = self.TEST_TABLE if self.mode == 'dev' else self.TABLE pass - def start(self): - self.process_folder(self.input) + def connect_database(self): + self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier') + + def close_database(self): self.db.close() + + def create_table(self): + self.connect_database() + cursor = self.db.cursor() + + sql = 'DROP TABLE IF EXISTS {}'.format(self.table) + cursor.execute(sql) + print('删除表 {}'.format(self.table)) + + sql = '''CREATE TABLE {} ( + ID INT NOT NULL AUTO_INCREMENT , + MD5 VARCHAR(255) NOT NULL , + PRIMARY KEY (ID), UNIQUE (MD5)) + ENGINE = InnoDB;'''.format(self.table) + cursor.execute(sql) + print('创建表 {}'.format(self.table)) + + self.close_database() + + def start(self): + self.connect_database() + self.process_folder(self.input) self.delete_folders(self.input) - + self.close_database() + def get_file_count(self, folder): count = 0 for (_, _, _files) in os.walk(folder): count += len(_files) return count - + def delete_folders(self, folder): for (root, dirs, files) in os.walk(folder): for dir in dirs: @@ -47,20 +82,33 @@ class Classifier(): if self.get_file_count(abs_path) == 0: shutil.rmtree(abs_path) print('删除目录: {}'.format(abs_path)) - def is_photo(self, file_name): - for ext in ['jpg', 'jpeg', 'bmp', 'png']: + return self.is_image(file_name) and self.contains_exif(file_name) + + def is_video(self, file_name): + for ext in self.VIDEO_EXTENTIONS: if file_name.lower().endswith(ext): return True return False + def is_image(self, file_name): + for ext in self.IMAGE_EXTENTIONS: + if file_name.lower().endswith(ext): + return True + return False + + def contains_exif(self, file_name): + with open(file_name, 'rb') as reader: + tags = exifread.process_file(reader) + keys = [key for key in self.PHOTO_EXIF_KEYS if key in tags] + return len(keys) > 0 + def process_folder(self, folder): for (root, dirs, files) in os.walk(folder): for file in files: - if self.is_photo(file): - self.process_file(root, file) - + self.process_file(root, file) + def get_md5(self, file): with open(file, 'rb') as reader: return hashlib.md5(reader.read()).hexdigest() @@ -74,27 +122,26 @@ class Classifier(): new_name = self.rename_move(file_path, year, month) self.add_record(md5) self.processed_count += 1 - print('已处理照片 {}: {} --> {}'.format(self.processed_count, file, new_name)) + print('已处理 {}: {} --> {}'.format(self.processed_count, file, new_name)) except Exception as e: print(str(e)) - + def add_record(self, md5): try: cursor = self.db.cursor() - sql = "INSERT INTO photo_md5(MD5) VALUES('{}')".format(md5) + sql = "INSERT INTO {}(MD5) VALUES('{}')".format(self.table, md5) cursor.execute(sql) self.db.commit() except Exception as e: print('插入记录 {} 到数据库photo_classifier失败: {}'.format(md5, str(e))) self.db.rollback() raise e - def validate(self, file_path, md5): # check if the md5 of the photo exists in database try: cursor = self.db.cursor() - sql = "SELECT MD5 FROM photo_md5 WHERE MD5='{}'".format(md5) + sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5) cursor.execute(sql) record = cursor.fetchone() if str(record) != 'None': @@ -102,40 +149,65 @@ class Classifier(): raise Exception('重复照片 {} --> 删除'.format(file_path)) except Exception as e: raise e - - # check if image is photo or not - with open(file_path, 'rb') as reader: - tags = exifread.process_file(reader) - keys = [key for key in self.photo_info_keys if key in tags] - if len(keys) == 0: - raise Exception('图片不是照片: {} --> 跳过'.format(file_path)) - - def read_date(self, file): + if (not self.is_image(file_path)) and (not self.is_video(file_path)): + raise Exception('非图片或视频: {} --> 跳过'.format(file_path)) + + def get_photo_create_date(self, file): with open(file, 'rb') as reader: tags = exifread.process_file(reader) - keys = [key for key in self.photo_info_keys if key in tags] + keys = [key for key in self.PHOTO_DATE_KEYS if key in tags] if len(keys) > 0: key = keys[0] - if key != self.key_without_date: - origin_date = tags[key] - time_str = str(origin_date) - _date = time_str[:7].split(':') - year = _date[0] - month = _date[1] - return (year, month) - else: # 是拍摄照片, 但没有拍摄日期 - time_str = os.path.getmtime(file) - time_str = str(datetime.datetime.fromtimestamp(time_str)) - _date = time_str[:7].split('-') - year = _date[0] - month = _date[1] - return (year, month) - - + origin_date = tags[key] + time_str = str(origin_date) + _date = time_str[:7].split(':') + year = _date[0] + month = _date[1] + return (year, month) + return None + + def get_video_create_date(self, file): + try: + properties = propsys.SHGetPropertyStoreFromParsingName(file) + dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue() + time_str = str(dt.astimezone(pytz.timezone('Asia/Shanghai'))) + _date = time_str[:7].split('-') + year = _date[0] + month = _date[1] + return (year, month) + except: + return None + + def read_date(self, file): + file = file.replace('/', '\\') + date = None + if self.is_photo(file): + date = self.get_photo_create_date(file) # 照片可能没有EXIF日期 + elif self.is_video(file): + date = self.get_video_create_date(file) # 视频可能没有媒体创建日期 + + if not date: # 获取文件上次修改日期 + time_str = os.path.getmtime(file) + time_str = str(datetime.datetime.fromtimestamp(time_str)) + _date = time_str[:7].split('-') + year = _date[0] + month = _date[1] + date = (year, month) + return date def rename_move(self, file_path, year, month): - new_path = os.path.join(self.output, year, month) + if self.is_image(file_path): + if self.is_photo(file_path): + output = self.photo_output + else: + output = self.image_output + elif self.is_video(file_path): + output = self.video_output + else: + raise Exception('移动文件失败, 非图片或视频: {}'.format(file_path)) + + new_path = os.path.join(output, year, month) if not os.path.exists(new_path): os.makedirs(new_path) file_name, file_ext = os.path.splitext(file_path) @@ -143,5 +215,11 @@ class Classifier(): shutil.move(file_path, os.path.join(new_path, new_name)) return new_name -cf = Classifier('D:/temp/相册', 'D:/自动同步/总相册') + +cf = Classifier('D:/temp/相册', + photo_output='D:/总仓库-照片视频/总照片备份', + video_output='D:/总仓库-照片视频/总视频备份', + image_output='D:/总仓库-照片视频/总图片备份') + +# cf.create_table() cf.start() \ No newline at end of file