增加: 处理视频和非照片的图片

2022-01-01 00:38:54 +08:00 · 2022-01-01 00:38:54 +08:00 · 564da5df77
commit 564da5df77
parent fee24db49c
2 changed files with 126 additions and 48 deletions
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# 照片分类器
+# 照片分类器-python3
 1. 对指定目录及子目录下的照片进行分类, 先按 年/月 分目录, 文件名重命名为"年-月-时间戳"
 2. 每个照片文件的md5校验码存储到oracle新加坡云的mysql数据库'photo_classifier', 用于检查照片是否重复, 重复的会跳过
 3. 如果照片经过修改, 丢失EXIF信息, 将被跳过
--- a/src/photo_classifier.py
+++ b/src/photo_classifier.py
@ -15,23 +15,58 @@ import shutil
 import hashlib
 import pymysql
 import datetime
 import pytz
 from win32com.propsys import propsys, pscon
 class Classifier():
    mode = 'prod'  # 开发模式(dev)还是产品模式(prod)
    IMAGE_EXTENTIONS = ['jpg', 'jpeg', 'bmp', 'png']
    VIDEO_EXTENTIONS = ['mp4']
    TEST_TABLE = 'TEST_PHOTO'
    TABLE = 'PHOTO'
    PHOTO_NO_DATE_KEYS = ['EXIF ExifVersion']
    PHOTO_DATE_KEYS = ['Image DateTime', 'EXIF DateTimeOriginal']
    PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS
-    def __init__(self, input_folder, output_folder):
+    def __init__(self, input_folder, photo_output, video_output, image_output):
        self.input = input_folder
-        self.output = output_folder
+        self.photo_output = photo_output
        self.video_output = video_output
        self.image_output = image_output
        self.processed_count = 0
-        self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier')
+        self.table = self.TEST_TABLE if self.mode == 'dev' else self.TABLE
        self.photo_info_keys = ['Image DateTime', 'EXIF DateTimeOriginal', 'EXIF ExifVersion']
        self.key_without_date = 'EXIF ExifVersion'
        pass
-    def start(self):
+    def connect_database(self):
-        self.process_folder(self.input)
+        self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier')
    def close_database(self):
        """Close the database connection stored in ``self.db``."""
        connection = self.db
        connection.close()
    def create_table(self):
        """Drop and recreate the MD5 table named by ``self.table``.

        Connects via ``connect_database``, drops the table if it exists,
        then creates it again with an auto-increment ``ID`` primary key and
        a unique ``MD5`` column. Any rows already in the table are lost.

        The connection is always closed through ``close_database``, even
        when one of the statements fails; the exception still propagates.
        """
        self.connect_database()
        try:
            cursor = self.db.cursor()
            sql = 'DROP TABLE IF EXISTS {}'.format(self.table)
            cursor.execute(sql)
            print('删除表 {}'.format(self.table))
            sql = '''CREATE TABLE {} (
            ID INT NOT NULL AUTO_INCREMENT ,
            MD5 VARCHAR(255) NOT NULL ,
            PRIMARY KEY (ID), UNIQUE (MD5))
            ENGINE = InnoDB;'''.format(self.table)
            cursor.execute(sql)
            print('创建表 {}'.format(self.table))
        finally:
            # Release the connection even if DROP/CREATE raised.
            self.close_database()
    def start(self):
        """Run one full classification pass over ``self.input``.

        Opens the database connection, processes every file under the input
        folder with ``process_folder``, then calls ``delete_folders`` on the
        input folder. The connection is closed in all cases; if processing
        raises, ``delete_folders`` is skipped and the exception propagates.
        """
        self.connect_database()
        try:
            self.process_folder(self.input)
            self.delete_folders(self.input)
        finally:
            # Do not leak the connection when processing fails part-way.
            self.close_database()
    def get_file_count(self, folder):
        count = 0
@ -48,18 +83,31 @@ class Classifier():
                        shutil.rmtree(abs_path)
                        print('删除目录: {}'.format(abs_path))
    def is_photo(self, file_name):
-        for ext in ['jpg', 'jpeg', 'bmp', 'png']:
+        return self.is_image(file_name) and self.contains_exif(file_name)
    def is_video(self, file_name):
        """Return True when ``file_name`` has one of ``VIDEO_EXTENTIONS``.

        The comparison is case-insensitive and requires the dot before the
        extension, so a name that merely ends in the same letters (for
        example ``notes_mp4``) is not treated as a video.
        """
        suffixes = tuple('.' + ext for ext in self.VIDEO_EXTENTIONS)
        return file_name.lower().endswith(suffixes)
    def is_image(self, file_name):
        """Return True when ``file_name`` has one of ``IMAGE_EXTENTIONS``.

        The comparison is case-insensitive and requires the dot before the
        extension, so a name that merely ends in the same letters (for
        example ``scanjpg``) is not treated as an image.
        """
        suffixes = tuple('.' + ext for ext in self.IMAGE_EXTENTIONS)
        return file_name.lower().endswith(suffixes)
    def contains_exif(self, file_name):
        """Report whether the file has any tag listed in ``PHOTO_EXIF_KEYS``.

        The file is opened in binary mode and parsed with
        ``exifread.process_file``; the result is True when at least one of
        the configured keys is present in the returned tags.
        """
        with open(file_name, 'rb') as stream:
            exif_tags = exifread.process_file(stream)
        return any(tag in exif_tags for tag in self.PHOTO_EXIF_KEYS)
    def process_folder(self, folder):
        for (root, dirs, files) in os.walk(folder):
            for file in files:
-                if self.is_photo(file):
+                self.process_file(root, file)
                    self.process_file(root, file)
    def get_md5(self, file):
        with open(file, 'rb') as reader:
@ -74,14 +122,14 @@ class Classifier():
            new_name = self.rename_move(file_path, year, month)
            self.add_record(md5)
            self.processed_count += 1
-            print('已处理照片 {}: {} --> {}'.format(self.processed_count, file, new_name))
+            print('已处理 {}: {} --> {}'.format(self.processed_count, file, new_name))
        except Exception as e:
            print(str(e))
    def add_record(self, md5):
        try:
            cursor = self.db.cursor()
-            sql = "INSERT INTO photo_md5(MD5) VALUES('{}')".format(md5)
+            sql = "INSERT INTO {}(MD5) VALUES('{}')".format(self.table, md5)
            cursor.execute(sql)
            self.db.commit()
        except Exception as e:
@ -89,12 +137,11 @@ class Classifier():
            self.db.rollback()
            raise e
    def validate(self, file_path, md5):
        # check if the md5 of the photo exists in database
        try:
            cursor = self.db.cursor()
-            sql = "SELECT MD5 FROM photo_md5 WHERE MD5='{}'".format(md5)
+            sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5)
            cursor.execute(sql)
            record = cursor.fetchone()
            if str(record) != 'None':
@ -103,39 +150,64 @@ class Classifier():
        except Exception as e:
            raise e
-        # check if image is photo or not
+        if (not self.is_image(file_path)) and (not self.is_video(file_path)):
-        with open(file_path, 'rb') as reader:
+            raise Exception('非图片或视频: {} --> 跳过'.format(file_path))
            tags = exifread.process_file(reader)
            keys = [key for key in self.photo_info_keys if key in tags]
            if len(keys) == 0:
                raise Exception('图片不是照片: {} --> 跳过'.format(file_path))
-
+    def get_photo_create_date(self, file):
    def read_date(self, file):
        with open(file, 'rb') as reader:
            tags = exifread.process_file(reader)
-            keys = [key for key in self.photo_info_keys if key in tags]
+            keys = [key for key in self.PHOTO_DATE_KEYS if key in tags]
            if len(keys) > 0:
                key = keys[0]
-                if key != self.key_without_date:
+                origin_date = tags[key]
-                    origin_date = tags[key]
+                time_str = str(origin_date)
-                    time_str = str(origin_date)
+                _date = time_str[:7].split(':')
-                    _date = time_str[:7].split(':')
+                year = _date[0]
-                    year = _date[0]
+                month = _date[1]
-                    month = _date[1]
+                return (year, month)
-                    return (year, month)
+        return None
                else: # 是拍摄照片, 但没有拍摄日期
                    time_str = os.path.getmtime(file)
                    time_str = str(datetime.datetime.fromtimestamp(time_str))
                    _date = time_str[:7].split('-')
                    year = _date[0]
                    month = _date[1]
                    return (year, month)
    def get_video_create_date(self, file):
        """Return ``(year, month)`` from the video's media-encoded date.

        Reads ``PKEY_Media_DateEncoded`` from the file's property store and
        converts it to the ``Asia/Shanghai`` time zone. ``year`` is a
        four-digit string and ``month`` a two-digit string.

        This is a best-effort lookup: any ordinary error (missing property,
        unreadable file, ...) yields ``None`` so the caller can fall back to
        another date source. ``KeyboardInterrupt`` and ``SystemExit`` are
        not swallowed.
        """
        try:
            properties = propsys.SHGetPropertyStoreFromParsingName(file)
            encoded = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue()
            local = encoded.astimezone(pytz.timezone('Asia/Shanghai'))
            return ('{:04d}'.format(local.year), '{:02d}'.format(local.month))
        except Exception:
            # Deliberately broad: the property may be absent or unreadable.
            return None
    def read_date(self, file):
        """Return the ``(year, month)`` string pair used to file ``file``.

        Forward slashes in the path are first replaced with backslashes.
        Photos are dated via ``get_photo_create_date`` and videos via
        ``get_video_create_date``; when neither yields a date (or the file
        is neither), the file's last-modified time is used instead.
        """
        file = file.replace('/', '\\')
        if self.is_photo(file):
            found = self.get_photo_create_date(file)  # EXIF date may be missing
        elif self.is_video(file):
            found = self.get_video_create_date(file)  # media date may be missing
        else:
            found = None
        if found:
            return found
        # Fall back to the file's last-modified timestamp (local time).
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(file))
        return ('{:04d}'.format(modified.year), '{:02d}'.format(modified.month))
    def rename_move(self, file_path, year, month):
-        new_path = os.path.join(self.output, year, month)
+        if self.is_image(file_path):
            if self.is_photo(file_path):
                output = self.photo_output
            else:
                output = self.image_output
        elif self.is_video(file_path):
            output = self.video_output
        else:
            raise Exception('移动文件失败, 非图片或视频: {}'.format(file_path))
        new_path = os.path.join(output, year, month)
        if not os.path.exists(new_path):
            os.makedirs(new_path)
        file_name, file_ext = os.path.splitext(file_path)
@ -143,5 +215,11 @@ class Classifier():
        shutil.move(file_path, os.path.join(new_path, new_name))
        return new_name
-cf = Classifier('D:/temp/相册', 'D:/自动同步/总相册')
+
 # Script entry: build a classifier that reads from the input album folder
 # and writes photos, videos and other images to three separate output roots.
 cf = Classifier('D:/temp/相册',
                 photo_output='D:/总仓库-照片视频/总照片备份',
                 video_output='D:/总仓库-照片视频/总视频备份',
                 image_output='D:/总仓库-照片视频/总图片备份')
 # Uncomment to drop and recreate the MD5 table before processing.
 # cf.create_table()
 cf.start()