diff --git a/photo_classifier-for nas.py b/photo_classifier-for nas.py index 71cc932..b8116d3 100644 --- a/photo_classifier-for nas.py +++ b/photo_classifier-for nas.py @@ -1,9 +1,9 @@ -''' +""" 根据读取的照片信息分类照片 分类: 目录名:2020\01 文件名:2020-01-时间戳 -''' +""" import os import sys @@ -18,16 +18,16 @@ import pytz from win32com.propsys import propsys, pscon -class Classifier1(): - mode = 'prod' # 开发模式(dev)还是产品模式(prod) - IMAGE_EXTENTIONS = ['jpg', 'jpeg', 'bmp', 'png', 'tif', 'gif', 'heic'] - VIDEO_EXTENTIONS = ['mp4', 'avi', 'rmvb', 'mkv', 'mov', 'amr', 'mpg'] - TEST_TABLE = 'TEST_PHOTO' - TABLE = 'PHOTO' - PHOTO_NO_DATE_KEYS = ['EXIF ExifVersion'] - PHOTO_DATE_KEYS = ['Image DateTime', 'EXIF DateTimeOriginal'] +class Classifier1: + mode = "prod" # 开发模式(dev)还是产品模式(prod) + IMAGE_EXTENTIONS = ["jpg", "jpeg", "bmp", "png", "tif", "gif", "heic"] + VIDEO_EXTENTIONS = ["mp4", "avi", "rmvb", "mkv", "mov", "amr", "mpg"] + TEST_TABLE = "TEST_PHOTO" + TABLE = "PHOTO" + PHOTO_NO_DATE_KEYS = ["EXIF ExifVersion"] + PHOTO_DATE_KEYS = ["Image DateTime", "EXIF DateTimeOriginal"] PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS - SKIP_FOLDERS = ['System Volume Information', '$RECYCLE.BIN', '.stfolder'] + SKIP_FOLDERS = ["System Volume Information", "$RECYCLE.BIN", ".stfolder"] def __init__(self, input_folder, photo_output, video_output, image_output): self.input = input_folder @@ -35,11 +35,17 @@ class Classifier1(): self.video_output = video_output self.image_output = image_output self.processed_count = 0 - self.table = self.TEST_TABLE if self.mode == 'dev' else self.TABLE + self.table = self.TEST_TABLE if self.mode == "dev" else self.TABLE pass def connect_database(self): - self.db = pymysql.connect(host='northflyfish.myqnapcloud.cn', user='admin', password='zhiyong@214', database='photo_classifier', port=3307) + self.db = pymysql.connect( + host="northflyfish.myqnapcloud.cn", + user="admin", + password="zhiyong@214", + database="photo_classifier", + port=3307, + ) def close_database(self): self.db.close() @@ -48,17 +54,19 @@ class Classifier1(): self.connect_database() cursor = self.db.cursor() - sql = 'DROP TABLE IF EXISTS {}'.format(self.table) + sql = "DROP TABLE IF EXISTS {}".format(self.table) cursor.execute(sql) - print('删除表 {}'.format(self.table)) + print("删除表 {}".format(self.table)) - sql = '''CREATE TABLE {} ( + sql = """CREATE TABLE {} ( ID INT NOT NULL AUTO_INCREMENT , MD5 VARCHAR(255) NOT NULL , PRIMARY KEY (ID), UNIQUE (MD5)) - ENGINE = InnoDB;'''.format(self.table) + ENGINE = InnoDB;""".format( + self.table + ) cursor.execute(sql) - print('创建表 {}'.format(self.table)) + print("创建表 {}".format(self.table)) self.close_database() @@ -70,12 +78,12 @@ class Classifier1(): def get_file_count(self, folder): count = 0 - for (_, _, _files) in os.walk(folder): + for _, _, _files in os.walk(folder): count += len(_files) return count def delete_folders(self, folder): - for (root, dirs, files) in os.walk(folder): + for root, dirs, files in os.walk(folder): for dir in dirs: if dir in self.SKIP_FOLDERS: continue @@ -83,7 +91,7 @@ class Classifier1(): if os.path.isdir(abs_path): if self.get_file_count(abs_path) == 0: shutil.rmtree(abs_path) - print('删除目录: {}'.format(abs_path)) + print("删除目录: {}".format(abs_path)) def is_photo(self, file_name): return self.is_image(file_name) and self.contains_exif(file_name) @@ -101,18 +109,18 @@ class Classifier1(): return False def contains_exif(self, file_name): - with open(file_name, 'rb') as reader: + with open(file_name, "rb") as reader: tags = exifread.process_file(reader) keys = [key for key in self.PHOTO_EXIF_KEYS if key in tags] return len(keys) > 0 def process_folder(self, folder): - for (root, dirs, files) in os.walk(folder): + for root, dirs, files in os.walk(folder): for file in files: self.process_file(root, file) def get_md5(self, file): - with open(file, 'rb') as reader: + with open(file, "rb") as reader: return hashlib.md5(reader.read()).hexdigest() def process_file(self, root, file): @@ -125,11 +133,13 @@ class Classifier1(): new_name = self.rename_move(file_path, year, month, day, md5) self.add_record(md5) self.processed_count += 1 - print('已处理 {}: {} --> {}'.format(self.processed_count, file, new_name)) + print( + "已处理 {}: {} --> {}".format(self.processed_count, file, new_name) + ) except Exception as e: print(str(e)) else: - print('非图片或视频, 忽略文件: {}'.format(file_path)) + print("非图片或视频, 忽略文件: {}".format(file_path)) def add_record(self, md5): try: @@ -138,7 +148,7 @@ class Classifier1(): cursor.execute(sql) self.db.commit() except Exception as e: - print('插入记录 {} 到数据库photo_classifier失败: {}'.format(md5, str(e))) + print("插入记录 {} 到数据库photo_classifier失败: {}".format(md5, str(e))) self.db.rollback() raise e @@ -149,24 +159,24 @@ class Classifier1(): sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5) cursor.execute(sql) record = cursor.fetchone() - if str(record) != 'None': + if str(record) != "None": os.remove(file_path) - raise Exception('重复文件 {} --> 删除'.format(file_path)) + raise Exception("重复文件 {} --> 删除".format(file_path)) except Exception as e: raise e if (not self.is_image(file_path)) and (not self.is_video(file_path)): - raise Exception('非图片或视频: {} --> 跳过'.format(file_path)) + raise Exception("非图片或视频: {} --> 跳过".format(file_path)) def get_photo_create_date(self, file): - with open(file, 'rb') as reader: + with open(file, "rb") as reader: tags = exifread.process_file(reader) keys = [key for key in self.PHOTO_DATE_KEYS if key in tags] if len(keys) > 0: key = keys[0] origin_date = tags[key] time_str = str(origin_date) - _date = time_str[:10].split(':') + _date = time_str[:10].split(":") year = _date[0] month = _date[1] day = _date[2] @@ -177,8 +187,8 @@ class Classifier1(): try: properties = propsys.SHGetPropertyStoreFromParsingName(file) dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue() - time_str = str(dt.astimezone(pytz.timezone('Asia/Shanghai'))) - _date = time_str[:10].split('-') + time_str = str(dt.astimezone(pytz.timezone("Asia/Shanghai"))) + _date = time_str[:10].split("-") year = _date[0] month = _date[1] day = _date[2] @@ -187,7 +197,7 @@ class Classifier1(): return None def read_date(self, file): - file = file.replace('/', '\\') + file = file.replace("/", "\\") date = None if self.is_photo(file): date = self.get_photo_create_date(file) # 照片可能没有EXIF日期 @@ -197,7 +207,7 @@ class Classifier1(): if not date: # 获取文件上次修改日期 time_str = os.path.getmtime(file) time_str = str(datetime.datetime.fromtimestamp(time_str)) - _date = time_str[:10].split('-') + _date = time_str[:10].split("-") year = _date[0] month = _date[1] day = _date[2] @@ -213,23 +223,24 @@ class Classifier1(): elif self.is_video(file_path): output = self.video_output else: - raise Exception('移动文件失败, 非图片或视频: {}'.format(file_path)) + raise Exception("移动文件失败, 非图片或视频: {}".format(file_path)) new_path = os.path.join(output, year, month, day) if not os.path.exists(new_path): os.makedirs(new_path) file_name, file_ext = os.path.splitext(file_path) - new_name = year + '-' + month + '-' + day + '-' + md5 + file_ext + new_name = year + "-" + month + "-" + day + "-" + md5 + file_ext shutil.move(file_path, os.path.join(new_path, new_name)) return new_name cf = Classifier1( - input_folder='D:/待分类照片视频1', + input_folder="D:/待分类照片视频1", # input_folder='z:/待分类照片视频/Picture', - photo_output='D:/总仓库-照片视频1/总照片备份', - video_output='D:/总仓库-照片视频1/总视频备份', - image_output='D:/总仓库-照片视频1/总图片备份') + photo_output="D:/总仓库-照片视频1/总照片备份", + video_output="D:/总仓库-照片视频1/总视频备份", + image_output="D:/总仓库-照片视频1/总图片备份", +) cf.start() # cf.create_table() diff --git a/src/photo_classifier-当前使用.py b/src/photo_classifier-当前使用.py index 60cba03..5b9e125 100644 --- a/src/photo_classifier-当前使用.py +++ b/src/photo_classifier-当前使用.py @@ -1,9 +1,9 @@ -''' +""" 根据读取的照片信息分类照片 分类: 目录名:2020\01 文件名:2020-01-时间戳 -''' +""" import os import sys @@ -17,17 +17,19 @@ import datetime import pytz from win32com.propsys import propsys, pscon +# 使用韩国oracle 数据库 -class Classifier(): - mode = 'prod' # 开发模式(dev)还是产品模式(prod) - IMAGE_EXTENTIONS = ['jpg', 'jpeg', 'bmp', 'png', 'tif', 'gif', 'heic'] - VIDEO_EXTENTIONS = ['mp4', 'avi', 'rmvb', 'mkv', 'mov', 'amr', 'mpg'] - TEST_TABLE = 'TEST_PHOTO' - TABLE = 'PHOTO' - PHOTO_NO_DATE_KEYS = ['EXIF ExifVersion'] - PHOTO_DATE_KEYS = ['Image DateTime', 'EXIF DateTimeOriginal'] + +class Classifier: + mode = "prod" # 开发模式(dev)还是产品模式(prod) + IMAGE_EXTENTIONS = ["jpg", "jpeg", "bmp", "png", "tif", "gif", "heic"] + VIDEO_EXTENTIONS = ["mp4", "avi", "rmvb", "mkv", "mov", "amr", "mpg"] + TEST_TABLE = "TEST_PHOTO" + TABLE = "PHOTO" + PHOTO_NO_DATE_KEYS = ["EXIF ExifVersion"] + PHOTO_DATE_KEYS = ["Image DateTime", "EXIF DateTimeOriginal"] PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS - SKIP_FOLDERS = ['System Volume Information', '$RECYCLE.BIN', '.stfolder'] + SKIP_FOLDERS = ["System Volume Information", "$RECYCLE.BIN", ".stfolder"] def __init__(self, input_folder, photo_output, video_output, image_output): self.input = input_folder @@ -35,11 +37,16 @@ class Classifier(): self.video_output = video_output self.image_output = image_output self.processed_count = 0 - self.table = self.TEST_TABLE if self.mode == 'dev' else self.TABLE + self.table = self.TEST_TABLE if self.mode == "dev" else self.TABLE pass def connect_database(self): - self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier') + self.db = pymysql.connect( + host="panel.zhiyong.tech", + user="yu_biggerfish", + password="jRHTbQrdkfNNTztH", + database="photo_classifier", + ) def close_database(self): self.db.close() @@ -48,17 +55,19 @@ class Classifier(): self.connect_database() cursor = self.db.cursor() - sql = 'DROP TABLE IF EXISTS {}'.format(self.table) + sql = "DROP TABLE IF EXISTS {}".format(self.table) cursor.execute(sql) - print('删除表 {}'.format(self.table)) + print("删除表 {}".format(self.table)) - sql = '''CREATE TABLE {} ( + sql = """CREATE TABLE {} ( ID INT NOT NULL AUTO_INCREMENT , MD5 VARCHAR(255) NOT NULL , PRIMARY KEY (ID), UNIQUE (MD5)) - ENGINE = InnoDB;'''.format(self.table) + ENGINE = InnoDB;""".format( + self.table + ) cursor.execute(sql) - print('创建表 {}'.format(self.table)) + print("创建表 {}".format(self.table)) self.close_database() @@ -70,12 +79,12 @@ class Classifier(): def get_file_count(self, folder): count = 0 - for (_, _, _files) in os.walk(folder): + for _, _, _files in os.walk(folder): count += len(_files) return count def delete_folders(self, folder): - for (root, dirs, files) in os.walk(folder): + for root, dirs, files in os.walk(folder): for dir in dirs: if dir in self.SKIP_FOLDERS: continue @@ -83,7 +92,7 @@ class Classifier(): if os.path.isdir(abs_path): if self.get_file_count(abs_path) == 0: shutil.rmtree(abs_path) - print('删除目录: {}'.format(abs_path)) + print("删除目录: {}".format(abs_path)) def is_photo(self, file_name): return self.is_image(file_name) and self.contains_exif(file_name) @@ -101,18 +110,18 @@ class Classifier(): return False def contains_exif(self, file_name): - with open(file_name, 'rb') as reader: + with open(file_name, "rb") as reader: tags = exifread.process_file(reader) keys = [key for key in self.PHOTO_EXIF_KEYS if key in tags] return len(keys) > 0 def process_folder(self, folder): - for (root, dirs, files) in os.walk(folder): + for root, dirs, files in os.walk(folder): for file in files: self.process_file(root, file) def get_md5(self, file): - with open(file, 'rb') as reader: + with open(file, "rb") as reader: return hashlib.md5(reader.read()).hexdigest() def process_file(self, root, file): @@ -125,11 +134,13 @@ class Classifier(): new_name = self.rename_move(file_path, year, month, day, md5) self.add_record(md5) self.processed_count += 1 - print('已处理 {}: {} --> {}'.format(self.processed_count, file, new_name)) + print( + "已处理 {}: {} --> {}".format(self.processed_count, file, new_name) + ) except Exception as e: print(str(e)) else: - print('非图片或视频, 忽略文件: {}'.format(file_path)) + print("非图片或视频, 忽略文件: {}".format(file_path)) def add_record(self, md5): try: @@ -138,7 +149,7 @@ class Classifier(): cursor.execute(sql) self.db.commit() except Exception as e: - print('插入记录 {} 到数据库photo_classifier失败: {}'.format(md5, str(e))) + print("插入记录 {} 到数据库photo_classifier失败: {}".format(md5, str(e))) self.db.rollback() raise e @@ -149,24 +160,24 @@ class Classifier(): sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5) cursor.execute(sql) record = cursor.fetchone() - if str(record) != 'None': + if str(record) != "None": os.remove(file_path) - raise Exception('重复文件 {} --> 删除'.format(file_path)) + raise Exception("重复文件 {} --> 删除".format(file_path)) except Exception as e: raise e if (not self.is_image(file_path)) and (not self.is_video(file_path)): - raise Exception('非图片或视频: {} --> 跳过'.format(file_path)) + raise Exception("非图片或视频: {} --> 跳过".format(file_path)) def get_photo_create_date(self, file): - with open(file, 'rb') as reader: + with open(file, "rb") as reader: tags = exifread.process_file(reader) keys = [key for key in self.PHOTO_DATE_KEYS if key in tags] if len(keys) > 0: key = keys[0] origin_date = tags[key] time_str = str(origin_date) - _date = time_str[:10].split(':') + _date = time_str[:10].split(":") year = _date[0] month = _date[1] day = _date[2] @@ -177,8 +188,8 @@ class Classifier(): try: properties = propsys.SHGetPropertyStoreFromParsingName(file) dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue() - time_str = str(dt.astimezone(pytz.timezone('Asia/Shanghai'))) - _date = time_str[:10].split('-') + time_str = str(dt.astimezone(pytz.timezone("Asia/Shanghai"))) + _date = time_str[:10].split("-") year = _date[0] month = _date[1] day = _date[2] @@ -187,7 +198,7 @@ class Classifier(): return None def read_date(self, file): - file = file.replace('/', '\\') + file = file.replace("/", "\\") date = None if self.is_photo(file): date = self.get_photo_create_date(file) # 照片可能没有EXIF日期 @@ -197,7 +208,7 @@ class Classifier(): if not date: # 获取文件上次修改日期 time_str = os.path.getmtime(file) time_str = str(datetime.datetime.fromtimestamp(time_str)) - _date = time_str[:10].split('-') + _date = time_str[:10].split("-") year = _date[0] month = _date[1] day = _date[2] @@ -213,23 +224,24 @@ class Classifier(): elif self.is_video(file_path): output = self.video_output else: - raise Exception('移动文件失败, 非图片或视频: {}'.format(file_path)) + raise Exception("移动文件失败, 非图片或视频: {}".format(file_path)) new_path = os.path.join(output, year, month, day) if not os.path.exists(new_path): os.makedirs(new_path) file_name, file_ext = os.path.splitext(file_path) - new_name = year + '-' + month + '-' + day + '-' + md5 + file_ext + new_name = year + "-" + month + "-" + day + "-" + md5 + file_ext shutil.move(file_path, os.path.join(new_path, new_name)) return new_name cf = Classifier( - input_folder='D:/待分类照片视频', - # input_folder='z:/待分类照片视频/Picture', - photo_output='D:/总仓库-照片视频/总照片备份', - video_output='D:/总仓库-照片视频/总视频备份', - image_output='D:/总仓库-照片视频/总图片备份') + input_folder="D:/待分类照片视频", + # input_folder="D:/总仓库-照片视频-bak", + photo_output="D:/总仓库-照片视频/总照片备份", + video_output="D:/总仓库-照片视频/总视频备份", + image_output="D:/总仓库-照片视频/总图片备份", +) cf.start() # cf.create_table()