photo-classifier/src/photo_classifier.py
2022-01-01 23:36:09 +08:00

223 lines
7.7 KiB
Python

'''
根据读取的照片信息分类照片
分类:
目录名:2020\01
文件名:2020-01-时间戳
'''
import os
from posixpath import abspath
import exifread
import time
import shutil
import hashlib
import pymysql
import datetime
import pytz
from win32com.propsys import propsys, pscon
class Classifier():
    """Sort media files from an input folder into ``<output>/<year>/<month>``.

    Each file under the input folder is hashed with MD5 and looked up in a
    MySQL table. A file whose hash is already recorded is deleted as a
    duplicate. Any other image or video is renamed to
    ``<year>-<month>-<timestamp><ext>`` and moved below one of three output
    roots:

    * ``photo_output`` - images that carry at least one of PHOTO_EXIF_KEYS
    * ``image_output`` - images without any of those EXIF tags
    * ``video_output`` - files with a video extension

    Files that are neither images nor videos are left where they are.
    """

    mode = 'prod'  # 'dev' selects TEST_TABLE, any other value selects TABLE
    IMAGE_EXTENTIONS = ['jpg', 'jpeg', 'bmp', 'png', 'tif', 'gif']
    VIDEO_EXTENTIONS = ['mp4', 'avi', 'rmvb', 'mkv', 'mov', 'ppt', 'amr', 'mpg']
    TEST_TABLE = 'TEST_PHOTO'
    TABLE = 'PHOTO'
    PHOTO_NO_DATE_KEYS = ['EXIF ExifVersion']
    PHOTO_DATE_KEYS = ['Image DateTime', 'EXIF DateTimeOriginal']
    PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS
    # Number of bytes read per step while hashing, so large videos are never
    # loaded into memory in one piece.
    HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self, input_folder, photo_output, video_output, image_output):
        """Remember the folders to work on; no I/O happens here.

        Args:
            input_folder: Folder that is scanned recursively for files.
            photo_output: Root folder for images that contain EXIF tags.
            video_output: Root folder for videos.
            image_output: Root folder for images without EXIF tags.
        """
        self.input = input_folder
        self.photo_output = photo_output
        self.video_output = video_output
        self.image_output = image_output
        self.processed_count = 0
        self.table = self.TEST_TABLE if self.mode == 'dev' else self.TABLE

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _has_extension(file_name, extensions):
        """Return True when the extension of *file_name* is in *extensions*.

        The comparison is case-insensitive and uses the real extension (the
        text after the last dot), so a name that merely ends in the letters
        of an extension, such as ``notajpg``, does not match.
        """
        extension = os.path.splitext(file_name)[1]
        return extension[1:].lower() in extensions

    @staticmethod
    def _year_month(moment):
        """Return ``(year, month)`` of a datetime as zero-padded strings."""
        return ('{:04d}'.format(moment.year), '{:02d}'.format(moment.month))

    @staticmethod
    def _parse_exif_year_month(time_str):
        """Extract ``(year, month)`` from text that starts with ``YYYY:MM``.

        ``YYYY-MM`` is accepted as well. Returns None when the leading seven
        characters are not a four-digit year >= 1 followed by a month in
        01..12 (for example a blank or all-zero date).
        """
        parts = time_str.strip()[:7].replace('-', ':').split(':')
        if len(parts) != 2:
            return None
        year, month = parts
        if len(year) != 4 or len(month) != 2:
            return None
        if not all(ch in '0123456789' for ch in year + month):
            return None
        if int(year) < 1 or not 1 <= int(month) <= 12:
            return None
        return (year, month)

    # ------------------------------------------------------------------
    # Database
    # ------------------------------------------------------------------
    def connect_database(self):
        """Open the MySQL connection and store it in ``self.db``."""
        # SECURITY: the credentials are hard-coded in source. They are kept
        # unchanged so existing deployments keep working, but they should be
        # moved to configuration or the environment and rotated.
        self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier')

    def close_database(self):
        """Close the connection opened by connect_database()."""
        self.db.close()

    def create_table(self):
        """Drop the active table if it exists and create it empty.

        WARNING: every recorded MD5 in ``self.table`` is lost.
        """
        self.connect_database()
        try:
            with self.db.cursor() as cursor:
                sql = 'DROP TABLE IF EXISTS {}'.format(self.table)
                cursor.execute(sql)
                print('删除表 {}'.format(self.table))
                sql = '''CREATE TABLE {} (
                ID INT NOT NULL AUTO_INCREMENT ,
                MD5 VARCHAR(255) NOT NULL ,
                PRIMARY KEY (ID), UNIQUE (MD5))
                ENGINE = InnoDB;'''.format(self.table)
                cursor.execute(sql)
                print('创建表 {}'.format(self.table))
        finally:
            # Release the connection even when one of the statements fails.
            self.close_database()

    def add_record(self, md5):
        """Insert *md5* into the active table and commit.

        Raises:
            Exception: whatever the database driver raised; the transaction
                is rolled back first.
        """
        try:
            with self.db.cursor() as cursor:
                # The table name is one of the class constants; the value is
                # passed as a bound parameter and quoted by the driver.
                sql = "INSERT INTO {}(MD5) VALUES(%s)".format(self.table)
                cursor.execute(sql, (md5,))
            self.db.commit()
        except Exception as e:
            print('插入记录 {} 到数据库photo_classifier失败: {}'.format(md5, str(e)))
            self.db.rollback()
            raise

    # ------------------------------------------------------------------
    # Entry point and folder handling
    # ------------------------------------------------------------------
    def start(self):
        """Process the whole input folder, then remove file-less sub-folders."""
        self.connect_database()
        try:
            self.process_folder(self.input)
            self.delete_folders(self.input)
        finally:
            # Do not leak the connection when processing aborts.
            self.close_database()

    def get_file_count(self, folder):
        """Return the number of files below *folder*, at any depth."""
        return sum(len(files) for (_, _, files) in os.walk(folder))

    def delete_folders(self, folder):
        """Remove every sub-folder of *folder* that holds no files at all.

        A sub-folder counts as empty when no file exists anywhere below it,
        even if it still contains (empty) folders. *folder* itself is kept.
        """
        for (root, dirs, _files) in os.walk(folder):
            remaining = []
            for name in dirs:
                abs_path = os.path.join(root, name)
                if os.path.isdir(abs_path) and self.get_file_count(abs_path) == 0:
                    shutil.rmtree(abs_path)
                    print('删除目录: {}'.format(abs_path))
                else:
                    remaining.append(name)
            # Prune in place so os.walk does not try to descend into the
            # folders that were just removed.
            dirs[:] = remaining

    def process_folder(self, folder):
        """Call process_file() for every file below *folder*."""
        for (root, _dirs, files) in os.walk(folder):
            for file in files:
                self.process_file(root, file)

    # ------------------------------------------------------------------
    # File classification
    # ------------------------------------------------------------------
    def is_photo(self, file_name):
        """Return True for an image file that contains a PHOTO_EXIF_KEYS tag."""
        return self.is_image(file_name) and self.contains_exif(file_name)

    def is_video(self, file_name):
        """Return True when the file extension is in VIDEO_EXTENTIONS."""
        return self._has_extension(file_name, self.VIDEO_EXTENTIONS)

    def is_image(self, file_name):
        """Return True when the file extension is in IMAGE_EXTENTIONS."""
        return self._has_extension(file_name, self.IMAGE_EXTENTIONS)

    def contains_exif(self, file_name):
        """Return True when the file has at least one tag of PHOTO_EXIF_KEYS."""
        with open(file_name, 'rb') as reader:
            tags = exifread.process_file(reader)
        return any(key in tags for key in self.PHOTO_EXIF_KEYS)

    def get_md5(self, file):
        """Return the hexadecimal MD5 digest of the content of *file*."""
        digest = hashlib.md5()
        with open(file, 'rb') as reader:
            # Feed the hash in chunks; reading a multi-gigabyte video in a
            # single call would need that much memory.
            for chunk in iter(lambda: reader.read(self.HASH_CHUNK_SIZE), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def process_file(self, root, file):
        """Validate, date, move and record one file.

        Any failure (duplicate, unsupported type, unreadable file, database
        error) is printed and the file is skipped, so one bad file never
        stops the run.
        """
        file_path = os.path.join(root, file)
        try:
            # Hashing is inside the guard: an unreadable file is reported
            # like every other per-file problem.
            md5 = self.get_md5(file_path)
            self.validate(file_path, md5)
            year, month = self.read_date(file_path)
            new_name = self.rename_move(file_path, year, month)
            # The hash is recorded only after the move succeeded; recording
            # first would make a retry delete the file as a duplicate.
            self.add_record(md5)
            self.processed_count += 1
            print('已处理 {}: {} --> {}'.format(self.processed_count, file, new_name))
        except Exception as e:
            print(str(e))

    def validate(self, file_path, md5):
        """Reject duplicates and unsupported files.

        A file whose *md5* is already in the active table is deleted from
        disk before the exception is raised.

        Raises:
            Exception: for a duplicate file, for a file that is neither an
                image nor a video, or when the database query fails.
        """
        with self.db.cursor() as cursor:
            sql = "SELECT MD5 FROM {} WHERE MD5=%s".format(self.table)
            cursor.execute(sql, (md5,))
            record = cursor.fetchone()
        if record is not None:
            os.remove(file_path)
            raise Exception('重复文件 {} --> 删除'.format(file_path))
        if not (self.is_image(file_path) or self.is_video(file_path)):
            raise Exception('非图片或视频: {} --> 跳过'.format(file_path))

    # ------------------------------------------------------------------
    # Date detection
    # ------------------------------------------------------------------
    def get_photo_create_date(self, file):
        """Return ``(year, month)`` from the EXIF tags of *file*, or None.

        The tags in PHOTO_DATE_KEYS are tried in order. A tag whose value is
        not a usable date (blank, zeroed, unexpected layout) is skipped, so
        the caller can fall back to another date source.
        """
        with open(file, 'rb') as reader:
            tags = exifread.process_file(reader)
        for key in self.PHOTO_DATE_KEYS:
            if key in tags:
                date = self._parse_exif_year_month(str(tags[key]))
                if date is not None:
                    return date
        return None

    def get_video_create_date(self, file):
        """Return ``(year, month)`` of the media-encoded date, or None.

        The value is read through the Windows property system and converted
        to the Asia/Shanghai time zone. None is returned when the property
        is missing or cannot be read.
        """
        try:
            properties = propsys.SHGetPropertyStoreFromParsingName(file)
            dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue()
            local = dt.astimezone(pytz.timezone('Asia/Shanghai'))
            return self._year_month(local)
        except Exception:
            # Best effort: the caller falls back to the modification time.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            return None

    def read_date(self, file):
        """Return ``(year, month)`` strings used to file *file*.

        Photos use their EXIF date and videos their media-encoded date. When
        that date is unavailable, or for plain images, the last-modified
        time of the file is used.
        """
        file = file.replace('/', '\\')
        date = None
        if self.is_photo(file):
            date = self.get_photo_create_date(file)  # a photo may lack an EXIF date
        elif self.is_video(file):
            date = self.get_video_create_date(file)  # a video may lack a media creation date
        if not date:
            # Fall back to the last-modified time of the file.
            modified = datetime.datetime.fromtimestamp(os.path.getmtime(file))
            date = self._year_month(modified)
        return date

    # ------------------------------------------------------------------
    # Moving
    # ------------------------------------------------------------------
    def rename_move(self, file_path, year, month):
        """Move *file_path* to ``<output>/<year>/<month>`` under a new name.

        Args:
            file_path: File to move.
            year: Year folder name, also the first part of the new name.
            month: Month folder name, also the second part of the new name.

        Returns:
            The new base name, ``<year>-<month>-<timestamp><ext>``.

        Raises:
            Exception: when the file is neither an image nor a video.
        """
        if self.is_image(file_path):
            if self.contains_exif(file_path):
                output = self.photo_output
            else:
                output = self.image_output
        elif self.is_video(file_path):
            output = self.video_output
        else:
            raise Exception('移动文件失败, 非图片或视频: {}'.format(file_path))
        new_path = os.path.join(output, year, month)
        os.makedirs(new_path, exist_ok=True)
        file_ext = os.path.splitext(file_path)[1]
        while True:
            new_name = year + '-' + month + '-' + str(time.time()) + file_ext
            destination = os.path.join(new_path, new_name)
            if not os.path.exists(destination):
                break
            # Two files handled within one clock tick would get the same
            # name, and shutil.move may overwrite an existing destination
            # file. Wait for the clock to advance and try again.
            time.sleep(0.001)
        shutil.move(file_path, destination)
        return new_name
# Script section: build the classifier for the local album folders and run it.
# NOTE(review): there is no ``__main__`` guard, so this also executes when the
# module is imported.
cf = Classifier(
    input_folder='D:/temp/相册',
    photo_output='D:/总仓库-照片视频/总照片备份',
    video_output='D:/总仓库-照片视频/总视频备份',
    image_output='D:/总仓库-照片视频/总图片备份',
)
# One-time setup step, disabled on purpose: it drops and recreates the table.
# cf.create_table()
cf.start()