Compare commits
10 Commits
564da5df77
...
57ab326604
Author | SHA1 | Date | |
---|---|---|---|
|
57ab326604 | ||
|
c1b2c52a82 | ||
|
9e556f5e15 | ||
|
99661c4a01 | ||
|
f688825080 | ||
|
6276c1fafc | ||
|
d9eb67503c | ||
|
dc1872083a | ||
|
6c73a526e0 | ||
|
d2d6ae478f |
31
.github/workflows/backup.yml
vendored
Normal file
31
.github/workflows/backup.yml
vendored
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# Mirrors this repository to a self-hosted Gitea instance on every push to the
# configured branch.
name: Backup to Gitea

on:
  push:
    branches:
      - main  # change this to the branch that should trigger the backup

jobs:
  backup:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for all branches and tags

      - name: Set up Git
        run: |
          git config --global user.name "biggerfish"
          git config --global user.email "yuzhiyongcn@qq.com"

      - name: Add Gitea remote
        # Pass the secret and the repository name through the environment
        # instead of expanding them into the script text, so the values are
        # never parsed as shell code.
        env:
          GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
          REPOSITORY: ${{ github.repository }}
        run: |
          REPO_NAME=$(basename "$REPOSITORY")
          git remote add gitea "https://yu:${GITEA_TOKEN}@git.zhiyong.tech/yu/${REPO_NAME}.git"

      - name: Push to Gitea
        # NOTE(review): --all pushes local branches only; after checkout that is
        # presumably just the branch that triggered the run — confirm this is
        # the intended scope of the backup.
        run: |
          git push -u gitea --all --force
          git push -u gitea --tags --force
|
@ -1,5 +1,5 @@
|
|||||||
# 照片分类器-python3
|
# 照片分类器-python3
|
||||||
1. 对指定目录及子目录下的照片进行分类, 先按 年/月 分目录, 文件名重命名为"年-月-时间戳"
|
1. 对指定目录及子目录下的照片进行分类, 先按 年/月 分目录, 文件名重命名为"年-月-时间戳"
|
||||||
2. 每个照片文件的md5校验码存储到oracle新加坡云的mysql数据库'photo_classifier', 用于检查照片是否重复, 重复的会跳过
|
2. 每个照片文件的md5校验码存储到oracle新加坡云的mysql数据库'photo_classifier', 表名'photo', 用于检查照片是否重复, 重复的会跳过
|
||||||
3. 如果照片经过修改, 丢失EXIF信息, 将被跳过
|
3. 可以处理视频, 照片, 非照片的图片
|
||||||
4. 整理后的照片存放到"自动备份", 被备份到多个云盘
|
4. 整理后的照片存放到"总仓库-照片视频", 被备份到多个云盘
|
@ -1,13 +1,12 @@
|
|||||||
'''
|
"""
|
||||||
根据读取的照片信息分类照片
|
根据读取的照片信息分类照片
|
||||||
分类:
|
分类:
|
||||||
目录名:2020\01
|
目录名:2020\01
|
||||||
文件名:2020-01-时间戳
|
文件名:2020-01-时间戳
|
||||||
处理过的文件名存为json文件: processed_files.json
|
"""
|
||||||
如果照片经过修改, 丢失原始EXIF信息, 将会被跳过
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from posixpath import abspath
|
from posixpath import abspath
|
||||||
import exifread
|
import exifread
|
||||||
import time
|
import time
|
||||||
@ -19,15 +18,16 @@ import pytz
|
|||||||
from win32com.propsys import propsys, pscon
|
from win32com.propsys import propsys, pscon
|
||||||
|
|
||||||
|
|
||||||
class Classifier():
|
class Classifier1:
|
||||||
mode = 'prod' # 开发模式(dev)还是产品模式(prod)
|
mode = "prod" # 开发模式(dev)还是产品模式(prod)
|
||||||
IMAGE_EXTENTIONS = ['jpg', 'jpeg', 'bmp', 'png']
|
IMAGE_EXTENTIONS = ["jpg", "jpeg", "bmp", "png", "tif", "gif", "heic"]
|
||||||
VIDEO_EXTENTIONS = ['mp4']
|
VIDEO_EXTENTIONS = ["mp4", "avi", "rmvb", "mkv", "mov", "amr", "mpg"]
|
||||||
TEST_TABLE = 'TEST_PHOTO'
|
TEST_TABLE = "TEST_PHOTO"
|
||||||
TABLE = 'PHOTO'
|
TABLE = "PHOTO"
|
||||||
PHOTO_NO_DATE_KEYS = ['EXIF ExifVersion']
|
PHOTO_NO_DATE_KEYS = ["EXIF ExifVersion"]
|
||||||
PHOTO_DATE_KEYS = ['Image DateTime', 'EXIF DateTimeOriginal']
|
PHOTO_DATE_KEYS = ["Image DateTime", "EXIF DateTimeOriginal"]
|
||||||
PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS
|
PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS
|
||||||
|
SKIP_FOLDERS = ["System Volume Information", "$RECYCLE.BIN", ".stfolder"]
|
||||||
|
|
||||||
def __init__(self, input_folder, photo_output, video_output, image_output):
|
def __init__(self, input_folder, photo_output, video_output, image_output):
|
||||||
self.input = input_folder
|
self.input = input_folder
|
||||||
@ -35,11 +35,17 @@ class Classifier():
|
|||||||
self.video_output = video_output
|
self.video_output = video_output
|
||||||
self.image_output = image_output
|
self.image_output = image_output
|
||||||
self.processed_count = 0
|
self.processed_count = 0
|
||||||
self.table = self.TEST_TABLE if self.mode == 'dev' else self.TABLE
|
self.table = self.TEST_TABLE if self.mode == "dev" else self.TABLE
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def connect_database(self):
|
def connect_database(self):
|
||||||
self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier')
|
self.db = pymysql.connect(
|
||||||
|
host="northflyfish.myqnapcloud.cn",
|
||||||
|
user="admin",
|
||||||
|
password="zhiyong@214",
|
||||||
|
database="photo_classifier",
|
||||||
|
port=3307,
|
||||||
|
)
|
||||||
|
|
||||||
def close_database(self):
|
def close_database(self):
|
||||||
self.db.close()
|
self.db.close()
|
||||||
@ -48,17 +54,19 @@ class Classifier():
|
|||||||
self.connect_database()
|
self.connect_database()
|
||||||
cursor = self.db.cursor()
|
cursor = self.db.cursor()
|
||||||
|
|
||||||
sql = 'DROP TABLE IF EXISTS {}'.format(self.table)
|
sql = "DROP TABLE IF EXISTS {}".format(self.table)
|
||||||
cursor.execute(sql)
|
cursor.execute(sql)
|
||||||
print('删除表 {}'.format(self.table))
|
print("删除表 {}".format(self.table))
|
||||||
|
|
||||||
sql = '''CREATE TABLE {} (
|
sql = """CREATE TABLE {} (
|
||||||
ID INT NOT NULL AUTO_INCREMENT ,
|
ID INT NOT NULL AUTO_INCREMENT ,
|
||||||
MD5 VARCHAR(255) NOT NULL ,
|
MD5 VARCHAR(255) NOT NULL ,
|
||||||
PRIMARY KEY (ID), UNIQUE (MD5))
|
PRIMARY KEY (ID), UNIQUE (MD5))
|
||||||
ENGINE = InnoDB;'''.format(self.table)
|
ENGINE = InnoDB;""".format(
|
||||||
|
self.table
|
||||||
|
)
|
||||||
cursor.execute(sql)
|
cursor.execute(sql)
|
||||||
print('创建表 {}'.format(self.table))
|
print("创建表 {}".format(self.table))
|
||||||
|
|
||||||
self.close_database()
|
self.close_database()
|
||||||
|
|
||||||
@ -70,18 +78,20 @@ class Classifier():
|
|||||||
|
|
||||||
def get_file_count(self, folder):
|
def get_file_count(self, folder):
|
||||||
count = 0
|
count = 0
|
||||||
for (_, _, _files) in os.walk(folder):
|
for _, _, _files in os.walk(folder):
|
||||||
count += len(_files)
|
count += len(_files)
|
||||||
return count
|
return count
|
||||||
|
|
||||||
def delete_folders(self, folder):
|
def delete_folders(self, folder):
|
||||||
for (root, dirs, files) in os.walk(folder):
|
for root, dirs, files in os.walk(folder):
|
||||||
for dir in dirs:
|
for dir in dirs:
|
||||||
|
if dir in self.SKIP_FOLDERS:
|
||||||
|
continue
|
||||||
abs_path = os.path.join(root, dir)
|
abs_path = os.path.join(root, dir)
|
||||||
if os.path.isdir(abs_path):
|
if os.path.isdir(abs_path):
|
||||||
if self.get_file_count(abs_path) == 0:
|
if self.get_file_count(abs_path) == 0:
|
||||||
shutil.rmtree(abs_path)
|
shutil.rmtree(abs_path)
|
||||||
print('删除目录: {}'.format(abs_path))
|
print("删除目录: {}".format(abs_path))
|
||||||
|
|
||||||
def is_photo(self, file_name):
|
def is_photo(self, file_name):
|
||||||
return self.is_image(file_name) and self.contains_exif(file_name)
|
return self.is_image(file_name) and self.contains_exif(file_name)
|
||||||
@ -99,32 +109,37 @@ class Classifier():
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def contains_exif(self, file_name):
|
def contains_exif(self, file_name):
|
||||||
with open(file_name, 'rb') as reader:
|
with open(file_name, "rb") as reader:
|
||||||
tags = exifread.process_file(reader)
|
tags = exifread.process_file(reader)
|
||||||
keys = [key for key in self.PHOTO_EXIF_KEYS if key in tags]
|
keys = [key for key in self.PHOTO_EXIF_KEYS if key in tags]
|
||||||
return len(keys) > 0
|
return len(keys) > 0
|
||||||
|
|
||||||
def process_folder(self, folder):
|
def process_folder(self, folder):
|
||||||
for (root, dirs, files) in os.walk(folder):
|
for root, dirs, files in os.walk(folder):
|
||||||
for file in files:
|
for file in files:
|
||||||
self.process_file(root, file)
|
self.process_file(root, file)
|
||||||
|
|
||||||
def get_md5(self, file):
|
def get_md5(self, file):
|
||||||
with open(file, 'rb') as reader:
|
with open(file, "rb") as reader:
|
||||||
return hashlib.md5(reader.read()).hexdigest()
|
return hashlib.md5(reader.read()).hexdigest()
|
||||||
|
|
||||||
def process_file(self, root, file):
|
def process_file(self, root, file):
|
||||||
file_path = os.path.join(root, file)
|
file_path = os.path.join(root, file)
|
||||||
md5 = self.get_md5(file_path)
|
if self.is_image(file_path) or self.is_video(file_path):
|
||||||
try:
|
md5 = self.get_md5(file_path)
|
||||||
self.validate(file_path, md5)
|
try:
|
||||||
year, month = self.read_date(file_path)
|
self.validate(file_path, md5)
|
||||||
new_name = self.rename_move(file_path, year, month)
|
year, month, day = self.read_date(file_path)
|
||||||
self.add_record(md5)
|
new_name = self.rename_move(file_path, year, month, day, md5)
|
||||||
self.processed_count += 1
|
self.add_record(md5)
|
||||||
print('已处理 {}: {} --> {}'.format(self.processed_count, file, new_name))
|
self.processed_count += 1
|
||||||
except Exception as e:
|
print(
|
||||||
print(str(e))
|
"已处理 {}: {} --> {}".format(self.processed_count, file, new_name)
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(str(e))
|
||||||
|
else:
|
||||||
|
print("非图片或视频, 忽略文件: {}".format(file_path))
|
||||||
|
|
||||||
def add_record(self, md5):
|
def add_record(self, md5):
|
||||||
try:
|
try:
|
||||||
@ -133,7 +148,7 @@ class Classifier():
|
|||||||
cursor.execute(sql)
|
cursor.execute(sql)
|
||||||
self.db.commit()
|
self.db.commit()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('插入记录 {} 到数据库photo_classifier失败: {}'.format(md5, str(e)))
|
print("插入记录 {} 到数据库photo_classifier失败: {}".format(md5, str(e)))
|
||||||
self.db.rollback()
|
self.db.rollback()
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
@ -144,43 +159,45 @@ class Classifier():
|
|||||||
sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5)
|
sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5)
|
||||||
cursor.execute(sql)
|
cursor.execute(sql)
|
||||||
record = cursor.fetchone()
|
record = cursor.fetchone()
|
||||||
if str(record) != 'None':
|
if str(record) != "None":
|
||||||
os.remove(file_path)
|
os.remove(file_path)
|
||||||
raise Exception('重复照片 {} --> 删除'.format(file_path))
|
raise Exception("重复文件 {} --> 删除".format(file_path))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
if (not self.is_image(file_path)) and (not self.is_video(file_path)):
|
if (not self.is_image(file_path)) and (not self.is_video(file_path)):
|
||||||
raise Exception('非图片或视频: {} --> 跳过'.format(file_path))
|
raise Exception("非图片或视频: {} --> 跳过".format(file_path))
|
||||||
|
|
||||||
def get_photo_create_date(self, file):
|
def get_photo_create_date(self, file):
|
||||||
with open(file, 'rb') as reader:
|
with open(file, "rb") as reader:
|
||||||
tags = exifread.process_file(reader)
|
tags = exifread.process_file(reader)
|
||||||
keys = [key for key in self.PHOTO_DATE_KEYS if key in tags]
|
keys = [key for key in self.PHOTO_DATE_KEYS if key in tags]
|
||||||
if len(keys) > 0:
|
if len(keys) > 0:
|
||||||
key = keys[0]
|
key = keys[0]
|
||||||
origin_date = tags[key]
|
origin_date = tags[key]
|
||||||
time_str = str(origin_date)
|
time_str = str(origin_date)
|
||||||
_date = time_str[:7].split(':')
|
_date = time_str[:10].split(":")
|
||||||
year = _date[0]
|
year = _date[0]
|
||||||
month = _date[1]
|
month = _date[1]
|
||||||
return (year, month)
|
day = _date[2]
|
||||||
|
return (year, month, day)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_video_create_date(self, file):
|
def get_video_create_date(self, file):
|
||||||
try:
|
try:
|
||||||
properties = propsys.SHGetPropertyStoreFromParsingName(file)
|
properties = propsys.SHGetPropertyStoreFromParsingName(file)
|
||||||
dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue()
|
dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue()
|
||||||
time_str = str(dt.astimezone(pytz.timezone('Asia/Shanghai')))
|
time_str = str(dt.astimezone(pytz.timezone("Asia/Shanghai")))
|
||||||
_date = time_str[:7].split('-')
|
_date = time_str[:10].split("-")
|
||||||
year = _date[0]
|
year = _date[0]
|
||||||
month = _date[1]
|
month = _date[1]
|
||||||
return (year, month)
|
day = _date[2]
|
||||||
|
return (year, month, day)
|
||||||
except:
|
except:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def read_date(self, file):
|
def read_date(self, file):
|
||||||
file = file.replace('/', '\\')
|
file = file.replace("/", "\\")
|
||||||
date = None
|
date = None
|
||||||
if self.is_photo(file):
|
if self.is_photo(file):
|
||||||
date = self.get_photo_create_date(file) # 照片可能没有EXIF日期
|
date = self.get_photo_create_date(file) # 照片可能没有EXIF日期
|
||||||
@ -190,13 +207,14 @@ class Classifier():
|
|||||||
if not date: # 获取文件上次修改日期
|
if not date: # 获取文件上次修改日期
|
||||||
time_str = os.path.getmtime(file)
|
time_str = os.path.getmtime(file)
|
||||||
time_str = str(datetime.datetime.fromtimestamp(time_str))
|
time_str = str(datetime.datetime.fromtimestamp(time_str))
|
||||||
_date = time_str[:7].split('-')
|
_date = time_str[:10].split("-")
|
||||||
year = _date[0]
|
year = _date[0]
|
||||||
month = _date[1]
|
month = _date[1]
|
||||||
date = (year, month)
|
day = _date[2]
|
||||||
|
date = (year, month, day)
|
||||||
return date
|
return date
|
||||||
|
|
||||||
def rename_move(self, file_path, year, month):
|
def rename_move(self, file_path, year, month, day, md5):
|
||||||
if self.is_image(file_path):
|
if self.is_image(file_path):
|
||||||
if self.is_photo(file_path):
|
if self.is_photo(file_path):
|
||||||
output = self.photo_output
|
output = self.photo_output
|
||||||
@ -205,21 +223,24 @@ class Classifier():
|
|||||||
elif self.is_video(file_path):
|
elif self.is_video(file_path):
|
||||||
output = self.video_output
|
output = self.video_output
|
||||||
else:
|
else:
|
||||||
raise Exception('移动文件失败, 非图片或视频: {}'.format(file_path))
|
raise Exception("移动文件失败, 非图片或视频: {}".format(file_path))
|
||||||
|
|
||||||
new_path = os.path.join(output, year, month)
|
new_path = os.path.join(output, year, month, day)
|
||||||
if not os.path.exists(new_path):
|
if not os.path.exists(new_path):
|
||||||
os.makedirs(new_path)
|
os.makedirs(new_path)
|
||||||
file_name, file_ext = os.path.splitext(file_path)
|
file_name, file_ext = os.path.splitext(file_path)
|
||||||
new_name = year + '-' + month + '-' + str(time.time()) + file_ext
|
new_name = year + "-" + month + "-" + day + "-" + md5 + file_ext
|
||||||
shutil.move(file_path, os.path.join(new_path, new_name))
|
shutil.move(file_path, os.path.join(new_path, new_name))
|
||||||
return new_name
|
return new_name
|
||||||
|
|
||||||
|
|
||||||
cf = Classifier('D:/temp/相册',
|
cf = Classifier1(
|
||||||
photo_output='D:/总仓库-照片视频/总照片备份',
|
input_folder="D:/待分类照片视频1",
|
||||||
video_output='D:/总仓库-照片视频/总视频备份',
|
# input_folder='z:/待分类照片视频/Picture',
|
||||||
image_output='D:/总仓库-照片视频/总图片备份')
|
photo_output="D:/总仓库-照片视频1/总照片备份",
|
||||||
|
video_output="D:/总仓库-照片视频1/总视频备份",
|
||||||
|
image_output="D:/总仓库-照片视频1/总图片备份",
|
||||||
|
)
|
||||||
|
|
||||||
|
cf.start()
|
||||||
# cf.create_table()
|
# cf.create_table()
|
||||||
cf.start()
|
|
247
src/photo_classifier-当前使用.py
Normal file
247
src/photo_classifier-当前使用.py
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
"""
Classify photos, videos and other images by the date read from each file.

Resulting layout:
    directory: <output>/2020/01/15            (year/month/day)
    file name: 2020-01-15-<md5 of the file><original extension>
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from posixpath import abspath
|
||||||
|
import exifread
|
||||||
|
import time
|
||||||
|
import shutil
|
||||||
|
import hashlib
|
||||||
|
import pymysql
|
||||||
|
import datetime
|
||||||
|
import pytz
|
||||||
|
from win32com.propsys import propsys, pscon
|
||||||
|
|
||||||
|
# Uses the database hosted on Oracle (Korea)
|
||||||
|
|
||||||
|
|
||||||
|
class Classifier:
    """Sort media files into dated folders and de-duplicate them by MD5.

    Every image or video found under ``input_folder`` is hashed, checked
    against the MD5 table in the ``photo_classifier`` MySQL database, and then
    moved to ``<output>/<year>/<month>/<day>/<year>-<month>-<day>-<md5><ext>``.
    Files whose MD5 is already recorded are deleted from the input folder.

    Three outputs are used: ``photo_output`` for images that carry EXIF data,
    ``image_output`` for images without it and ``video_output`` for videos.
    """

    mode = "prod"  # "dev" selects TEST_TABLE, any other value selects TABLE
    IMAGE_EXTENTIONS = ["jpg", "jpeg", "bmp", "png", "tif", "gif", "heic"]
    VIDEO_EXTENTIONS = ["mp4", "avi", "rmvb", "mkv", "mov", "amr", "mpg"]
    TEST_TABLE = "TEST_PHOTO"
    TABLE = "PHOTO"
    # EXIF tags that mark a file as a photo but carry no date.
    PHOTO_NO_DATE_KEYS = ["EXIF ExifVersion"]
    # EXIF tags holding a date, in order of preference.
    PHOTO_DATE_KEYS = ["Image DateTime", "EXIF DateTimeOriginal"]
    PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS
    # Directory names that delete_folders() must neither remove nor enter.
    SKIP_FOLDERS = ["System Volume Information", "$RECYCLE.BIN", ".stfolder"]
    # Read size used when hashing, so large videos are never loaded whole.
    _MD5_CHUNK_SIZE = 1024 * 1024

    def __init__(self, input_folder, photo_output, video_output, image_output):
        """Store the folders to work on; no I/O happens here.

        Args:
            input_folder: Root folder scanned recursively for media files.
            photo_output: Destination root for images that contain EXIF data.
            video_output: Destination root for videos.
            image_output: Destination root for images without EXIF data.
        """
        self.input = input_folder
        self.photo_output = photo_output
        self.video_output = video_output
        self.image_output = image_output
        self.processed_count = 0
        self.table = self.TEST_TABLE if self.mode == "dev" else self.TABLE

    def connect_database(self) -> None:
        """Open the MySQL connection and keep it in ``self.db``.

        Each setting can be overridden with an environment variable
        (``PHOTO_DB_HOST``, ``PHOTO_DB_USER``, ``PHOTO_DB_PASSWORD``,
        ``PHOTO_DB_NAME``); the previous literals remain the defaults.
        """
        # NOTE(review): the default credentials are committed to the
        # repository; rotate them and rely on the environment variables.
        self.db = pymysql.connect(
            host=os.environ.get("PHOTO_DB_HOST", "panel.zhiyong.tech"),
            user=os.environ.get("PHOTO_DB_USER", "yu_biggerfish"),
            password=os.environ.get("PHOTO_DB_PASSWORD", "jRHTbQrdkfNNTztH"),
            database=os.environ.get("PHOTO_DB_NAME", "photo_classifier"),
        )

    def close_database(self) -> None:
        """Close the connection opened by connect_database()."""
        self.db.close()

    def create_table(self) -> None:
        """Drop and recreate the MD5 table. Destroys all existing records."""
        self.connect_database()
        try:
            cursor = self.db.cursor()

            # The table name comes from the class constants, never from input,
            # so formatting it into the statement is safe.
            sql = "DROP TABLE IF EXISTS {}".format(self.table)
            cursor.execute(sql)
            print("删除表 {}".format(self.table))

            sql = """CREATE TABLE {} (
                ID INT NOT NULL AUTO_INCREMENT ,
                MD5 VARCHAR(255) NOT NULL ,
                PRIMARY KEY (ID), UNIQUE (MD5))
                ENGINE = InnoDB;""".format(
                self.table
            )
            cursor.execute(sql)
            print("创建表 {}".format(self.table))
        finally:
            self.close_database()

    def start(self) -> None:
        """Process the input folder, then remove the folders left empty."""
        self.connect_database()
        try:
            self.process_folder(self.input)
            self.delete_folders(self.input)
        finally:
            # Release the connection even when processing aborts.
            self.close_database()

    def get_file_count(self, folder) -> int:
        """Return the number of files under ``folder``, recursively."""
        return sum(len(files) for _, _, files in os.walk(folder))

    def delete_folders(self, folder) -> None:
        """Remove every sub-folder of ``folder`` that contains no files.

        Folders named in SKIP_FOLDERS are left alone and not descended into.
        """
        for root, dirs, _files in os.walk(folder):
            remaining = []
            for name in dirs:
                if name in self.SKIP_FOLDERS:
                    continue
                abs_path = os.path.join(root, name)
                if os.path.isdir(abs_path) and self.get_file_count(abs_path) == 0:
                    shutil.rmtree(abs_path)
                    print("删除目录: {}".format(abs_path))
                else:
                    remaining.append(name)
            # Prune in place so os.walk neither enters skipped folders nor
            # tries to list folders that were just removed.
            dirs[:] = remaining

    @staticmethod
    def _extension(file_name: str) -> str:
        """Return the lower-cased extension of ``file_name`` without the dot."""
        return os.path.splitext(file_name)[1][1:].lower()

    def is_photo(self, file_name) -> bool:
        """Return True for an image file that carries one of PHOTO_EXIF_KEYS."""
        return self.is_image(file_name) and self.contains_exif(file_name)

    def is_video(self, file_name) -> bool:
        """Return True when the file extension is in VIDEO_EXTENTIONS."""
        return self._extension(file_name) in self.VIDEO_EXTENTIONS

    def is_image(self, file_name) -> bool:
        """Return True when the file extension is in IMAGE_EXTENTIONS."""
        return self._extension(file_name) in self.IMAGE_EXTENTIONS

    def contains_exif(self, file_name) -> bool:
        """Return True when the file has at least one tag of PHOTO_EXIF_KEYS."""
        with open(file_name, "rb") as reader:
            tags = exifread.process_file(reader)
        return any(key in tags for key in self.PHOTO_EXIF_KEYS)

    def process_folder(self, folder) -> None:
        """Run process_file() on every file under ``folder``, recursively."""
        for root, _dirs, files in os.walk(folder):
            for file in files:
                self.process_file(root, file)

    def get_md5(self, file) -> str:
        """Return the hexadecimal MD5 digest of the file's content."""
        digest = hashlib.md5()
        with open(file, "rb") as reader:
            # Hash in chunks: videos can be far larger than available memory.
            for chunk in iter(lambda: reader.read(self._MD5_CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def process_file(self, root, file) -> None:
        """Hash, validate, move and record one file.

        Files that are neither images nor videos are reported and ignored.
        Any failure is printed and the file is skipped, so one bad file does
        not stop the run.
        """
        file_path = os.path.join(root, file)
        if not (self.is_image(file_path) or self.is_video(file_path)):
            print("非图片或视频, 忽略文件: {}".format(file_path))
            return

        try:
            md5 = self.get_md5(file_path)
            self.validate(file_path, md5)
            year, month, day = self.read_date(file_path)
            new_name = self.rename_move(file_path, year, month, day, md5)
            self.add_record(md5)
            self.processed_count += 1
            print(
                "已处理 {}: {} --> {}".format(self.processed_count, file, new_name)
            )
        except Exception as e:
            # Deliberate best effort: report the problem and continue.
            print(str(e))

    def add_record(self, md5) -> None:
        """Insert ``md5`` into the table; roll back and re-raise on failure."""
        try:
            cursor = self.db.cursor()
            # The value is bound as a parameter instead of formatted in.
            sql = "INSERT INTO {}(MD5) VALUES(%s)".format(self.table)
            cursor.execute(sql, (md5,))
            self.db.commit()
        except Exception as e:
            print("插入记录 {} 到数据库photo_classifier失败: {}".format(md5, str(e)))
            self.db.rollback()
            raise

    def validate(self, file_path, md5) -> None:
        """Reject duplicates and non-media files.

        Raises:
            Exception: when ``md5`` is already recorded (the file is deleted
                first), or when the file is neither an image nor a video.
        """
        # Check whether the MD5 of the file already exists in the database.
        cursor = self.db.cursor()
        sql = "SELECT MD5 FROM {} WHERE MD5=%s".format(self.table)
        cursor.execute(sql, (md5,))
        if cursor.fetchone() is not None:
            os.remove(file_path)
            raise Exception("重复文件 {} --> 删除".format(file_path))

        if (not self.is_image(file_path)) and (not self.is_video(file_path)):
            raise Exception("非图片或视频: {} --> 跳过".format(file_path))

    @staticmethod
    def _split_date(text, separator):
        """Split the first ten characters of ``text`` into (year, month, day).

        Returns None when the text is not a valid calendar date, so callers
        can fall back to another date source.
        """
        parts = text[:10].split(separator)
        if len(parts) != 3:
            return None
        try:
            datetime.date(*(int(part) for part in parts))
        except ValueError:
            return None
        return tuple(parts)

    def get_photo_create_date(self, file):
        """Return (year, month, day) strings from the EXIF date tags.

        The tags of PHOTO_DATE_KEYS are tried in order; None is returned when
        none of them is present with a valid ``YYYY:MM:DD`` date.
        """
        with open(file, "rb") as reader:
            tags = exifread.process_file(reader)
        for key in self.PHOTO_DATE_KEYS:
            if key in tags:
                date = self._split_date(str(tags[key]), ":")
                if date:
                    return date
        return None

    def get_video_create_date(self, file):
        """Return (year, month, day) strings from the media encoded date.

        The date is read through the Windows property store and converted to
        the Asia/Shanghai time zone. Returns None when it cannot be read.
        """
        try:
            properties = propsys.SHGetPropertyStoreFromParsingName(file)
            dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue()
            time_str = str(dt.astimezone(pytz.timezone("Asia/Shanghai")))
            return self._split_date(time_str, "-")
        except Exception:
            # Many videos carry no encoded date; the caller falls back to the
            # file modification time.
            return None

    def read_date(self, file):
        """Return (year, month, day) strings for ``file``.

        Uses the EXIF date for photos and the media encoded date for videos;
        when neither is available, the file's last modification time.
        """
        # Native separators; on Windows this turns "/" into "\\".
        file = os.path.normpath(file)
        date = None
        if self.is_photo(file):
            date = self.get_photo_create_date(file)  # photos may lack an EXIF date
        elif self.is_video(file):
            date = self.get_video_create_date(file)  # videos may lack a media date

        if not date:  # fall back to the last modification time
            modified = datetime.datetime.fromtimestamp(os.path.getmtime(file))
            date = tuple(modified.date().isoformat().split("-"))
        return date

    def rename_move(self, file_path, year, month, day, md5) -> str:
        """Move the file into its dated folder and return its new name.

        The destination is ``<output>/<year>/<month>/<day>`` and the new name
        is ``<year>-<month>-<day>-<md5><original extension>``.

        Raises:
            Exception: when the file is neither an image nor a video.
        """
        if self.is_image(file_path):
            if self.is_photo(file_path):
                output = self.photo_output
            else:
                output = self.image_output
        elif self.is_video(file_path):
            output = self.video_output
        else:
            raise Exception("移动文件失败, 非图片或视频: {}".format(file_path))

        new_path = os.path.join(output, year, month, day)
        os.makedirs(new_path, exist_ok=True)
        _, file_ext = os.path.splitext(file_path)
        new_name = year + "-" + month + "-" + day + "-" + md5 + file_ext
        shutil.move(file_path, os.path.join(new_path, new_name))
        return new_name
|
||||||
|
|
||||||
|
|
||||||
|
# Run the classifier only when this file is executed as a script, so that
# importing the module does not connect to the database or move any files.
if __name__ == "__main__":
    cf = Classifier(
        input_folder="D:/待分类照片视频",
        # input_folder="D:/总仓库-照片视频-bak",
        photo_output="D:/总仓库-照片视频/总照片备份",
        video_output="D:/总仓库-照片视频/总视频备份",
        image_output="D:/总仓库-照片视频/总图片备份",
    )

    cf.start()
    # One-time setup: drops and recreates the MD5 table. Enable it instead of
    # start() only when the table must be (re)initialised.
    # cf.create_table()
|
Loading…
x
Reference in New Issue
Block a user