Compare commits

...

10 Commits

Author SHA1 Message Date
biggerfish
57ab326604 build: backup to gitea 2024-06-06 22:15:06 +08:00
biggerfish
c1b2c52a82 feat: use korea vps as DB 2024-06-06 22:13:24 +08:00
zhiyong
9e556f5e15 使用新加坡服务器存储文件信息 2023-11-28 15:31:32 +08:00
biggerfish
99661c4a01 feat: add mysql database of nas 2023-10-08 19:00:25 +08:00
bigfish
f688825080 feat: remove ppt files and add skip folders 2023-06-27 14:04:37 +08:00
bigfish
6276c1fafc feat: add day as a folder 2023-04-01 00:22:49 +08:00
biggerfish
d9eb67503c 从视频格式中去除exe 2022-01-01 23:36:09 +08:00
biggerfish
dc1872083a 支持更多视频格式:mkv, mov, avi... 2022-01-01 04:44:17 +08:00
biggerfish
6c73a526e0 更新readme 2022-01-01 00:42:33 +08:00
biggerfish
d2d6ae478f 调用时,为参数input_folder添加key 2022-01-01 00:40:28 +08:00
4 changed files with 359 additions and 60 deletions

31
.github/workflows/backup.yml vendored Normal file
View File

@ -0,0 +1,31 @@
# Mirrors the repository (all branches and tags) to a self-hosted Gitea
# instance on every push to the configured branch.
name: Backup to Gitea

on:
  push:
    branches:
      - main  # change the branch name as needed

jobs:
  backup:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        # v2 runs on the deprecated Node 12 runtime; v4 is the maintained release.
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for all branches and tags

      - name: Set up Git
        run: |
          git config --global user.name "biggerfish"
          git config --global user.email "yuzhiyongcn@qq.com"

      - name: Add Gitea remote
        # The Gitea repository is assumed to have the same name as this one.
        run: |
          REPO_NAME=$(basename "${{ github.repository }}")
          git remote add gitea "https://yu:${{ secrets.GITEA_TOKEN }}@git.zhiyong.tech/yu/${REPO_NAME}.git"

      - name: Push to Gitea
        # --force makes the mirror follow history rewrites on the source.
        run: |
          git push -u gitea --all --force
          git push -u gitea --tags --force

View File

@ -1,5 +1,5 @@
# 照片分类器-python3 # 照片分类器-python3
1. 对指定目录及子目录下的照片进行分类, 先按 年/月 分目录, 文件名重命名为"年-月-时间戳" 1. 对指定目录及子目录下的照片进行分类, 先按 年/月 分目录, 文件名重命名为"年-月-时间戳"
2. 每个照片文件的md5校验码存储到oracle新加坡云的mysql数据库'photo_classifier', 用于检查照片是否重复, 重复的会跳过 2. 每个照片文件的md5校验码存储到oracle新加坡云的mysql数据库'photo_classifier', 表名'photo', 用于检查照片是否重复, 重复的会跳过
3. 如果照片经过修改, 丢失EXIF信息, 将被跳过 3. 可以处理视频, 照片, 非照片的图片
4. 整理后的照片存放到"自动备份", 被备份到多个云盘 4. 整理后的照片存放到"总仓库-照片视频", 被备份到多个云盘

View File

@ -1,13 +1,12 @@
''' """
根据读取的照片信息分类照片 根据读取的照片信息分类照片
分类: 分类:
目录名:2020\01 目录名:2020\01
文件名:2020-01-时间戳 文件名:2020-01-时间戳
处理过的文件名存为json文件: processed_files.json """
如果照片经过修改, 丢失原始EXIF信息, 将会被跳过
'''
import os import os
import sys
from posixpath import abspath from posixpath import abspath
import exifread import exifread
import time import time
@ -19,15 +18,16 @@ import pytz
from win32com.propsys import propsys, pscon from win32com.propsys import propsys, pscon
class Classifier(): class Classifier1:
mode = 'prod' # 开发模式(dev)还是产品模式(prod) mode = "prod" # 开发模式(dev)还是产品模式(prod)
IMAGE_EXTENTIONS = ['jpg', 'jpeg', 'bmp', 'png'] IMAGE_EXTENTIONS = ["jpg", "jpeg", "bmp", "png", "tif", "gif", "heic"]
VIDEO_EXTENTIONS = ['mp4'] VIDEO_EXTENTIONS = ["mp4", "avi", "rmvb", "mkv", "mov", "amr", "mpg"]
TEST_TABLE = 'TEST_PHOTO' TEST_TABLE = "TEST_PHOTO"
TABLE = 'PHOTO' TABLE = "PHOTO"
PHOTO_NO_DATE_KEYS = ['EXIF ExifVersion'] PHOTO_NO_DATE_KEYS = ["EXIF ExifVersion"]
PHOTO_DATE_KEYS = ['Image DateTime', 'EXIF DateTimeOriginal'] PHOTO_DATE_KEYS = ["Image DateTime", "EXIF DateTimeOriginal"]
PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS
SKIP_FOLDERS = ["System Volume Information", "$RECYCLE.BIN", ".stfolder"]
def __init__(self, input_folder, photo_output, video_output, image_output): def __init__(self, input_folder, photo_output, video_output, image_output):
self.input = input_folder self.input = input_folder
@ -35,11 +35,17 @@ class Classifier():
self.video_output = video_output self.video_output = video_output
self.image_output = image_output self.image_output = image_output
self.processed_count = 0 self.processed_count = 0
self.table = self.TEST_TABLE if self.mode == 'dev' else self.TABLE self.table = self.TEST_TABLE if self.mode == "dev" else self.TABLE
pass pass
def connect_database(self): def connect_database(self):
self.db = pymysql.connect(host='bt.biggerfish.tech', user='admin', password='zhiyong214', database='photo_classifier') self.db = pymysql.connect(
host="northflyfish.myqnapcloud.cn",
user="admin",
password="zhiyong@214",
database="photo_classifier",
port=3307,
)
def close_database(self): def close_database(self):
self.db.close() self.db.close()
@ -48,17 +54,19 @@ class Classifier():
self.connect_database() self.connect_database()
cursor = self.db.cursor() cursor = self.db.cursor()
sql = 'DROP TABLE IF EXISTS {}'.format(self.table) sql = "DROP TABLE IF EXISTS {}".format(self.table)
cursor.execute(sql) cursor.execute(sql)
print('删除表 {}'.format(self.table)) print("删除表 {}".format(self.table))
sql = '''CREATE TABLE {} ( sql = """CREATE TABLE {} (
ID INT NOT NULL AUTO_INCREMENT , ID INT NOT NULL AUTO_INCREMENT ,
MD5 VARCHAR(255) NOT NULL , MD5 VARCHAR(255) NOT NULL ,
PRIMARY KEY (ID), UNIQUE (MD5)) PRIMARY KEY (ID), UNIQUE (MD5))
ENGINE = InnoDB;'''.format(self.table) ENGINE = InnoDB;""".format(
self.table
)
cursor.execute(sql) cursor.execute(sql)
print('创建表 {}'.format(self.table)) print("创建表 {}".format(self.table))
self.close_database() self.close_database()
@ -70,18 +78,20 @@ class Classifier():
def get_file_count(self, folder): def get_file_count(self, folder):
count = 0 count = 0
for (_, _, _files) in os.walk(folder): for _, _, _files in os.walk(folder):
count += len(_files) count += len(_files)
return count return count
def delete_folders(self, folder): def delete_folders(self, folder):
for (root, dirs, files) in os.walk(folder): for root, dirs, files in os.walk(folder):
for dir in dirs: for dir in dirs:
if dir in self.SKIP_FOLDERS:
continue
abs_path = os.path.join(root, dir) abs_path = os.path.join(root, dir)
if os.path.isdir(abs_path): if os.path.isdir(abs_path):
if self.get_file_count(abs_path) == 0: if self.get_file_count(abs_path) == 0:
shutil.rmtree(abs_path) shutil.rmtree(abs_path)
print('删除目录: {}'.format(abs_path)) print("删除目录: {}".format(abs_path))
def is_photo(self, file_name): def is_photo(self, file_name):
return self.is_image(file_name) and self.contains_exif(file_name) return self.is_image(file_name) and self.contains_exif(file_name)
@ -99,32 +109,37 @@ class Classifier():
return False return False
def contains_exif(self, file_name): def contains_exif(self, file_name):
with open(file_name, 'rb') as reader: with open(file_name, "rb") as reader:
tags = exifread.process_file(reader) tags = exifread.process_file(reader)
keys = [key for key in self.PHOTO_EXIF_KEYS if key in tags] keys = [key for key in self.PHOTO_EXIF_KEYS if key in tags]
return len(keys) > 0 return len(keys) > 0
def process_folder(self, folder): def process_folder(self, folder):
for (root, dirs, files) in os.walk(folder): for root, dirs, files in os.walk(folder):
for file in files: for file in files:
self.process_file(root, file) self.process_file(root, file)
def get_md5(self, file): def get_md5(self, file):
with open(file, 'rb') as reader: with open(file, "rb") as reader:
return hashlib.md5(reader.read()).hexdigest() return hashlib.md5(reader.read()).hexdigest()
def process_file(self, root, file): def process_file(self, root, file):
file_path = os.path.join(root, file) file_path = os.path.join(root, file)
md5 = self.get_md5(file_path) if self.is_image(file_path) or self.is_video(file_path):
try: md5 = self.get_md5(file_path)
self.validate(file_path, md5) try:
year, month = self.read_date(file_path) self.validate(file_path, md5)
new_name = self.rename_move(file_path, year, month) year, month, day = self.read_date(file_path)
self.add_record(md5) new_name = self.rename_move(file_path, year, month, day, md5)
self.processed_count += 1 self.add_record(md5)
print('已处理 {}: {} --> {}'.format(self.processed_count, file, new_name)) self.processed_count += 1
except Exception as e: print(
print(str(e)) "已处理 {}: {} --> {}".format(self.processed_count, file, new_name)
)
except Exception as e:
print(str(e))
else:
print("非图片或视频, 忽略文件: {}".format(file_path))
def add_record(self, md5): def add_record(self, md5):
try: try:
@ -133,7 +148,7 @@ class Classifier():
cursor.execute(sql) cursor.execute(sql)
self.db.commit() self.db.commit()
except Exception as e: except Exception as e:
print('插入记录 {} 到数据库photo_classifier失败: {}'.format(md5, str(e))) print("插入记录 {} 到数据库photo_classifier失败: {}".format(md5, str(e)))
self.db.rollback() self.db.rollback()
raise e raise e
@ -144,43 +159,45 @@ class Classifier():
sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5) sql = "SELECT MD5 FROM {} WHERE MD5='{}'".format(self.table, md5)
cursor.execute(sql) cursor.execute(sql)
record = cursor.fetchone() record = cursor.fetchone()
if str(record) != 'None': if str(record) != "None":
os.remove(file_path) os.remove(file_path)
raise Exception('重复照片 {} --> 删除'.format(file_path)) raise Exception("重复文件 {} --> 删除".format(file_path))
except Exception as e: except Exception as e:
raise e raise e
if (not self.is_image(file_path)) and (not self.is_video(file_path)): if (not self.is_image(file_path)) and (not self.is_video(file_path)):
raise Exception('非图片或视频: {} --> 跳过'.format(file_path)) raise Exception("非图片或视频: {} --> 跳过".format(file_path))
def get_photo_create_date(self, file): def get_photo_create_date(self, file):
with open(file, 'rb') as reader: with open(file, "rb") as reader:
tags = exifread.process_file(reader) tags = exifread.process_file(reader)
keys = [key for key in self.PHOTO_DATE_KEYS if key in tags] keys = [key for key in self.PHOTO_DATE_KEYS if key in tags]
if len(keys) > 0: if len(keys) > 0:
key = keys[0] key = keys[0]
origin_date = tags[key] origin_date = tags[key]
time_str = str(origin_date) time_str = str(origin_date)
_date = time_str[:7].split(':') _date = time_str[:10].split(":")
year = _date[0] year = _date[0]
month = _date[1] month = _date[1]
return (year, month) day = _date[2]
return (year, month, day)
return None return None
def get_video_create_date(self, file): def get_video_create_date(self, file):
try: try:
properties = propsys.SHGetPropertyStoreFromParsingName(file) properties = propsys.SHGetPropertyStoreFromParsingName(file)
dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue() dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue()
time_str = str(dt.astimezone(pytz.timezone('Asia/Shanghai'))) time_str = str(dt.astimezone(pytz.timezone("Asia/Shanghai")))
_date = time_str[:7].split('-') _date = time_str[:10].split("-")
year = _date[0] year = _date[0]
month = _date[1] month = _date[1]
return (year, month) day = _date[2]
return (year, month, day)
except: except:
return None return None
def read_date(self, file): def read_date(self, file):
file = file.replace('/', '\\') file = file.replace("/", "\\")
date = None date = None
if self.is_photo(file): if self.is_photo(file):
date = self.get_photo_create_date(file) # 照片可能没有EXIF日期 date = self.get_photo_create_date(file) # 照片可能没有EXIF日期
@ -190,13 +207,14 @@ class Classifier():
if not date: # 获取文件上次修改日期 if not date: # 获取文件上次修改日期
time_str = os.path.getmtime(file) time_str = os.path.getmtime(file)
time_str = str(datetime.datetime.fromtimestamp(time_str)) time_str = str(datetime.datetime.fromtimestamp(time_str))
_date = time_str[:7].split('-') _date = time_str[:10].split("-")
year = _date[0] year = _date[0]
month = _date[1] month = _date[1]
date = (year, month) day = _date[2]
date = (year, month, day)
return date return date
def rename_move(self, file_path, year, month): def rename_move(self, file_path, year, month, day, md5):
if self.is_image(file_path): if self.is_image(file_path):
if self.is_photo(file_path): if self.is_photo(file_path):
output = self.photo_output output = self.photo_output
@ -205,21 +223,24 @@ class Classifier():
elif self.is_video(file_path): elif self.is_video(file_path):
output = self.video_output output = self.video_output
else: else:
raise Exception('移动文件失败, 非图片或视频: {}'.format(file_path)) raise Exception("移动文件失败, 非图片或视频: {}".format(file_path))
new_path = os.path.join(output, year, month) new_path = os.path.join(output, year, month, day)
if not os.path.exists(new_path): if not os.path.exists(new_path):
os.makedirs(new_path) os.makedirs(new_path)
file_name, file_ext = os.path.splitext(file_path) file_name, file_ext = os.path.splitext(file_path)
new_name = year + '-' + month + '-' + str(time.time()) + file_ext new_name = year + "-" + month + "-" + day + "-" + md5 + file_ext
shutil.move(file_path, os.path.join(new_path, new_name)) shutil.move(file_path, os.path.join(new_path, new_name))
return new_name return new_name
cf = Classifier('D:/temp/相册', cf = Classifier1(
photo_output='D:/总仓库-照片视频/总照片备份', input_folder="D:/待分类照片视频1",
video_output='D:/总仓库-照片视频/总视频备份', # input_folder='z:/待分类照片视频/Picture',
image_output='D:/总仓库-照片视频/总图片备份') photo_output="D:/总仓库-照片视频1/总照片备份",
video_output="D:/总仓库-照片视频1/总视频备份",
image_output="D:/总仓库-照片视频1/总图片备份",
)
cf.start()
# cf.create_table() # cf.create_table()
cf.start()

View File

@ -0,0 +1,247 @@
"""
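Classify photos, videos and other images by their creation date.

Layout of the sorted output:
    folder name: <output>/2020/01/02   (year/month/day)
    file name:   2020-01-02-<md5><extension>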
"""
import os
import sys
from posixpath import abspath
import exifread
import time
import shutil
import hashlib
import pymysql
import datetime
import pytz
from win32com.propsys import propsys, pscon
# Uses the Oracle database server hosted in Korea
class Classifier:
    """Sort media files from an input folder into dated output folders.

    Every image or video found under ``input_folder`` is hashed with MD5.
    Files whose hash is already stored in the MySQL table are deleted as
    duplicates; the others are moved to ``<output>/<year>/<month>/<day>/`` and
    renamed to ``<year>-<month>-<day>-<md5><ext>``, after which the hash is
    recorded.  Images carrying EXIF data go to ``photo_output``, other images
    to ``image_output`` and videos to ``video_output``.
    """

    mode = "prod"  # "dev" selects TEST_TABLE, any other value selects TABLE
    # Extensions are stored lower-case and without the leading dot.
    IMAGE_EXTENTIONS = ["jpg", "jpeg", "bmp", "png", "tif", "gif", "heic"]
    VIDEO_EXTENTIONS = ["mp4", "avi", "rmvb", "mkv", "mov", "amr", "mpg"]
    TEST_TABLE = "TEST_PHOTO"
    TABLE = "PHOTO"
    # EXIF tags that mark an image as a camera photo but carry no date.
    PHOTO_NO_DATE_KEYS = ["EXIF ExifVersion"]
    # EXIF tags that carry a date, in lookup order.
    PHOTO_DATE_KEYS = ["Image DateTime", "EXIF DateTimeOriginal"]
    PHOTO_EXIF_KEYS = PHOTO_NO_DATE_KEYS + PHOTO_DATE_KEYS
    # Folder names that are never deleted nor descended into during cleanup.
    SKIP_FOLDERS = ["System Volume Information", "$RECYCLE.BIN", ".stfolder"]

    def __init__(self, input_folder, photo_output, video_output, image_output):
        """Store the folders to work on and pick the database table.

        Args:
            input_folder: Root folder scanned recursively for media files.
            photo_output: Destination root for images that contain EXIF tags.
            video_output: Destination root for videos.
            image_output: Destination root for images without EXIF tags.
        """
        self.input = input_folder
        self.photo_output = photo_output
        self.video_output = video_output
        self.image_output = image_output
        self.processed_count = 0
        self.table = self.TEST_TABLE if self.mode == "dev" else self.TABLE

    def connect_database(self):
        """Open the MySQL connection and keep it in ``self.db``."""
        # NOTE(review): credentials are hard-coded in source control; consider
        # loading them from the environment or a config file that is not committed.
        self.db = pymysql.connect(
            host="panel.zhiyong.tech",
            user="yu_biggerfish",
            password="jRHTbQrdkfNNTztH",
            database="photo_classifier",
        )

    def close_database(self):
        """Close the connection opened by :meth:`connect_database`."""
        self.db.close()

    def create_table(self):
        """Drop and recreate the MD5 table; all stored hashes are lost."""
        self.connect_database()
        try:
            cursor = self.db.cursor()

            sql = "DROP TABLE IF EXISTS {}".format(self.table)
            cursor.execute(sql)
            print("删除表 {}".format(self.table))

            sql = """CREATE TABLE {} (
                ID INT NOT NULL AUTO_INCREMENT ,
                MD5 VARCHAR(255) NOT NULL ,
                PRIMARY KEY (ID), UNIQUE (MD5))
                ENGINE = InnoDB;""".format(
                self.table
            )
            cursor.execute(sql)
            print("创建表 {}".format(self.table))
        finally:
            # Release the connection even when one of the statements fails.
            self.close_database()

    def start(self):
        """Classify everything under the input folder, then remove empty folders."""
        self.connect_database()
        try:
            self.process_folder(self.input)
            self.delete_folders(self.input)
        finally:
            # Release the connection even when processing is interrupted.
            self.close_database()

    def get_file_count(self, folder):
        """Return the number of files under ``folder``, counted recursively."""
        return sum(len(files) for _, _, files in os.walk(folder))

    def delete_folders(self, folder):
        """Remove every sub-folder of ``folder`` that contains no files at all.

        Folders named in ``SKIP_FOLDERS`` are neither removed nor descended
        into, so their contents are left alone.
        """
        for root, dirs, _files in os.walk(folder):
            # Editing ``dirs`` in place tells os.walk which folders to visit.
            dirs[:] = [name for name in dirs if name not in self.SKIP_FOLDERS]
            for name in list(dirs):
                abs_path = os.path.join(root, name)
                if os.path.isdir(abs_path) and self.get_file_count(abs_path) == 0:
                    shutil.rmtree(abs_path)
                    dirs.remove(name)  # already gone, nothing left to walk
                    print("删除目录: {}".format(abs_path))

    @staticmethod
    def _has_extension(file_name, extensions):
        """Return True when the extension of ``file_name`` is in ``extensions``.

        The comparison is case-insensitive and looks at the real extension, so
        names that merely end with the same letters (``mymp4``) do not match.
        """
        suffix = os.path.splitext(file_name)[1].lower()
        return suffix[1:] in extensions

    def is_photo(self, file_name):
        """Return True for an image file that contains one of the EXIF tags."""
        return self.is_image(file_name) and self.contains_exif(file_name)

    def is_video(self, file_name):
        """Return True when ``file_name`` has a known video extension."""
        return self._has_extension(file_name, self.VIDEO_EXTENTIONS)

    def is_image(self, file_name):
        """Return True when ``file_name`` has a known image extension."""
        return self._has_extension(file_name, self.IMAGE_EXTENTIONS)

    def contains_exif(self, file_name):
        """Return True when the file carries any tag of ``PHOTO_EXIF_KEYS``."""
        with open(file_name, "rb") as reader:
            tags = exifread.process_file(reader)
        return any(key in tags for key in self.PHOTO_EXIF_KEYS)

    def process_folder(self, folder):
        """Run :meth:`process_file` on every file under ``folder``."""
        for root, _dirs, files in os.walk(folder):
            for file in files:
                self.process_file(root, file)

    def get_md5(self, file):
        """Return the hexadecimal MD5 digest of the file's content."""
        digest = hashlib.md5()
        with open(file, "rb") as reader:
            # Hash in 1 MiB chunks so large videos are never fully in memory.
            for chunk in iter(lambda: reader.read(1024 * 1024), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def process_file(self, root, file):
        """Deduplicate, move and record one file; non-media files are ignored.

        Any failure is printed and swallowed so that a single bad file does
        not stop the whole run.
        """
        file_path = os.path.join(root, file)
        if not (self.is_image(file_path) or self.is_video(file_path)):
            print("非图片或视频, 忽略文件: {}".format(file_path))
            return

        try:
            # Hashing is inside the try so an unreadable file is only skipped.
            md5 = self.get_md5(file_path)
            self.validate(file_path, md5)
            year, month, day = self.read_date(file_path)
            new_name = self.rename_move(file_path, year, month, day, md5)
            self.add_record(md5)
            self.processed_count += 1
            print(
                "已处理 {}: {} --> {}".format(self.processed_count, file, new_name)
            )
        except Exception as e:
            print(str(e))

    def add_record(self, md5):
        """Insert ``md5`` into the table; roll back and re-raise on failure."""
        try:
            cursor = self.db.cursor()
            # The table name cannot be bound, the value can.
            sql = "INSERT INTO {}(MD5) VALUES(%s)".format(self.table)
            cursor.execute(sql, (md5,))
            self.db.commit()
        except Exception as e:
            print("插入记录 {} 到数据库photo_classifier失败: {}".format(md5, str(e)))
            self.db.rollback()
            raise

    def validate(self, file_path, md5):
        """Reject duplicates and non-media files.

        Raises:
            Exception: When ``md5`` is already stored (the file is deleted
                first), or when the file is neither an image nor a video.
        """
        # check if the md5 of the photo exists in database
        cursor = self.db.cursor()
        sql = "SELECT MD5 FROM {} WHERE MD5=%s".format(self.table)
        cursor.execute(sql, (md5,))
        if cursor.fetchone() is not None:
            os.remove(file_path)
            raise Exception("重复文件 {} --> 删除".format(file_path))

        if not (self.is_image(file_path) or self.is_video(file_path)):
            raise Exception("非图片或视频: {} --> 跳过".format(file_path))

    def get_photo_create_date(self, file):
        """Return ``(year, month, day)`` strings from the EXIF date tags.

        Returns None when no tag of ``PHOTO_DATE_KEYS`` holds a usable
        ``YYYY:MM:DD`` value, so the caller can fall back to another source.
        """
        with open(file, "rb") as reader:
            tags = exifread.process_file(reader)
        for key in self.PHOTO_DATE_KEYS:
            if key not in tags:
                continue
            parts = str(tags[key])[:10].split(":")
            # Skip blank or truncated values instead of failing on them.
            if len(parts) == 3 and all(part.isdigit() for part in parts):
                return tuple(parts)
        return None

    def get_video_create_date(self, file):
        """Return ``(year, month, day)`` from the media "date encoded" property.

        The value is converted to the Asia/Shanghai time zone first.  Returns
        None when the property cannot be read.
        """
        try:
            properties = propsys.SHGetPropertyStoreFromParsingName(file)
            dt = properties.GetValue(pscon.PKEY_Media_DateEncoded).GetValue()
            time_str = str(dt.astimezone(pytz.timezone("Asia/Shanghai")))
            year, month, day = time_str[:10].split("-")
            return (year, month, day)
        except Exception:
            # Best effort: the video may have no media creation date.
            return None

    def read_date(self, file):
        """Return ``(year, month, day)`` strings used to file ``file``.

        The EXIF date (photos) or the media creation date (videos) is used
        when available, otherwise the file's last modification time.
        """
        file = file.replace("/", "\\")
        date = None
        if self.is_photo(file):
            date = self.get_photo_create_date(file)  # the photo may have no EXIF date
        elif self.is_video(file):
            date = self.get_video_create_date(file)  # the video may have no media creation date

        if not date:  # fall back to the last modification time
            modified = datetime.datetime.fromtimestamp(os.path.getmtime(file))
            date = tuple(modified.date().isoformat().split("-"))

        return date

    def rename_move(self, file_path, year, month, day, md5):
        """Move the file into its dated folder and return the new file name.

        Raises:
            Exception: When the file is neither an image nor a video.
        """
        if self.is_image(file_path):
            if self.is_photo(file_path):
                output = self.photo_output
            else:
                output = self.image_output
        elif self.is_video(file_path):
            output = self.video_output
        else:
            raise Exception("移动文件失败, 非图片或视频: {}".format(file_path))

        new_path = os.path.join(output, year, month, day)
        os.makedirs(new_path, exist_ok=True)

        file_ext = os.path.splitext(file_path)[1]
        new_name = year + "-" + month + "-" + day + "-" + md5 + file_ext
        shutil.move(file_path, os.path.join(new_path, new_name))
        return new_name
# Folders to read from and sort into (hard-coded Windows paths).
classifier_options = {
    "input_folder": "D:/待分类照片视频",
    # "input_folder": "D:/总仓库-照片视频-bak",
    "photo_output": "D:/总仓库-照片视频/总照片备份",
    "video_output": "D:/总仓库-照片视频/总视频备份",
    "image_output": "D:/总仓库-照片视频/总图片备份",
}
cf = Classifier(**classifier_options)
# Runs immediately when the file is executed.
cf.start()
# Uncomment to drop and recreate the MD5 table instead.
# cf.create_table()