From eb502222a95a6f5f09f9265bfa7abaf8048936cf Mon Sep 17 00:00:00 2001
From: Ian Fijolek
Date: Tue, 5 Nov 2024 14:17:32 -0800
Subject: [PATCH] Initial commit

---
 .gitignore              |   5 +
 .pre-commit-config.yaml |  23 ++++
 .secrets-baseline       | 127 ++++++++++++++++++
 LICENSE                 |   9 ++
 Makefile                |  36 +++++
 README.md               |  52 ++++++++
 delete-replacements.py  | 242 +++++++++++++++++++++++++++++++++++++++
 hardlink.py             |  34 +++++
 save-albums.py          | 113 +++++++++++++++++
 9 files changed, 641 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 .secrets-baseline
 create mode 100644 LICENSE
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100755 delete-replacements.py
 create mode 100755 hardlink.py
 create mode 100755 save-albums.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..929cf96
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+FakeLibrary/
+FakeOld/
+venv/
+*.log
+photo_mapping.json
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..54fed21
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,23 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 24.10.0
+    hooks:
+      - id: black
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: debug-statements
+        language_version: python3
+      - id: check-yaml
+        args:
+          - --allow-multiple-documents
+      - id: check-merge-conflict
+      - id: name-tests-test
+        exclude: tests/(common.py|util.py|(helpers|integration/factories)/(.+).py)
+  - repo: https://github.com/Yelp/detect-secrets
+    rev: v1.5.0
+    hooks:
+      - id: detect-secrets
+        args: ['--baseline', '.secrets-baseline']
diff --git a/.secrets-baseline b/.secrets-baseline
new file mode 100644
index 0000000..6340aa9
--- /dev/null
+++ b/.secrets-baseline
@@ -0,0 +1,127 @@
+{
+  "version": "1.5.0",
+  "plugins_used": [
+    {
+      "name": "ArtifactoryDetector"
+    },
+    {
+      "name": "AWSKeyDetector"
+    },
+    {
+      "name": "AzureStorageKeyDetector"
+    },
+    {
+      "name": "Base64HighEntropyString",
+      "limit": 4.5
+    },
+    {
+      "name": "BasicAuthDetector"
+    },
+    {
+      "name": "CloudantDetector"
+    },
+    {
+      "name": "DiscordBotTokenDetector"
+    },
+    {
+      "name": "GitHubTokenDetector"
+    },
+    {
+      "name": "GitLabTokenDetector"
+    },
+    {
+      "name": "HexHighEntropyString",
+      "limit": 3.0
+    },
+    {
+      "name": "IbmCloudIamDetector"
+    },
+    {
+      "name": "IbmCosHmacDetector"
+    },
+    {
+      "name": "IPPublicDetector"
+    },
+    {
+      "name": "JwtTokenDetector"
+    },
+    {
+      "name": "KeywordDetector",
+      "keyword_exclude": ""
+    },
+    {
+      "name": "MailchimpDetector"
+    },
+    {
+      "name": "NpmDetector"
+    },
+    {
+      "name": "OpenAIDetector"
+    },
+    {
+      "name": "PrivateKeyDetector"
+    },
+    {
+      "name": "PypiTokenDetector"
+    },
+    {
+      "name": "SendGridDetector"
+    },
+    {
+      "name": "SlackDetector"
+    },
+    {
+      "name": "SoftlayerDetector"
+    },
+    {
+      "name": "SquareOAuthDetector"
+    },
+    {
+      "name": "StripeDetector"
+    },
+    {
+      "name": "TelegramBotTokenDetector"
+    },
+    {
+      "name": "TwilioKeyDetector"
+    }
+  ],
+  "filters_used": [
+    {
+      "path": "detect_secrets.filters.allowlist.is_line_allowlisted"
+    },
+    {
+      "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
+      "min_level": 2
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_indirect_reference"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_likely_id_string"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_lock_file"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_potential_uuid"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_sequential_string"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_swagger_file"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_templated_secret"
+    }
+  ],
+  "results": {},
+  "generated_at": "2024-11-05T22:16:35Z"
+}
"detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": {}, + "generated_at": "2024-11-05T22:16:35Z" +} diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d1eb296 --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2024 iamthefij + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fe008e2 --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +ENV ?= venv +ENV_BIN ?= $(ENV)/bin + +$(ENV): + python3 -m venv $(ENV) + pip install --upgrade pip + pip install pre-commit detect-secrets requests + +# Installs pre-commit hooks +.PHONY: install-hooks +install-hooks: $(ENV) + $(ENV_BIN)/pre-commit install --install-hooks + +# Checks files for encryption +.PHONY: check +check: $(ENV) deps + $(ENV_BIN)/pre-commit run --all-files + +# Clears all roles and virtualenv +.PHONY: clean +clean: + rm -fr $(ENV) + +# Creates a new secrets baseline +.secrets-baseline: $(ENV) + $(ENV_BIN)/detect-secrets scan > .secrets-baseline + +# Audits secrets against baseline +.PHONY: secrets-audit +secrets-audit: $(ENV) .secrets-baseline + $(ENV_BIN)/detect-secrets audit .secrets-baseline + +# Updates secrets baseline +.PHONY: secrets-update +secrets-update: $(ENV) .secrets-baseline + $(ENV_BIN)/detect-secrets scan --baseline .secrets-baseline diff --git a/README.md b/README.md new file mode 100644 index 0000000..5d343cf --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# photoprism-fix + +A set of scripts I've written to fix some issues with my Photoprism instance. + +## The problem + +My NAS app would upload all mobile photos to the same directory, so it was messing up the names of files. If I had an image named `IMG_1234.heic` on my phone and another image with the same name on my wife's phone, the second image would get written as `IMG_1234-Copy(1).heic`. This was causing Photoprism to think the files were stacks of images and not showing them correctly. This was further complicated for live photos, where the video and image would be expected to have the same name, but may not. 
+
+## The solution
+
+My solution is to export all files from my phone directly, delete the old copies from PhotoPrism, and then re-import them. This is not quite straightforward: I have to make sure I don't lose any favorites metadata, and I have to be careful not to delete files that didn't come from my phone.
+
+## The scripts
+
+### `save-albums.py`
+
+This script downloads the list of all favorited photos and stores it in a JSON file keyed by hash. I use this so that I can restore my favorites after I delete and re-import my library.
+
+### `delete-replacements.py`
+
+This script assumes there is a directory with newer versions of files that you'd like to replace older ones with. It deletes any file in the old library that has a version in the newer library. By default, it uses SHA-1 hashes to compare files. If you add the `--use-image-hash` flag, it uses a perceptual hash (phash) of images and, for videos, a phash of the first frame.
+
+### `hardlink.py`
+
+Recursively hardlinks files between two directories. This is useful when I need multiple import attempts and don't want to waste space or time making a full copy of thousands of files. See the example workflow below.
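+
+## Example workflow
+
+Roughly how these fit together. The paths are illustrative, with `FakeLibrary/` standing in for the directory of newer exports:
+
+```bash
+# 1. Record which photos are currently favorited
+./save-albums.py save photo_mapping.json
+
+# 2. Preview which old files would be removed, then run it for real
+./delete-replacements.py --dry-run --use-image-hash FakeOld/ FakeLibrary/
+./delete-replacements.py --use-image-hash FakeOld/ FakeLibrary/
+
+# 3. Hardlink the new files for an import attempt without copying them
+./hardlink.py FakeLibrary/ import-attempt/
+
+# 4. After re-importing into PhotoPrism, restore the favorites
+./save-albums.py restore photo_mapping.json
+```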
diff --git a/delete-replacements.py b/delete-replacements.py
new file mode 100755
index 0000000..a5005cd
--- /dev/null
+++ b/delete-replacements.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+"""
+This Python script is intended to delete files from a library that have replacements in another library.
+
+It accepts two arguments:
+    1. The old library path from where the files will be deleted.
+    2. The new library path where the replacements are located.
+
+This script will recurse through subdirectories to locate all files and determine if they have replacements. It does
+this by computing a checksum (SHA-1) or a perceptual hash (phash) for each file.
+"""
+
+import hashlib
+import logging
+import mimetypes
+import os
+from argparse import ArgumentParser, Namespace
+from collections.abc import Generator
+from pathlib import Path
+
+try:
+    import imagehash
+    from PIL import Image, UnidentifiedImageError
+except ImportError:
+    Image = None
+    imagehash = None
+    UnidentifiedImageError = Exception
+    logging.warning(
+        "PIL and imagehash libraries are required for image hash comparison"
+    )
+
+try:
+    from pillow_heif import register_heif_opener
+
+    register_heif_opener()
+except ImportError:
+    logging.warning("pillow-heif library is required for HEIF image support")
+
+try:
+    import cv2
+except ImportError:
+    cv2 = None
+    logging.warning("opencv-python library is required for video hash comparison")
+
+logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+log = logging.getLogger(__name__)
+
+
+def image_hash(file_path: Path) -> str | None:
+    if imagehash is None or Image is None:
+        raise ImportError(
+            "PIL and imagehash libraries are required for image hash comparison"
+        )
+
+    try:
+        phash = imagehash.phash(Image.open(file_path))
+        return "image-" + str(phash)
+    except UnidentifiedImageError:
+        if file_path.suffix.lower() in [".heic", ".heif"]:
+            log.warning(f"Unidentified image: {file_path}. Maybe install pillow-heif?")
+        else:
+            log.warning(f"Unidentified image: {file_path}.")
+    except Exception as e:
+        log.error(f"Error calculating image hash for {file_path}: {e}")
+
+    return None
+
+
+def video_hash(file_path: Path) -> str | None:
+    """Extract first frame of a video and calculate the image hash"""
+    if imagehash is None or Image is None:
+        raise ImportError(
+            "PIL and imagehash libraries are required for image hash comparison"
+        )
+
+    if not cv2:
+        raise ImportError("opencv-python library is required for video hash comparison")
+
+    try:
+        cap = cv2.VideoCapture(str(file_path))
+        ret, frame = cap.read()
+        cap.release()
+        if not ret:
+            log.warning(f"Error reading video frame: {file_path}")
+            return None
+
+        phash = imagehash.phash(Image.fromarray(frame))
+        return "video-" + str(phash)
+    except Exception as e:
+        log.error(f"Error calculating video hash for {file_path}: {e}")
+
+    return None
+
+
+def calc_hash(file_path: Path, use_image_hash: bool = False) -> str:
+    """
+    Calculate the hash of a file
+    """
+    if use_image_hash:
+        mimetype = mimetypes.guess_type(file_path)
+        if not mimetype[0] or mimetype[0].startswith("image"):
+            if ihash := image_hash(file_path):
+                return ihash
+        elif mimetype[0].startswith("video"):
+            if vhash := video_hash(file_path):
+                return vhash
+
+    sha1 = hashlib.sha1()
+    with open(file_path, "rb") as f:
+        while True:
+            data = f.read(65536)
+            if not data:
+                break
+            sha1.update(data)
+
+    return sha1.hexdigest()
+
+
+def recurse_files(directory: Path) -> Generator[Path, None, None]:
+    """
+    Generator to yield all files in a directory tree that have an extension and are not hidden.
+    """
+    for file in directory.rglob("*.*"):
+        if file.name.startswith("."):
+            continue
+        if file.is_file():
+            yield file
+
+
+def parse_args() -> Namespace:
+    """Parse and return command line arguments"""
+    parser = ArgumentParser(
+        description="Delete files from a library that have replacements in another library"
+    )
+    parser.add_argument(
+        "old_library_path", type=Path, help="The path to the old library"
+    )
+    parser.add_argument(
+        "new_library_path", type=Path, help="The path to the new library"
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Perform a dry run without deleting any files",
+    )
+    parser.add_argument(
+        "--use-image-hash",
+        action="store_true",
+        help="Use image hash to compare images",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Increase verbosity",
+    )
+
+    return parser.parse_args()
+
+
+def calc_hashes(directory: Path, use_image_hash: bool = False) -> dict[str, Path]:
+    """Calculates the hash for all files in a directory and returns a dictionary of hashes to file paths"""
+    new_library_hashes: dict[str, Path] = {}
+    for file in recurse_files(directory):
+        new_hash = calc_hash(file, use_image_hash)
+        log.debug(f"{new_hash} {file}")
+        if new_hash in new_library_hashes:
+            log.warning("Hash collision: %s %s", file, new_library_hashes[new_hash])
+        new_library_hashes[new_hash] = file
+
+    return new_library_hashes
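+
+# Illustrative shape of the mapping calc_hashes returns when --use-image-hash
+# is set (hypothetical values; non-media files still fall back to SHA-1):
+#   {
+#       "image-8f373714acfcf4d0": Path("FakeLibrary/IMG_1234.heic"),
+#       "video-c4d2a97b31e0558f": Path("FakeLibrary/IMG_1234.mov"),
+#       "da39a3ee5e6b4b0d3255bfef95601890afd80709": Path("FakeLibrary/notes.txt"),
+#   }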
+
+
+def delete_files(
+    old_library_path: Path,
+    new_library_hashes: dict[str, Path],
+    dry_run: bool = False,
+    use_image_hash: bool = False,
+) -> tuple[int, int]:
+    """
+    Delete files from the old library that have replacements in the new library.
+
+    Returns a tuple of the number of deleted files and the number of kept files.
+    """
+    deleted_files: set[str] = set()
+    kept_files = 0
+    for file in recurse_files(old_library_path):
+        oldhash = calc_hash(file, use_image_hash)
+        log.debug(f"{oldhash} {file}")
+        if oldhash in new_library_hashes:
+            log.debug(f"Deleting {file}")
+            if not dry_run:
+                os.remove(file)
+            deleted_files.add(oldhash)
+        else:
+            log.debug(f"Keeping {file}")
+            kept_files += 1
+
+    for new_hash, new_file in new_library_hashes.items():
+        if new_hash not in deleted_files:
+            log.warning(f"Replacement file not found in old library: {new_file}")
+
+    return len(deleted_files), kept_files
+
+
+def main() -> int:
+    args = parse_args()
+    if args.verbose:
+        log.setLevel(logging.DEBUG)
+
+    if not args.old_library_path.exists():
+        log.error("Old library path does not exist: %s", args.old_library_path)
+        return 1
+
+    if not args.new_library_path.exists():
+        log.error("New library path does not exist: %s", args.new_library_path)
+        return 1
+
+    log.info("Calculating hashes for new library")
+    new_library_hashes = calc_hashes(args.new_library_path, args.use_image_hash)
+    log.info(f"New library hashes calculated: {len(new_library_hashes)}")
+
+    log.info("Deleting files from old library")
+    deleted_files, kept_files = delete_files(
+        args.old_library_path, new_library_hashes, args.dry_run, args.use_image_hash
+    )
+    log.info(
+        f"Deleted: {deleted_files}, kept: {kept_files}, unmatched replacements: {len(new_library_hashes) - deleted_files}"
+    )
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/hardlink.py b/hardlink.py
new file mode 100755
index 0000000..676a1ca
--- /dev/null
+++ b/hardlink.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+"""
+This module is intended to be a command line tool to recursively hardlink files. It should otherwise function in a similar way to cp -r.
+"""
+
+import argparse
+import os
+from pathlib import Path
+
+
+def hardlink(src: Path, dst: Path):
+    if src.is_dir():
+        dst.mkdir(exist_ok=True)
+        for child in src.iterdir():
+            hardlink(child, dst / child.name)
+    else:
+        os.link(src, dst)
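+
+# Example (hypothetical paths): hardlink(Path("FakeLibrary"), Path("import-attempt"))
+# mirrors the tree without duplicating file contents. Note that os.link raises
+# FileExistsError if a destination file already exists, and OSError if src and
+# dst are on different filesystems, since hardlinks cannot cross devices.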
+""" + +import argparse +import os +from pathlib import Path + + +def hardlink(src: Path, dst: Path): + if src.is_dir(): + dst.mkdir(exist_ok=True) + for child in src.iterdir(): + hardlink(child, dst / child.name) + else: + os.link(src, dst) + + +def main(): + parser = argparse.ArgumentParser(description="Recursively hardlink files") + parser.add_argument("src", type=Path, help="source directory") + parser.add_argument("dst", type=Path, help="destination directory") + args = parser.parse_args() + hardlink(args.src, args.dst) + + +if __name__ == "__main__": + main() diff --git a/save-albums.py b/save-albums.py new file mode 100755 index 0000000..0c71e0b --- /dev/null +++ b/save-albums.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +"""Script to save and restore favorite photos in PhotoPrism.""" +import json +import sys +import os +from pathlib import Path + +import requests + +# PhotoPrism API configuration +PHOTOPRISM_API_URL = ( + os.getenv("PHOTOPRISM_URL", "https://example.com").removesuffix("/") + "/api/v1" +) +PHOTOPRISM_API_TOKEN = os.getenv("PHOTOPRISM_API_TOKEN") + +# Headers for authorization +headers = { + "X-Auth-Token": f"{PHOTOPRISM_API_TOKEN}", + "Content-Type": "application/json", +} + + +def fetch_favorites(): + """Fetches favorite photos and maps their primary hashes to 'favorite' status.""" + response = requests.get( + f"{PHOTOPRISM_API_URL}/photos", + headers=headers, + params={"count": 1000000, "favorite": True, "quality": 0}, + ) + print(response.text) + response.raise_for_status() + return {photo["Hash"]: "favorite" for photo in response.json()} + + +def save_mapping(favorites, filepath: Path): + """Saves the favorites and album mapping to a JSON file.""" + with filepath.open("w") as file: + json.dump({"favorites": favorites}, file) + print(f"Mapping saved to {filepath}") + + +def load_mapping(filepath: Path): + """Loads the mapping from a JSON file.""" + with filepath.open("r") as file: + return json.load(file) + + +def fetch_photo_uid(photo_hash): + """Fetches the photo UID based on the provided hash.""" + response = requests.get( + f"{PHOTOPRISM_API_URL}/files/{photo_hash}", + headers=headers, + ) + response.raise_for_status() + return response.json()["PhotoUID"] if response.json() else None + + +def re_add_favorites(mapping): + """Re-adds photos as favorites based on the provided mapping.""" + for photo_hash in mapping["favorites"]: + photo_uid = fetch_photo_uid(photo_hash) + if photo_uid is None: + print(f"Photo with hash {photo_hash} not found.") + continue + + response = requests.post( + f"{PHOTOPRISM_API_URL}/photos/{photo_uid}/like", + headers=headers, + ) + response.raise_for_status() + + print("Favorites re-added successfully") + + +if __name__ == "__main__": + # Fetch and save the mappings before deletion + if len(sys.argv) > 2: + filepath = Path(sys.argv[2]) + else: + filepath = Path("photo_mapping.json") + + if sys.argv[1] == "save": + + if filepath.exists(): + print(f"File {filepath} already exists. Please provide a new file path.") + sys.exit(1) + + if not filepath.parent.exists(): + print(f"Parent directory {filepath.parent} does not exist.") + sys.exit(1) + + favorites = fetch_favorites() + save_mapping(favorites, filepath) + elif sys.argv[1] == "restore": + mapping = load_mapping(filepath) + re_add_favorites(mapping) + else: + print("Invalid command. Please use 'save' or 'restore'.") + sys.exit(1)