From 0848b8454f8c96bf9487e0222ed1dbbee7395b9a Mon Sep 17 00:00:00 2001 From: Marcin Owsiany Date: Fri, 27 Sep 2024 19:56:19 +0200 Subject: [PATCH] Add a remote command for batch duplicate finding. Based on https://github.com/porridge/image-duplicate-finder --- src/meson.build | 2 + src/options.cc | 1 + src/options.h | 1 + src/pic.cc | 53 ++++++++++++++++++++++++ src/pic.h | 42 +++++++++++++++++++ src/remote.cc | 105 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 204 insertions(+) create mode 100644 src/pic.cc create mode 100644 src/pic.h diff --git a/src/meson.build b/src/meson.build index 011d7eaf3..882371d62 100644 --- a/src/meson.build +++ b/src/meson.build @@ -128,6 +128,8 @@ main_sources = files('advanced-exif.cc', 'osd.cc', 'osd.h', 'pan-view.h', +'pic.h', +'pic.cc', 'pixbuf-renderer.cc', 'pixbuf-renderer.h', 'pixbuf-util.cc', diff --git a/src/options.cc b/src/options.cc index 19c8ca0d4..95fe86b62 100644 --- a/src/options.cc +++ b/src/options.cc @@ -66,6 +66,7 @@ ConfOptions *init_options(ConfOptions *options) options->dnd_icon_size = 48; options->dnd_default_action = DND_ACTION_ASK; options->duplicates_similarity_threshold = 99; + options->duplicates_program = g_strdup("echo"); options->rot_invariant_sim = TRUE; options->sort_totals = FALSE; options->rectangle_draw_aspect_ratio = RECTANGLE_DRAW_ASPECT_RATIO_NONE; diff --git a/src/options.h b/src/options.h index 6fdd37c74..bc41241b1 100644 --- a/src/options.h +++ b/src/options.h @@ -79,6 +79,7 @@ struct ConfOptions guint duplicates_similarity_threshold; guint duplicates_match; + gchar *duplicates_program; gboolean duplicates_thumbnails; guint duplicates_select_type; gboolean rot_invariant_sim; diff --git a/src/pic.cc b/src/pic.cc new file mode 100644 index 000000000..20907b868 --- /dev/null +++ b/src/pic.cc @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2024 The Geeqie Team + * + * Author: Marcin Owsiany + * + * This program is free software; you can redistribute it and/or modify + * it under the 
terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * + * Helper class for computing equivalence sets of pictures. + * + */ + +#include "pic.h" + +pic::pic(char const *cname): name(cname), equivalent{name}, sim(nullptr) { /* Load cname and compute its similarity data; sim stays null when the file cannot be read. */ + GError *load_error = nullptr; + GdkPixbuf *pixbuf = gdk_pixbuf_new_from_file(cname, &load_error); + if (!pixbuf) { + fprintf(stderr, "Unable to read file %s: %s\n", cname, load_error->message); + g_error_free(load_error); + return; + } + sim = image_sim_new_from_pixbuf(pixbuf); + g_object_unref(pixbuf); +} + +pic::~pic() { /* Release the similarity data if any was computed. */ + if (sim) image_sim_free(sim); +} + +int operator<(const pic &a, const pic &b) +{ /* Order pics by file name. */ + return a.name < b.name; +} + +gdouble pic::compare(const pic & other) +{ /* image_sim_compare() result scaled by 100; 0.0 when either side has no similarity data. */ + if (!sim || !other.sim) + return 0.0; + return image_sim_compare(sim, other.sim) * 100.0; +} diff --git a/src/pic.h b/src/pic.h new file mode 100644 index 000000000..4ba55a244 --- /dev/null +++ b/src/pic.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2024 The Geeqie Team + * + * Author: Marcin Owsiany + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * + * Helper class for computing equivalence sets of pictures. + * + */ + +#include +#include +#include +#include + +#include "similar.h" + +class pic { /* An image file together with the names of the files found equivalent to it. */ +public: + pic(char const *cname); + ~pic(); /* NOTE(review): sim is an owning raw pointer and copy operations are not deleted; copying a pic would free it twice. */ + gdouble compare(const pic&); + std::string name; + std::set equivalent; +private: + ImageSimilarityData *sim; +friend int operator<(const pic &a, const pic &b); +}; diff --git a/src/remote.cc b/src/remote.cc index 19982522d..52733b633 100644 --- a/src/remote.cc +++ b/src/remote.cc @@ -21,9 +21,11 @@ #include "remote.h" +#include #include #include #include +#include #include #include @@ -32,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -57,8 +61,10 @@ #include "main.h" #include "misc.h" #include "options.h" +#include "pic.h" #include "pixbuf-renderer.h" #include "rcfile.h" +#include "similar.h" #include "slideshow.h" #include "typedefs.h" #include "ui-fileops.h" @@ -682,6 +688,102 @@ static void gr_slideshow_delay(const gchar *text, GIOChannel *, gpointer) options->slideshow.delay = static_cast(n * 10.0 + 0.01); } +static void gr_duplicates_threshold(const gchar *text, GIOChannel *, gpointer) +{ /* Set the duplicate similarity threshold from TEXT (0..100); 99 is used when TEXT does not parse as an integer. */ + gint thresh; + gint n; + gint res; + + res = sscanf(text, "%d", &thresh); + if (res == 1) + { + n = thresh; + if (n < 0 || n > 100) + { + printf_term(TRUE, "Image similarity threshold out of range (%d to %d)\n", 0, 100); + return; + } + } + else + { + n = 99; + } + + options->duplicates_similarity_threshold = static_cast(n); + DEBUG_0("threshold set to %u", 
options->duplicates_similarity_threshold); +} + +static void gr_duplicates_program(const gchar *text, GIOChannel *, gpointer) +{ /* Store a copy of TEXT as the program to run for each set of duplicates. */ + g_free(options->duplicates_program); /* release the previous value before replacing it */ + options->duplicates_program = g_strdup(text); + DEBUG_0("duplicates program set to \"%s\"", options->duplicates_program); +} + +static void gr_process_duplicates(const gchar *, GIOChannel *, gpointer data) +{ /* Group the files of remote_data->file_list into sets of similar images and run options->duplicates_program once per set of two or more. */ + auto remote_data = static_cast(data); + std::map> pics; + GList *work = remote_data->file_list; + while (work) + { + FileData *fd = static_cast(work->data); + std::string name(fd->path); + pics[name] = std::unique_ptr(new pic(fd->path)); + work = work->next; + } + DEBUG_1("processing %zu files in set", pics.size()); + + // Compute similarity score for every pair, build equivalence sets. + for (auto a = pics.begin(); a != pics.end(); ++a) { + auto b = a; + b++; + for (; b != pics.end(); ++b) { + double similarity = a->second->compare(*b->second); + DEBUG_1("%s vs %s: %f", a->second->name.c_str(), b->second->name.c_str(), similarity); + if (similarity < options->duplicates_similarity_threshold) + continue; + a->second->equivalent.insert(b->second->equivalent.begin(), b->second->equivalent.end()); + for (auto const &f: a->second->equivalent) { + pics[f]->equivalent.insert(a->second->equivalent.begin(), a->second->equivalent.end()); + } + } + } + + std::set printed; + for (auto const &f: pics) { + if (f.second->equivalent.size() < 2) + // skip this pic if not similar to any other one but itself + continue; + if (printed.find(f.second->name) != printed.end()) + // skip this pic if it was already printed (when processing a similar image) + continue; + std::vector cmd; + cmd.push_back(options->duplicates_program); + for (auto const &e: f.second->equivalent) { + cmd.push_back(e.c_str()); + printed.insert(e); + } + cmd.push_back(NULL); + pid_t pid = fork(); + if (pid == -1) { + perror("fork"); + exit(1); + } else if (pid == 0) { + execvp(const_cast(cmd[0]), const_cast(&(cmd[0]))); + perror("execvp"); 
+ _exit(1); /* child only: leave without running the parent's atexit handlers or flushing its stdio buffers */ + } else { + int status = 0; + const pid_t reaped = waitpid(pid, &status, 0); /* wait for this child only; wait() could reap an unrelated one */ + if (reaped != pid || !WIFEXITED(status) || WEXITSTATUS(status)!=0) { + fprintf(stderr, "subprocess failed, aborting\n"); + exit(1); /* NOTE(review): this ends the whole process, not only this command - confirm that is intended */ + } + } + } +} + static void gr_tools_show(const gchar *, GIOChannel *, gpointer) { gboolean popped; @@ -1718,6 +1820,8 @@ static RemoteCommandEntry remote_commands[] = { { nullptr, "--cache-shared=", gr_cache_shared, TRUE, FALSE, N_("clean|clear"), N_("clean or clear shared thumbnail cache") }, { nullptr, "--cache-thumbs=", gr_cache_thumb, TRUE, FALSE, N_("clean|clear"), N_("clean or clear thumbnail cache") }, { "-d", "--delay=", gr_slideshow_delay, TRUE, FALSE, N_("<[H:][M:][N][.M]>"), N_("set slide show delay to Hrs Mins N.M seconds") }, + { nullptr, "--duplicates-program=", gr_duplicates_program, TRUE, FALSE, N_(""), N_("run program with each identified set of duplicate images") }, + { nullptr, "--duplicates-threshold=", gr_duplicates_threshold, TRUE, FALSE, N_(""), N_("set similarity threshold for what is considered a duplicate") }, { nullptr, "--first", gr_image_first, FALSE, FALSE, nullptr, N_("first image") }, { "-f", "--fullscreen", gr_fullscreen_toggle, FALSE, TRUE, nullptr, N_("toggle full screen") }, { nullptr, "--file=", gr_file_load, TRUE, FALSE, N_("|"), N_("open FILE or URL, bring Geeqie window to the top") }, @@ -1749,6 +1853,7 @@ static RemoteCommandEntry remote_commands[] = { { "-n", "--next", gr_image_next, FALSE, FALSE, nullptr, N_("next image") }, { nullptr, "--pixel-info", gr_pixel_info, FALSE, FALSE, nullptr, N_("print pixel info of mouse pointer on current image") }, { nullptr, "--print0", gr_print0, TRUE, FALSE, nullptr, N_("terminate returned data with null character instead of newline") }, + { "-p", "--process-duplicates", gr_process_duplicates, FALSE, FALSE, nullptr, N_("group duplicate pictures in current collection and process them") }, { nullptr, "--PWD=", gr_pwd, TRUE, FALSE, N_(""), N_("use PWD as working directory for following commands") }, { "-q", "--quit", 
gr_quit, FALSE, FALSE, nullptr, N_("quit") }, { nullptr, "--raise", gr_raise, FALSE, FALSE, nullptr, N_("bring the Geeqie window to the top") },