Skip to content

Commit

Permalink
Add a remote command for batch duplicate finding.
Browse files Browse the repository at this point in the history
  • Loading branch information
porridge committed Sep 28, 2024
1 parent d0def46 commit 0848b84
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ main_sources = files('advanced-exif.cc',
'osd.cc',
'osd.h',
'pan-view.h',
'pic.h',
'pic.cc',
'pixbuf-renderer.cc',
'pixbuf-renderer.h',
'pixbuf-util.cc',
Expand Down
1 change: 1 addition & 0 deletions src/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ ConfOptions *init_options(ConfOptions *options)
options->dnd_icon_size = 48;
options->dnd_default_action = DND_ACTION_ASK;
options->duplicates_similarity_threshold = 99;
options->duplicates_program = g_strdup("echo");
options->rot_invariant_sim = TRUE;
options->sort_totals = FALSE;
options->rectangle_draw_aspect_ratio = RECTANGLE_DRAW_ASPECT_RATIO_NONE;
Expand Down
1 change: 1 addition & 0 deletions src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ struct ConfOptions

guint duplicates_similarity_threshold;
guint duplicates_match;
gchar *duplicates_program;
gboolean duplicates_thumbnails;
guint duplicates_select_type;
gboolean rot_invariant_sim;
Expand Down
53 changes: 53 additions & 0 deletions src/pic.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (C) 2024 The Geeqie Team
*
* Author: Marcin Owsiany <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
*
* Helper class for computing equivalence sets of pictures.
*
*/

#include "pic.h"

pic::pic(char const *cname): name(cname), equivalent{name}, sim(NULL) {
GError *err = NULL;
GdkPixbuf *buf = gdk_pixbuf_new_from_file(cname, &err);
if (buf == NULL) {
fprintf(stderr, "Unable to read file %s: %s\n", cname, err->message);
g_error_free(err);
return;
}
sim = image_sim_new_from_pixbuf(buf);
g_object_unref(buf);
}

pic::~pic() {
if (sim != NULL) image_sim_free(sim);
}

int operator<(const pic &a, const pic &b)
{
return a.name < b.name;
}

gdouble pic::compare(const pic & other)
{
if (sim == NULL || other.sim == NULL)
return 0.0;
return 100.0 * image_sim_compare(sim, other.sim);
}
42 changes: 42 additions & 0 deletions src/pic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (C) 2024 The Geeqie Team
*
* Author: Marcin Owsiany <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
*
* Helper class for computing equivalence sets of pictures.
*
*/

#include <set>
#include <string>
#include <gdk-pixbuf/gdk-pixbuf.h>
#include <glib/gtypes.h>

#include "similar.h"

class pic {
public:
pic(char const *cname);
~pic();
gdouble compare(const pic&);
std::string name;
std::set<std::string> equivalent;
private:
ImageSimilarityData *sim;
friend int operator<(const pic &a, const pic &b);
};
105 changes: 105 additions & 0 deletions src/remote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@

#include "remote.h"

#include <stdio.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>

#include <algorithm>
Expand All @@ -32,6 +34,8 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <vector>

#include <gtk/gtk.h>
Expand All @@ -57,8 +61,10 @@
#include "main.h"
#include "misc.h"
#include "options.h"
#include "pic.h"
#include "pixbuf-renderer.h"
#include "rcfile.h"
#include "similar.h"
#include "slideshow.h"
#include "typedefs.h"
#include "ui-fileops.h"
Expand Down Expand Up @@ -682,6 +688,102 @@ static void gr_slideshow_delay(const gchar *text, GIOChannel *, gpointer)
options->slideshow.delay = static_cast<gint>(n * 10.0 + 0.01);
}

static void gr_duplicates_threshold(const gchar *text, GIOChannel *, gpointer)
{
gint thresh;
gint n;
gint res;

res = sscanf(text, "%d", &thresh);
if (res == 1)
{
n = thresh;
if (n < 0 || n > 100)
{
printf_term(TRUE, "Image similarity threshold out of range (%d to %d)\n", 0, 100);
return;
}
}
else
{
n = 99;
}

options->duplicates_similarity_threshold = static_cast<guint>(n);
DEBUG_0("threshold set to %d", options->duplicates_similarity_threshold);
}

static void gr_duplicates_program(const gchar *text, GIOChannel *, gpointer)
{
g_strdup(options->duplicates_program);
options->duplicates_program = g_strdup(text);
DEBUG_0("duplicates program set to \"%s\"", options->duplicates_program);
}

static void gr_process_duplicates(const gchar *, GIOChannel *, gpointer data)
{
auto remote_data = static_cast<RemoteData *>(data);
std::map<std::string, std::unique_ptr<pic>> pics;
GList *work = remote_data->file_list;
while (work)
{
FileData *fd = static_cast<FileData *>(work->data);
std::string name(fd->path);
pics[name] = std::unique_ptr<pic>(new pic(fd->path));
work = work->next;
}
DEBUG_1("processing %d files in set", pics.size());

// Compute similarity score for every pair, build equivalence sets.
for (auto a = pics.begin(); a != pics.end(); ++a) {
auto b = a;
b++;
for (; b != pics.end(); ++b) {
double similarity = a->second->compare(*b->second);
DEBUG_1("%s vs %s: %f", a->second->name.c_str(), b->second->name.c_str(), similarity);
if (similarity < options->duplicates_similarity_threshold)
continue;
a->second->equivalent.insert(b->second->equivalent.begin(), b->second->equivalent.end());
for (auto const &f: a->second->equivalent) {
pics[f]->equivalent.insert(a->second->equivalent.begin(), a->second->equivalent.end());
}
}
}

std::set<std::string> printed;
for (auto const &f: pics) {
if (f.second->equivalent.size() < 2)
// skip this pic if not similar to any other one but itself
continue;
if (printed.find(f.second->name) != printed.end())
// skip this pic if it was already printed (when processing a similar image)
continue;
std::vector<const char *> cmd;
cmd.push_back(options->duplicates_program);
for (auto const &e: f.second->equivalent) {
cmd.push_back(e.c_str());
printed.insert(e);
}
cmd.push_back(NULL);
pid_t pid = fork();
if (pid == -1) {
perror("fork");
exit(1);
} else if (pid == 0) {
execvp(const_cast<char *>(cmd[0]), const_cast<char **>(&(cmd[0])));
perror("execv");
exit(1);
} else {
int status;
wait(&status);
if (!WIFEXITED(status) || WEXITSTATUS(status)!=0) {
fprintf(stderr, "subprocess failed, aborting\n");
exit(1);
}
}
}
}

static void gr_tools_show(const gchar *, GIOChannel *, gpointer)
{
gboolean popped;
Expand Down Expand Up @@ -1718,6 +1820,8 @@ static RemoteCommandEntry remote_commands[] = {
{ nullptr, "--cache-shared=", gr_cache_shared, TRUE, FALSE, N_("clean|clear"), N_("clean or clear shared thumbnail cache") },
{ nullptr, "--cache-thumbs=", gr_cache_thumb, TRUE, FALSE, N_("clean|clear"), N_("clean or clear thumbnail cache") },
{ "-d", "--delay=", gr_slideshow_delay, TRUE, FALSE, N_("<[H:][M:][N][.M]>"), N_("set slide show delay to Hrs Mins N.M seconds") },
{ nullptr, "--duplicates-program=", gr_duplicates_program, TRUE, FALSE, N_("<PROGRAM>"), N_("run program with each identified set of duplicate images") },
{ nullptr, "--duplicates-threshold=", gr_duplicates_threshold, TRUE, FALSE, N_("<N>"), N_("set similarity threshold for what is considered a duplicate") },
{ nullptr, "--first", gr_image_first, FALSE, FALSE, nullptr, N_("first image") },
{ "-f", "--fullscreen", gr_fullscreen_toggle, FALSE, TRUE, nullptr, N_("toggle full screen") },
{ nullptr, "--file=", gr_file_load, TRUE, FALSE, N_("<FILE>|<URL>"), N_("open FILE or URL, bring Geeqie window to the top") },
Expand Down Expand Up @@ -1749,6 +1853,7 @@ static RemoteCommandEntry remote_commands[] = {
{ "-n", "--next", gr_image_next, FALSE, FALSE, nullptr, N_("next image") },
{ nullptr, "--pixel-info", gr_pixel_info, FALSE, FALSE, nullptr, N_("print pixel info of mouse pointer on current image") },
{ nullptr, "--print0", gr_print0, TRUE, FALSE, nullptr, N_("terminate returned data with null character instead of newline") },
{ "-p", "--process-duplicates", gr_process_duplicates, FALSE, FALSE, nullptr, N_("group duplicate pictures in current collection and process them") },
{ nullptr, "--PWD=", gr_pwd, TRUE, FALSE, N_("<PWD>"), N_("use PWD as working directory for following commands") },
{ "-q", "--quit", gr_quit, FALSE, FALSE, nullptr, N_("quit") },
{ nullptr, "--raise", gr_raise, FALSE, FALSE, nullptr, N_("bring the Geeqie window to the top") },
Expand Down

0 comments on commit 0848b84

Please sign in to comment.