Skip to content

Commit

Permalink
tried something
Browse files Browse the repository at this point in the history
  • Loading branch information
yokurang committed Aug 14, 2024
1 parent b57ad5d commit 5916411
Showing 1 changed file with 134 additions and 4 deletions.
138 changes: 134 additions & 4 deletions lib/WordDiff.ml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,137 @@ let compute (block : string Block.t) : line_content Block.t =
(* Dispatch on the kind of block being processed. *)
match block with
(* A line common to both sides is wrapped as a single [Unchanged] segment. *)
| Block.Common line -> Block.Common [ Unchanged line ]
| Block.Changed { mine; their; order } ->
(* NOTE(review): the four lines below look like the pre-commit body of this
   arm (the commit header reports 4 deletions for this hunk), shown here
   without diff markers — confirm before treating them as live code. *)
(* Join each side's lines with single spaces into one string. *)
let mine_str = String.concat " " mine in
let their_str = String.concat " " their in
(* Word-diff the two joined strings as a whole. *)
let mine_words, their_words = diff_words mine_str their_str in
(* Each side is returned as a single-element list; [order] is passed through. *)
Block.Changed { mine = [ mine_words ]; their = [ their_words ]; order }
(* Split a line into an array of its words.

   Spaces, tabs, newlines and carriage returns all separate words, and empty
   fragments (from leading, trailing or repeated separators) are dropped.
   Splitting on a single ' ' alone would turn "a  b" into [| "a"; ""; "b" |]
   and keep tabs inside words, so two lines that differ only in spacing
   would be reported as being at a non-zero word distance. *)
let string_to_word_array s =
  let blank_to_space c =
    match c with ' ' | '\t' | '\n' | '\r' -> ' ' | _ -> c
  in
  s
  |> String.map blank_to_space
  |> String.split_on_char ' '
  |> List.filter (fun word -> word <> "")
  |> Array.of_list
in

(* Pair each line of [lines1] with a line of [lines2] by greedily picking the
   closest remaining pair, where closeness is the word-level result of
   [edit_distance].

   Returns a list of [(i, j)] index pairs sorted with [compare]. A line that
   found no partner appears as [(i, -1)] (only in [lines1]) or [(-1, j)]
   (only in [lines2]). *)
let pair_lines lines1 lines2 =
  let n1 = Array.length lines1 in
  let n2 = Array.length lines2 in

  (* distances.(i).(j) = word distance between lines1.(i) and lines2.(j). *)
  let distances =
    Array.init n1 (fun i ->
        Array.init n2 (fun j ->
            edit_distance ( = )
              (string_to_word_array lines1.(i))
              (string_to_word_array lines2.(j))))
  in

  let used1 = Array.make n1 false in
  let used2 = Array.make n2 false in

  (* Scan every still-free (i, j) and return (distance, i, j) for the
     smallest distance. Only a strictly smaller distance replaces the
     current best, so ties keep the first pair met in row-major order.
     (max_int, -1, -1) means nothing was found. *)
  let closest_free_pair () =
    let best = ref (max_int, -1, -1) in
    for i = 0 to n1 - 1 do
      for j = 0 to n2 - 1 do
        let best_dist, _, _ = !best in
        if (not used1.(i)) && (not used2.(j)) && distances.(i).(j) < best_dist
        then best := (distances.(i).(j), i, j)
      done
    done;
    !best
  in

  (* Run [remaining] greedy rounds, marking the chosen lines as used. *)
  let rec pair_up remaining acc =
    if remaining <= 0 then acc
    else
      match closest_free_pair () with
      | _, -1, _ | _, _, -1 -> pair_up (remaining - 1) acc
      | _, i, j ->
          used1.(i) <- true;
          used2.(j) <- true;
          pair_up (remaining - 1) ((i, j) :: acc)
  in
  let paired = pair_up (min n1 n2) [] in

  (* Prepend one sentinel pair for every index not marked in [used]. *)
  let add_unused used make_pair acc =
    let acc = ref acc in
    Array.iteri
      (fun idx taken -> if not taken then acc := make_pair idx :: !acc)
      used;
    !acc
  in

  paired
  |> add_unused used1 (fun i -> (i, -1))
  |> add_unused used2 (fun j -> (-1, j))
  |> List.sort compare
in

(* Index both sides as arrays so lines can be addressed by position, then
   compute the (mine index, their index) pairing between them. *)
let mine_array, their_array = (Array.of_list mine, Array.of_list their) in
let pairs = pair_lines mine_array their_array in

(* Accumulators for the per-line results of each side. *)
let result_mine, result_their = (ref [], ref []) in

(* Canonicalise the spacing of [s]: strip it with [String.trim], then replace
   every run of spaces, tabs, newlines and carriage returns with one space. *)
let normalize_whitespace s =
  let trimmed = String.trim s in
  let len = String.length trimmed in
  let out = Buffer.create len in
  let is_blank = function ' ' | '\t' | '\n' | '\r' -> true | _ -> false in
  (* [prev_blank] says whether the previous character was blank, so that only
     the first blank of a run emits a space. *)
  let rec copy idx prev_blank =
    if idx < len then begin
      let ch = trimmed.[idx] in
      let blank = is_blank ch in
      if not blank then Buffer.add_char out ch
      else if not prev_blank then Buffer.add_char out ' ';
      copy (idx + 1) blank
    end
  in
  copy 0 false;
  Buffer.contents out
in

(* Word-diff every pairing once, storing each result under the index of the
   line it belongs to, then emit each side in its own original line order.

   [pairs] is sorted by (mine index, their index). Pushing both sides while
   walking it in that order would emit the "their" lines as unpaired lines
   first, followed by the order of their "mine" partners, i.e. reordered
   relative to the input. Indexing by line keeps both sides in source
   order. *)
let mine_diffs = Array.make (Array.length mine_array) None in
let their_diffs = Array.make (Array.length their_array) None in
List.iter
  (fun (i, j) ->
    (* An index of -1 means "no partner"; that side is diffed as "". *)
    let mine_content =
      if i < 0 then "" else normalize_whitespace mine_array.(i)
    in
    let their_content =
      if j < 0 then "" else normalize_whitespace their_array.(j)
    in
    (* A pairing whose lines are both empty after normalisation adds
       nothing to either side. *)
    if mine_content <> "" || their_content <> "" then begin
      let mine_diff, their_diff = diff_words mine_content their_content in
      if i >= 0 then mine_diffs.(i) <- Some mine_diff;
      if j >= 0 then their_diffs.(j) <- Some their_diff
    end)
  pairs;
(* Prepend in ascending index order; the accumulators are reversed when they
   are read back. *)
Array.iter
  (function Some d -> result_mine := d :: !result_mine | None -> ())
  mine_diffs;
Array.iter
  (function Some d -> result_their := d :: !result_their | None -> ())
  their_diffs;

(* Drop blank lines from both ends of [lines], leaving interior ones alone.
   A line is blank when every one of its segments is [Unchanged ""] (a line
   with no segments at all therefore counts as blank). *)
let trim_empty_lines lines =
  let blank_line segments =
    not (List.exists (function Unchanged "" -> false | _ -> true) segments)
  in
  let rec drop_leading_blank = function
    | first :: rest when blank_line first -> drop_leading_blank rest
    | remaining -> remaining
  in
  (* Trailing blanks are the leading blanks of the reversed list. *)
  lines
  |> drop_leading_blank
  |> List.rev
  |> drop_leading_blank
  |> List.rev
in

(* The accumulators were built by prepending, so restore forward order before
   trimming blank lines off both ends of each side. *)
let finish collected = trim_empty_lines (List.rev !collected) in

Block.Changed
  { mine = finish result_mine; their = finish result_their; order }

0 comments on commit 5916411

Please sign in to comment.