Skip to content

Commit

Permalink
perf: optimized parsing of vectors when importing from file
Browse files (browse the repository at this point in the history)
  • Loading branch information
lvca committed Sep 12, 2023
1 parent 0368775 commit 88939ab
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 7 deletions.
2 changes: 1 addition & 1 deletion engine/src/main/java/com/arcadedb/database/RID.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public static boolean is(final Object value) {
if (value instanceof String) {
final String valueAsString = value.toString();
if (valueAsString.length() > 3 && valueAsString.charAt(0) == '#') {
final List<String> parts = CodeUtils.split(valueAsString.substring(1), ':');
final List<String> parts = CodeUtils.split(valueAsString.substring(1), ':', 3);
return parts.size() == 2 && NumberUtils.isIntegerNumber(parts.get(0)) && NumberUtils.isIntegerNumber(parts.get(1));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public RID toRecordId(final Result target, final CommandContext context) {
if (!(((String) result).startsWith("#") && (((String) result).contains(":"))))
throw new CommandExecutionException("Cannot convert to RID: " + result);

final List<String> parts = CodeUtils.split(((String) result).substring(1), ':');
final List<String> parts = CodeUtils.split(((String) result).substring(1), ':', 3);
if (parts.size() != 2)
throw new CommandExecutionException("Cannot convert to RID: " + result);

Expand Down
8 changes: 6 additions & 2 deletions engine/src/main/java/com/arcadedb/utility/CodeUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,15 @@ public static boolean compare(final Object first, final Object second) {
}

public static List<String> split(final String text, final char sep) {
return split(text, sep, -1);
return split(text, sep, -1, 10);
}

public static List<String> split(final String text, final char sep, final int limit) {
final List<String> parts = limit > -1 ? new ArrayList<>(limit) : new ArrayList<>();
return split(text, sep, limit, 10);
}

public static List<String> split(final String text, final char sep, final int limit, final int estimatedSize) {
final List<String> parts = limit > -1 ? new ArrayList<>(limit) : new ArrayList<>(estimatedSize);
int startPos = 0;
for (int i = 0; i < text.length(); i++) {
final char c = text.charAt(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import java.io.*;
import java.util.*;
import java.util.concurrent.atomic.*;
import java.util.stream.*;

/**
Expand Down Expand Up @@ -241,17 +242,20 @@ private List<TextFloatsEmbedding> loadFromFile() throws IOException {
if (settings.parsingLimitEntries > 0)
parser.limit(settings.parsingLimitEntries);

final AtomicInteger vectorSize = new AtomicInteger(301);

return parser.map(line -> {
++embeddingsParsed;

final List<String> tokens = CodeUtils.split(line, ' ');
final List<String> tokens = CodeUtils.split(line, ' ', -1, vectorSize.get());

String word = tokens.get(0);

float[] vector = new float[tokens.size() - 1];
for (int i = 1; i < tokens.size() - 1; i++) {
for (int i = 1; i < tokens.size() - 1; i++)
vector[i] = Float.parseFloat(tokens.get(i));
}

vectorSize.set(vector.length);

if (normalizeVectors)
// FOR INNER PRODUCT SEARCH NORMALIZE VECTORS
Expand Down

0 comments on commit 88939ab

Please sign in to comment.