Skip to content

Commit

Permalink
Update how cluster names are created
Browse files Browse the repository at this point in the history
  • Loading branch information
ejhusom committed Jan 17, 2024
1 parent b88ac98 commit 994554e
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 14 deletions.
16 changes: 16 additions & 0 deletions params-fersa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Pipeline parameters for the "fersa" (nova10_p8) dataset.
# NOTE(review): key meanings inferred from how src/featurize.py and
# src/postprocess.py read params["featurize"] / params["train"] — confirm
# against the pipeline code before relying on these comments.
featurize:
  columns: Channel_4_Data                  # input column(s) used to build feature vectors
  convert_timestamp_to_datetime: true      # parse the timestamp column into datetime objects
  dataset: nova10_p8_10hz-20230721121352   # dataset identifier/directory name
  overlap: 0                               # window overlap (0 = non-overlapping windows)
  timestamp_column: timestamp              # name of the timestamp column in the raw data
  window_size: 30                          # samples per feature window — presumably 3 s at 10 Hz; verify
postprocess:
  min_segment_length: 1                    # shortest label segment kept during postprocessing
train:
  annotations_dir: nova10_p8_10hz_annotations  # directory with annotation files for this dataset
  fix_predefined_centroids: false          # if true, presumably centroids stay fixed during training — confirm
  learning_method: minibatchkmeans         # clustering algorithm used for training
  max_iter: 100                            # iteration cap for the clustering algorithm
  n_clusters: 7                            # number of clusters to fit
  use_predefined_centroids: true           # initialize clustering from predefined centroids
16 changes: 16 additions & 0 deletions params-nova9.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Pipeline parameters for the nova9_m4 dataset.
# NOTE(review): key meanings inferred from how src/featurize.py and
# src/postprocess.py read params["featurize"] / params["train"] — confirm
# against the pipeline code before relying on these comments.
featurize:
  columns: Channel_4_Data              # input column(s) used to build feature vectors
  convert_timestamp_to_datetime: true  # parse the timestamp column into datetime objects
  dataset: nova9_m4                    # dataset identifier/directory name
  overlap: 0                           # window overlap (0 = non-overlapping windows)
  timestamp_column: timestamp          # name of the timestamp column in the raw data
  window_size: 10                      # samples per feature window — presumably 1 s at 10 Hz; verify
postprocess:
  min_segment_length: 1                # shortest label segment kept during postprocessing
train:
  annotations_dir: nova9_m4_10hz_annotations  # directory with annotation files for this dataset
  fix_predefined_centroids: false      # if true, presumably centroids stay fixed during training — confirm
  learning_method: meanshift           # clustering algorithm used for training
  max_iter: 100                        # iteration cap for the clustering algorithm
  n_clusters: 4                        # number of clusters to fit
  use_predefined_centroids: true       # initialize clustering from predefined centroids
2 changes: 1 addition & 1 deletion params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ featurize:
dataset: nova10_p8_10hz
overlap: 0
timestamp_column: timestamp
window_size: 30
window_size: 35
postprocess:
min_segment_length: 1
train:
Expand Down
3 changes: 2 additions & 1 deletion src/cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,8 @@ def create_event_log(labels, identifier="",
segments = find_segments(labels)
event_log = create_event_log_from_segments(segments,
feature_vector_timestamps)
event_log["case"] = identifier
event_log["source"] = identifier
event_log["case"] = ""

return event_log

Expand Down
2 changes: 2 additions & 0 deletions src/featurize.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def featurize(dir_path="", inference=False, inference_df=None):
with open("params.yaml", "r") as params_file:
params = yaml.safe_load(params_file)

print(params)
print("=========")
dataset = params["featurize"]["dataset"]
columns = params["featurize"]["columns"]
window_size = params["featurize"]["window_size"]
Expand Down
45 changes: 33 additions & 12 deletions src/postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,20 +257,39 @@ def generate_cluster_names(model, cluster_centers):
"""

levels = ["lowest", "low", "medium", "high", "highest"]
cluster_labels = []
cluster_names = []
cluster_characteristics = []
cluster_colors = []

n_clusters = cluster_centers.shape[0]

# Add index and color for each cluster
for i in range(n_clusters):
cluster_names.append(f"{i} ({COLORS[i]}): ")
cluster_colors.append(COLORS[i])
cluster_labels.append(i)
cluster_names.append("")
cluster_characteristics.append("")

maxs = cluster_centers.argmax(axis=0)
mins = cluster_centers.argmin(axis=0)

for i in range(len(FEATURE_NAMES)):
cluster_names[maxs[i]] += "highest " + FEATURE_NAMES[i] + ", "
cluster_names[mins[i]] += "lowest " + FEATURE_NAMES[i] + ", "

cluster_names = pd.DataFrame(cluster_names, columns=["cluster_name"])
# cluster_names[maxs[i]] += "highest " + FEATURE_NAMES[i] + ", "
# cluster_names[mins[i]] += "lowest " + FEATURE_NAMES[i] + ", "
cluster_characteristics[maxs[i]] += "highest " + FEATURE_NAMES[i] + ", "
cluster_characteristics[mins[i]] += "lowest " + FEATURE_NAMES[i] + ", "

print(cluster_labels)
print(cluster_names)
print(cluster_characteristics)

# cluster_names = pd.DataFrame([cluster_labels, cluster_names, cluster_characteristics], columns=["cluster_label", "cluster_name", "cluster_characteristics"])
cluster_names = pd.DataFrame({
"cluster_label": cluster_labels,
"cluster_name": cluster_names,
"cluster_characteristics": cluster_characteristics
})

return cluster_names

Expand Down Expand Up @@ -346,20 +365,22 @@ def postprocess(model, cluster_centers, feature_vectors, labels):
if use_predefined_centroids:
if len(predefined_centroids_dict) == n_clusters:
for i, key in enumerate(predefined_centroids_dict):
cluster_names["cluster_name"][i] = (
str(cluster_names["cluster_name"][i].split(":")[0])
+ ": "
+ f" {key}, ".upper()
+ str(cluster_names["cluster_name"][i].split(":")[1])
)
# cluster_names["cluster_name"][i] = (
# str(cluster_names["cluster_name"][i].split(":")[0])
# + ": "
# + f" {key}, ".upper()
# + str(cluster_names["cluster_name"][i].split(":")[1])
# )
cluster_names["cluster_name"][i] = key.upper()

if expectations != None:
# Add number to expectations
for expectation in expectations:
if expectation["name"].lower() == key.lower():
expectation["label"] = i

cluster_names.to_csv(OUTPUT_PATH / "cluster_names.csv")
cluster_names["source"] = params["featurize"]["dataset"]
cluster_names.to_csv(OUTPUT_PATH / "cluster_names.csv", index=False)

if expectations != None:
event_log_score(event_log, expectations)
Expand Down

0 comments on commit 994554e

Please sign in to comment.