
Commit

adding notebooks, data, etc
spiralizing committed Aug 6, 2023
1 parent 2fb4030 commit b951cff
Showing 144 changed files with 3,907,454 additions and 25,567 deletions.
269 changes: 18 additions & 251 deletions BeethovenQuartets.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -35,12 +35,12 @@
}
],
"source": [
"m21 = pyimport(\"music21\") #music21 manages to load successfully the musicxml files, while musicxml Julia package doesn't."
"m21 = pyimport(\"music21\") "
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -55,7 +55,7 @@
],
"source": [
"#functions from personal packages.\n",
"git_path = \"/home/alfredo/Git/CoE-Testing/src/\"\n",
"git_path = \"/home/alfredo/Git/Harmony_Evolution/src\"\n",
"include(joinpath(git_path,\"CEGFunctions.jl\"))\n",
"include(joinpath(git_path,\"Constr_series.jl\"))\n",
"include(joinpath(git_path,\"IM_Functions.jl\"))\n",
@@ -64,264 +64,31 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"get_key_ent (generic function with 1 method)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#More functions...\n",
"function get_xml_df(piece_xml)\n",
" piece = get_piece_by_measure(piece_xml, csv=false)\n",
" df_piece = DataFrame(\n",
" :Measure => convert(Array{Int64,1},piece[:,1]),\n",
" :TimeSignature => piece[:,2],\n",
" :StartQuarter => piece[:,3],\n",
" :EndQuarter => piece[:,4],\n",
" :Duration => piece[:,5],\n",
" :Pitch => convert(Array{Int64,1},piece[:,6])\n",
" )\n",
" return df_piece\n",
"end\n",
"function get_csv_df(piece_csv)\n",
" piece = get_piece_by_measure(piece_csv, csv=true)[1]\n",
" num_mea = []\n",
" for nm in 1:length(piece)\n",
" push!(num_mea,[nm for i in 1:size(piece[nm],1)])\n",
" end\n",
" num_mea = vcat(num_mea...)\n",
" piece = vcat(piece...)\n",
" df_piece = DataFrame(\n",
" :Measure => convert(Array{Int64,1},num_mea),\n",
" :TimeSignature => piece[:,2],\n",
" :StartTime => piece[:,3],\n",
" :EndTime => piece[:,4],\n",
" :Duration => piece[:,5],\n",
" :Pitch => convert(Array{Int64,1},piece[:,6])\n",
" )\n",
" return df_piece\n",
"end\n",
"function fastuniq(v)\n",
" v1 = Vector{eltype(v)}()\n",
" if length(v)>0\n",
" laste = v[1]\n",
" push!(v1,laste)\n",
" for e in v\n",
" if e != laste\n",
" laste = e\n",
" push!(v1,laste)\n",
" end\n",
" end\n",
" end\n",
" return v1\n",
"end\n",
"get_entropy(probs) = mapreduce(x-> - x * log2(x), +, probs)\n",
"function get_key_IC(list_keys)\n",
" prob_keys = convert(Array{Float64,1}, list_keys[:,2])\n",
" return - log2(prob_keys[1])\n",
"end\n",
"function get_distance_ces(ce1, ce2)\n",
" z_dif = ce2[3] - ce1[3]\n",
" while abs(z_dif) > h_octav / 2 #translating over z to be in the same octave (same CE region)\n",
" if z_dif > 0\n",
" ce2[3] = ce2[3] - h_octav\n",
" else\n",
" ce2[3] = ce2[3] + h_octav\n",
" end\n",
" z_dif = ce2[3] - ce1[3]\n",
" end\n",
" return round(euclidean(ce1,ce2), digits = 4)\n",
"end\n",
"function divide_chunk_notes(chunk, w_s, n_c)\n",
" st_m = minimum(chunk[:,3])\n",
" c_out = []\n",
" for i in 1:n_c\n",
" push!(c_out, chunk[findall(x -> st_m + w_s * (i - 1) <= x < st_m + w_s * i, chunk[:,3]),:])\n",
" end\n",
" return filter(x-> !isempty(x),c_out)\n",
"end\n",
"function get_distance_to_keys(c_i)\n",
" d_to_keys = round.(map(x-> euclidean(c_i, x), pos_all_keys), digits = 4) #computing the eclidean distance to all keys\n",
"\n",
" ranking = sortperm(d_to_keys) #ranking the distances from the closest to the farthest\n",
"\n",
" return [all_keys[ranking] d_to_keys[ranking] pos_all_keys[ranking]][1:12,:]\n",
"end\n",
"function cluster_notes(ptcs)\n",
" p_m12 = map(x-> mod(x,12),ptcs)\n",
" spi_notes = get_cfpitch(p_m12)\n",
" low_notes = findall(x-> x<6, spi_notes)\n",
" high_notes = findall(x-> x>=6, spi_notes)\n",
" if !isempty(high_notes) && !isempty(low_notes)\n",
" if length(high_notes) < length(low_notes)\n",
" for i in 1:length(high_notes)\n",
" spi_notes = shift_outlier(spi_notes, high_notes[i])\n",
" end\n",
" else\n",
" for i in 1:length(low_notes)\n",
" spi_notes = shift_outlier(spi_notes, low_notes[i])\n",
" end\n",
" end\n",
" end\n",
" dmean = Float64[]\n",
" oliers = Int64[]\n",
" new_spi = []\n",
" conv = false\n",
" while conv == false\n",
" dt_mean = zeros(length(spi_notes))\n",
" for i in 1:length(spi_notes)\n",
" dt_mean[i] = abs(spi_notes[i] - mean(spi_notes[1:end .!= i]))\n",
" end\n",
" d, olier = findmax(dt_mean)\n",
" push!(dmean, mean(dt_mean)); push!(oliers, olier); push!(new_spi, spi_notes)\n",
" #println(mean(dt_mean),'\\t', olier, '\\t', p_cf)\n",
" #println(mean(dt_mean),'\\t', olier)\n",
" if length(oliers) > 10 && length(unique(oliers[end-4:end])) <= 2\n",
" conv = true\n",
" break\n",
" end\n",
" spi_notes = shift_outlier(spi_notes, olier)\n",
" end\n",
" return new_spi[findmin(dmean)[2]]\n",
"end\n",
"function shift_outlier(notes, olier)\n",
" dif = notes[olier] - median(notes)\n",
" notes_new = copy(notes)\n",
" if dif > 0\n",
" notes_new[olier] = notes_new[olier] - 12\n",
" elseif dif < 0\n",
" notes_new[olier] = notes_new[olier] + 12\n",
" end\n",
" return notes_new\n",
"end\n",
"function get_center_effect(chunk_notes; r=1, h=sqrt(2/15), mod_12=false,all_keys=all_keys, pos_all_keys=pos_all_keys, sbeat_w=[[1.],[1.]], lin_w=1) \n",
" ptcs = chunk_notes[:,6]\n",
" durs = chunk_notes[:,5]\n",
" pbeat = chunk_notes[:,1]\n",
" beat_w = ones(length(durs)) #array of the beat weights\n",
" for b = 1:length(sbeat_w[1])\n",
" loc_b = findall(x-> x==sbeat_w[1][b], pbeat) #finding all notes that start at beat sbeat_w[1][b]\n",
" beat_w[loc_b] .= sbeat_w[2][b] #this is the weight.\n",
" end\n",
" notas, n_we = get_local_lin_w(ptcs, lin_w) #doing the linear weight in the pitches\n",
" ii = vcat(map(x-> findall(y-> y==x, notas), ptcs)...)\n",
" #println(ptcs)\n",
" b_wei = n_we[ii] #getting the linear weight for every note i n the array of pitches\n",
" ###DO A FUNCTION FROM HERE\n",
" spi_ix = cluster_notes(ptcs) .+ 24\n",
" #TO HERE\n",
" spi_p = map(x-> get_pitch(x, r=r, h=h), spi_ix) #getting the location (x,y,z) for each pitch\n",
" t_ws = map((x,y,z)-> x*y*z, beat_w,durs, b_wei) #computing the total weights\n",
" cv_i = map((x,y)-> x*y, t_ws, spi_p) / sum(t_ws) #computing the location of the pitches with their relative weights\n",
" c_i = sum(cv_i) #finding the center of effect\n",
" return c_i\n",
"end\n",
"function get_key_ent(key_list)\n",
" dists = map(x-> exp(-12 * x), Float64.(key_list[:,2]))\n",
" key_p = dists ./ sum(dists)\n",
" return get_entropy(key_p)\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"xml_path = \"/home/alfredo/MusicPenn/Beethoven_HAnalysis/XMLFiles\" #xml files \n",
"adata_path = \"/home/alfredo/MusicPenn/Beethoven_HAnalysis/AnnotationsTSV\" #annotated data\n",
"out_path = \"/home/alfredo/MusicPenn/Beethoven_HAnalysis/KeyCallCSV\"\n",
"adata_path = \"/home/alfredo/Git/Harmony_Evolution/Beethoven_Annotations/\" #annotated data\n",
"out_path = \"/home/alfredo/Git/Harmony_Evolution/Beethoven_Annotations/\"\n",
"xml_list = readdir(xml_path)\n",
"adata_list = readdir(adata_path);"
"ann_files = readdir(adata_path)\n",
"adata_list = ann_files[findall(x -> occursin(r\".tsv\", x), ann_files)];"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"70-element Vector{String}:\n",
" \"op127_no12\"\n",
" \"op127_no12\"\n",
" \"op127_no12\"\n",
" \"op127_no12\"\n",
" \"op130_no13\"\n",
" \"op130_no13\"\n",
" \"op130_no13\"\n",
" \"op130_no13\"\n",
" \"op130_no13\"\n",
" \"op130_no13\"\n",
"\n",
" \"op59_no9\"\n",
" \"op74_no10\"\n",
" \"op74_no10\"\n",
" \"op74_no10\"\n",
" \"op74_no10\"\n",
" \"op95_no11\"\n",
" \"op95_no11\"\n",
" \"op95_no11\"\n",
" \"op95_no11\""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"opus_number = map(x-> join(split(x, \"_\")[1:2],\"_\"), xml_list)\n",
"u_opus = unique(opus_number);"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16-element Vector{Vector{Int64}}:\n",
" [1, 2, 3, 4]\n",
" [5, 6, 7, 8, 9, 10]\n",
" [11, 12, 13, 14, 15, 16, 17]\n",
" [18, 19, 20, 21, 22]\n",
" [23, 24, 25, 26]\n",
" [27, 28, 29, 30]\n",
" [31, 32, 33, 34]\n",
" [35, 36, 37, 38]\n",
" [39, 40, 41, 42]\n",
" [43, 44, 45, 46]\n",
" [47, 48, 49, 50]\n",
" [51, 52, 53, 54]\n",
" [55, 56, 57, 58]\n",
" [59, 60, 61, 62]\n",
" [63, 64, 65, 66]\n",
" [67, 68, 69, 70]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ix_all = [findall(x-> occursin(u_opus[i], x), adata_list) for i in 1:length(u_opus)]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -816,7 +583,6 @@
}
],
"source": [
"\n",
"for xml_i in 1:length(xml_list)\n",
" local fh_kseq\n",
" #loading csv from midi\n",
@@ -889,12 +655,13 @@
" name_out = \"CoEKeyCallNEW-$(join([split(xml_file,\".\")[1] \"csv\"],\".\"))\"\n",
" CSV.write(joinpath(out_path,name_out), df_out, header=true)\n",
" println(\"File $(xml_file) DONE!\")\n",
" flush(stdout)\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -904,7 +671,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -943,14 +710,14 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"the median accuracy for all the movements is: 67.74%\n"
"the median accuracy for all the movements is: 68.435%\n"
]
}
],
