Skip to content

Commit

Permalink
feature(sampling): implements the processing steps
Browse files Browse the repository at this point in the history
Implement the Processing and PolynomialRegression objects for processing data from a sampling.
  • Loading branch information
mcolmant committed Feb 17, 2015
1 parent c8a2fd6 commit edbdc78
Show file tree
Hide file tree
Showing 47 changed files with 16,590 additions and 1 deletion.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ We all stand on the shoulders of giants and get by with a little help from our f
* [powerspy.scala](https://github.com/Spirals-Team/powerspy.scala) (version 1.0.1 under [AGPL license](http://www.gnu.org/licenses/agpl-3.0.html)), for using the [PowerSpy powermeter](http://www.alciom.com/en/products/powerspy2-en-gb-2.html).
* [BridJ](https://code.google.com/p/bridj/) (version 0.6.2 under [3-clause BSD license](https://github.com/ochafik/nativelibs4java/blob/master/libraries/BridJ/LICENSE)), for system or C calls.
* [perfmon2](http://sourceforge.net/p/perfmon2/libpfm4/ci/master/tree/) (version 4.6.0 under [MIT license](http://sourceforge.net/p/perfmon2/libpfm4/ci/master/tree/COPYING)), for accessing hardware performance counters.
* [JFreeChart](http://www.jfree.org/jfreechart/) (version 1.0.19 under [LGPL license](https://www.gnu.org/licenses/lgpl.html)), for creation of interactive and animated charts,
* [JFreeChart](http://www.jfree.org/jfreechart/) (version 1.0.19 under [LGPL license](https://www.gnu.org/licenses/lgpl.html)), for creation of interactive and animated charts.
* [Scala IO](http://jesseeichar.github.io/scala-io-doc/0.4.3/index.html#!/overview) (version 0.4.3 under [3-clause BSD license](http://www.scala-lang.org/license.html)), for extensions to IO.
* [Saddle](http://saddle.github.io/) (version 1.3.3 under [Apache 2 license](http://www.apache.org/licenses/LICENSE-2.0)), for data manipulation.

# License
This software is licensed under the *GNU Affero General Public License*, quoted below.
Expand Down
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ libraryDependencies ++= Seq(
)

scalacOptions ++= Seq(
"-language:reflectiveCalls",
"-language:implicitConversions",
"-feature",
"-deprecation"
Expand Down
94 changes: 94 additions & 0 deletions src/main/scala/org/powerapi/sampling/PolynomialRegression.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* This software is licensed under the GNU Affero General Public License, quoted below.
*
* This file is a part of PowerAPI.
*
* Copyright (C) 2011-2014 Inria, University of Lille 1.
*
* PowerAPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* PowerAPI is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with PowerAPI.
*
* If not, please consult http://www.gnu.org/licenses/agpl-3.0.html.
*/
package org.powerapi.sampling

/**
 * Polynomial regression step.
 * For every csv file found in the processed-data directory, fits a polynomial of the requested degree
 * that maps the first data column (unhalted cycles) to the second one (power), then writes all the
 * fitted formulae into a single configuration file.
 */
object PolynomialRegression {
  import org.apache.logging.log4j.LogManager

  private val log = LogManager.getLogger

  /**
   * Computes one least-squares polynomial per csv file and writes them to `libpfm-formula.conf`.
   *
   * Each csv file is named `<coefficient>.csv`; its first line is skipped as a header and the columns
   * at indexes 1 and 2 are read as (unhalted cycles, power).
   * A file whose normal-equation matrix cannot be inverted is reported and left out of the output.
   *
   * @param prDataDir directory which contains the csv files to fit.
   * @param formulaeDir directory (deleted and re-created) which receives `libpfm-formula.conf`.
   * @param degree degree of the fitted polynomials.
   */
  def apply(prDataDir: String, formulaeDir: String, degree: Int): Unit = {
    import org.ejml.data.DenseMatrix64F
    import org.ejml.ops.CommonOps
    import org.saddle.io.{CsvParams, CsvParser, CsvFile}
    import org.saddle.{Mat, Vec}
    import scalax.file.Path

    // Coefficient (taken from the file name) -> polynomial coefficients, constant term first.
    var coefficients = Map[Double, Array[Double]]()

    for(path <- Path("/") / (prDataDir, '/') * "*.csv") {
      val coefficient = path.name.replace(".csv", "")
      val data = CsvParser.parse(List(1,2), CsvParams(skipLines = 1))(CsvFile(path.path)).mapValues(CsvParser.parseDouble).toMat
      val unhaltedCycles = data.col(0)
      val powers = data.col(1)

      /**
       * Compute the xi^j sums (j from 0 to 2 * degree) once.
       * Every cell of the normal-equation matrix is one of these sums, so they are not recomputed.
       */
      val line = unhaltedCycles.length.toDouble +: (for(j <- 1 to degree * 2) yield unhaltedCycles.map(xi => math.pow(xi, j)).sum)

      /**
       * Row j of the matrix is the window [j, j + degree] of the sums computed above.
       */
      val a = (for(j <- 0 to degree) yield line.slice(j, degree + j + 1)).flatten.toArray
      val A = new DenseMatrix64F(degree + 1, a.size / (degree + 1), true, a: _*)

      // invert works in place and reports whether the inversion succeeded.
      if(!CommonOps.invert(A)) {
        log.error(s"coefficient: $coefficient; the normal-equation matrix is singular, no formula computed")
      }
      else {
        val invertedA = Mat(A.getNumRows, A.getNumCols, A.getData)

        val b = (powers.sum +: (for(j <- 1 to degree) yield (unhaltedCycles.map(xi => math.pow(xi, j)) * powers).sum)).toArray
        val B = Vec(b)

        val results = (invertedA dot B).toVec
        val formula = results.toSeq.toArray
        coefficients += coefficient.toDouble -> formula

        /**
         * Error computations for logging, only done when they are going to be printed.
         *
         * @see http://www.stat.purdue.edu/~xuanyaoh/stat350/xyApr6Lec26.pdf
         */
        if(log.isDebugEnabled) {
          // Horner evaluation of the fitted polynomial, valid for any degree.
          val estimatedPowers = unhaltedCycles.map(xi => formula.foldRight(0.0)((c, acc) => c + acc * xi))
          val sst: Double = ((powers - powers.mean) ** 2).sum
          val sse: Double = ((powers - estimatedPowers) ** 2).sum
          val rsquared: Double = 1 - (sse / sst)
          val mse: Double = sse / unhaltedCycles.length
          val se: Double = math.sqrt(mse)

          log.debug(s"coefficient: $coefficient; r^2: $rsquared; mean squared error: $mse; standard deviation: $se")
        }
      }
    }

    if((Path("/") / (prDataDir, '/')).exists) {
      (Path("/") / (formulaeDir, '/')).deleteRecursively(force = true)
      (Path("/") / (formulaeDir, '/')).createDirectory(failIfExists = false)

      // One entry per coefficient, in ascending coefficient order.
      val entries = coefficients.keys.toList.sorted.map { freqCoeff =>
        s" { coefficient = $freqCoeff, formula = [${coefficients(freqCoeff).mkString(",")}] }"
      }
      val lines = ("powerapi.libpfm.formulae.cycles = [" :: entries) :+ "]"

      (Path("/") / (formulaeDir, '/') / ("libpfm-formula.conf", '/')).writeStrings(lines, "\n")
    }
  }
}
189 changes: 189 additions & 0 deletions src/main/scala/org/powerapi/sampling/Processing.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/*
* This software is licensed under the GNU Affero General Public License, quoted below.
*
* This file is a part of PowerAPI.
*
* Copyright (C) 2011-2014 Inria, University of Lille 1.
*
* PowerAPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* PowerAPI is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with PowerAPI.
*
* If not, please consult http://www.gnu.org/licenses/agpl-3.0.html.
*/
package org.powerapi.sampling

/**
 * Processing phase.
 * Reads the sample files stored below the samples directory, aggregates them per frequency coefficient
 * and writes one csv file per coefficient inside the processed-data directory.
 *
 * @author <a href="mailto:[email protected]">Maxime Colmant</a>
 */
object Processing {
  import org.apache.logging.log4j.LogManager

  private val log = LogManager.getLogger

  /**
   * Aggregates the raw samples and writes `<coefficient>.csv` files (columns `unhalted-cycles` and `P`).
   *
   * Every directory found below `samplesDir` is searched for sub-directories whose name is a frequency
   * (parsed as a Long). Each of them is expected to hold three files; inside a file, the values are
   * grouped in runs delimited by a line equal to `separator`. The i-th runs of the same frequency are
   * concatenated across samples and reduced to their median.
   *
   * Nothing is written when the three files of a frequency do not yield the same number of runs.
   * A sample whose coefficient is below every known coefficient is skipped with a warning.
   *
   * @param samplesDir directory which contains the samples.
   * @param prDataDir directory (deleted and re-created) which receives the csv files.
   * @param separator line used to delimit the runs inside the sample files.
   * @param outputPowers name of the file which contains the powers.
   * @param outputUnhaltedCycles name of the file which contains the unhalted cycles.
   * @param outputRefCycles name of the file which contains the reference cycles.
   * @param baseFrequency divisor (scaled by 1E6) which turns a frequency into a coefficient.
   * @param maxFrequency highest frequency before boost mode, in the same unit as baseFrequency.
   */
  def apply(samplesDir: String, prDataDir: String, separator: String, outputPowers: String, outputUnhaltedCycles: String, outputRefCycles: String, baseFrequency: Double, maxFrequency: Double): Unit = {
    import org.saddle.{Frame, Mat, Vec}
    import org.saddle.io.CsvImplicits.frame2CsvWriter
    import scala.annotation.tailrec
    import scalax.file.Path
    import scalax.file.PathMatcher.IsDirectory

    val maxCoefficient = maxFrequency / baseFrequency

    /** Converts the non-empty lines of one run into a vector of doubles. */
    def toVec(run: List[String]): Vec[Double] = Vec(run.filter(_ != "").map(_.toDouble): _*)

    /** Splits lines into runs; a separator line ends the current run and is dropped. */
    @tailrec
    def splitRuns(remaining: List[String], runs: Vector[Vec[Double]]): Vector[Vec[Double]] = {
      if(remaining.isEmpty) runs
      else {
        val (run, rest) = remaining.span(_ != separator)
        splitRuns(rest.drop(1), runs :+ toVec(run))
      }
    }

    /** Reads a sample file once and returns its runs. */
    def readRuns(file: Path): Vector[Vec[Double]] = splitRuns(file.lines().toList, Vector())

    /** Concatenates the i-th run to the i-th known vector, or appends it when there is none yet. */
    def merge(known: List[Vec[Double]], runs: Vector[Vec[Double]]): List[Vec[Double]] = {
      runs.zipWithIndex.foldLeft(known) { case (acc, (run, index)) =>
        acc.lift(index) match {
          case Some(vector) => acc.updated(index, vector.concat(run))
          case _ => acc :+ run
        }
      }
    }

    var frequencies = Set[Long]()

    // Freq -> List[Vec(data)]
    var powerData = Map[Long, List[Vec[Double]]]()
    var unhaltedCycleData = Map[Long, List[Vec[Double]]]()
    var refCycleData = Map[Long, List[Vec[Double]]]()

    /**
     * Process sample files, keep the data in memory.
     */
    for(samplePath <- Path("/") / (samplesDir, '/') ** IsDirectory) {
      for(frequencyPath <- samplePath ** IsDirectory) {
        val frequency = frequencyPath.name.toLong

        frequencies += frequency
        powerData += frequency -> merge(powerData.getOrElse(frequency, List()), readRuns(frequencyPath / outputPowers))
        unhaltedCycleData += frequency -> merge(unhaltedCycleData.getOrElse(frequency, List()), readRuns(frequencyPath / outputUnhaltedCycles))
        refCycleData += frequency -> merge(refCycleData.getOrElse(frequency, List()), readRuns(frequencyPath / outputRefCycles))
      }
    }

    /**
     * Check the buffers length.
     */
    val inconsistent = frequencies.exists { frequency =>
      val expected = powerData(frequency).size
      unhaltedCycleData(frequency).size != expected || refCycleData(frequency).size != expected
    }

    if(inconsistent) {
      log.error("The data processing is wrong")
    }
    else {
      /**
       * Init. the base coefficients (frequencies in KHz), the ones above the maximum are left to the boost mode.
       */
      val baseData: Map[Double, Array[Double]] = frequencies
        .map(frequency => frequency.toDouble / (baseFrequency * 1E6))
        .filter(_ <= maxCoefficient)
        .map(coefficient => coefficient -> Array.empty[Double])
        .toMap

      /**
       * One (coefficient, unhalted cycles, power) triple per run, built from the medians.
       */
      val samples = for {
        frequency <- frequencies.toSeq
        i <- 0 until powerData(frequency).size
      } yield {
        val unhaltedCycles = unhaltedCycleData(frequency)(i).median
        val refCycles = refCycleData(frequency)(i).median
        (math.round(unhaltedCycles / refCycles).toDouble, unhaltedCycles, powerData(frequency)(i).median)
      }

      /**
       * Classify the data with the coefficients.
       */
      val data = samples.foldLeft(baseData) { case (acc, (coefficient, unhaltedCycles, power)) =>
        val bucket = {
          // Boost mode, or a coefficient which is already known: the sample keeps its own coefficient.
          if(coefficient > maxCoefficient || acc.contains(coefficient)) Some(coefficient)
          // Frequencies before boost mode: closest known coefficient below, if there is one.
          else acc.keys.filter(_ < coefficient).reduceOption(_ max _)
        }

        bucket match {
          case Some(key) => acc + (key -> (acc.getOrElse(key, Array.empty[Double]) ++ Array(unhaltedCycles, power)))
          case None =>
            log.warn(s"No known coefficient below $coefficient, the sample is ignored")
            acc
        }
      }

      if((Path("/") / (samplesDir, '/')).exists) {
        (Path("/") / (prDataDir, '/')).deleteRecursively(force = true)
        (Path("/") / (prDataDir, '/')).createDirectory(failIfExists = false)

        for((coefficient, values) <- data) {
          // values is a flat sequence of (unhalted cycles, power) pairs, hence the 2-column matrix.
          val matrix = Mat(values.size / 2, 2, values)
          Frame("unhalted-cycles" -> matrix.col(0), "P" -> matrix.col(1)).writeCsvFile(s"$prDataDir/$coefficient.csv")
        }
      }
    }
  }
}
26 changes: 26 additions & 0 deletions src/test/resources/pr-data/12.0.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
,unhalted-cycles,P
0,109401240.0,86.793169744825605
1,1685445701.0,99.615563852959923
2,1222011540.0,96.299012230097603
3,995393395.0,94.102591797337425
4,591762811.0,91.568055628461991
5,1694723255.0,100.02878730780414
6,1223227825.0,96.397918615516829
7,981560114.0,94.249925194425501
8,611743408.0,90.446330582700782
9,3282200705.0,101.72323054251277
10,2798579051.0,101.47009133002921
11,2522452990.0,101.28192902770185
12,2153209839.0,101.01787299563559
13,2797053692.0,101.48585521750661
14,2423735680.0,98.170766448468385
15,2100817957.0,99.150354503262889
16,1750597810.0,98.433905467689698
17,2514906382.0,101.34061784977843
18,2111819327.0,99.884008446498413
19,1877157880.0,97.700993868185805
20,1438020638.0,96.679223222523746
21,2099113213.0,101.0407109822026
22,1757637512.0,98.861844795331393
23,1415930123.0,97.452570721761958
24,1032444544.0,94.5361642038838
27 changes: 27 additions & 0 deletions src/test/resources/pr-data/13.0.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
,unhalted-cycles,P
0,110227394.0,90.799380528005173
1,1811659256.0,101.11507635719992
2,1320977169.0,97.859811759474866
3,1005195010.0,95.181959583117788
4,620164822.0,92.44463257367417
5,1827751761.0,101.03001249901155
6,1421312380.0,98.113256642906748
7,1046714900.0,95.448722986440558
8,636208608.0,90.917107510384966
9,3546215506.0,102.97431990032894
10,3020390392.0,102.68238231104847
11,2725795333.0,102.49762605643092
12,2327658412.0,102.12741487074251
13,3027799438.0,102.81582951362735
14,2576228513.0,101.65205287883359
15,2265194280.0,100.29557254476607
16,1875341633.0,99.476985745177174
17,2727141924.0,102.4377582178394
18,2250318128.0,100.69687483262599
19,1999369288.0,98.535169886138789
20,1545171924.0,97.376720659301384
21,2308166478.0,101.91156751395344
22,1947283609.0,99.35484836768596
23,1559604608.0,97.619598061377218
24,1191498548.0,94.892838533289648
25,86434494.0,87.003362189070828
27 changes: 27 additions & 0 deletions src/test/resources/pr-data/14.0.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
,unhalted-cycles,P
0,184617772.0,88.889635777486603
1,1943101666.0,102.46932966007256
2,1519631914.0,99.670781126409196
3,1186304181.0,97.152532852351129
4,706909290.0,91.624604753900357
5,1961402598.0,102.65939148900731
6,1542514652.0,99.366813201948602
7,1145007851.0,96.236917360311224
8,636707288.0,91.986912162210885
9,3810538006.0,104.41010001184515
10,3244026838.0,104.04480139496897
11,2927750658.0,103.72467657752598
12,2497465932.0,103.33088506153896
13,3240774638.0,104.12388283602603
14,2783696730.0,101.99951341251165
15,2455791020.0,101.92060664056791
16,2002665170.0,100.69272644118456
17,2910645544.0,103.93257648965886
18,2405468088.0,101.78938646918391
19,2168918761.0,99.879554384108673
20,1674323603.0,98.509100520975309
21,2480651480.0,103.34874497837625
22,1934180999.0,100.43273146600306
23,1671072054.0,98.110461937093575
24,1198327820.0,95.376846646308081
25,140551116.0,88.164518787164752
Loading

0 comments on commit edbdc78

Please sign in to comment.