Skip to content

Commit

Permalink
added warmup instruction support; updated all sample configuration fi…
Browse files Browse the repository at this point in the history
…les with 100 million warmup instructions; added 3200 data rate option for DDR4; fixed some command scheduling bugs that were leading to activate-precharge without read or write command; changed the default scheduler to FRFCFS_Cap;
  • Loading branch information
Hasan Hassan committed Dec 12, 2017
1 parent 7ce65d0 commit cd96ed6
Show file tree
Hide file tree
Showing 24 changed files with 228 additions and 53 deletions.
1 change: 1 addition & 0 deletions configs/ALDRAM-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/DDR3-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/DDR4-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/DSARP-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/GDDR5-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/HBM-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/LPDDR3-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/LPDDR4-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/SALP-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/TLDRAM-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/WideIO-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
1 change: 1 addition & 0 deletions configs/WideIO2-config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# early_exit = on, off (default value is on)
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit.
expected_limit_insts = 200000000
warmup_insts = 100000000
cache = no
# cache = no, L1L2, L3, all (default value is no)
translation = None
Expand Down
2 changes: 2 additions & 0 deletions src/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ void Config::parse(const string& fname)
mem_tick = atoi(tokens[1].c_str());
} else if (tokens[0] == "expected_limit_insts") {
expected_limit_insts = atoi(tokens[1].c_str());
} else if (tokens[0] == "warmup_insts") {
warmup_insts = atoi(tokens[1].c_str());
}
}
file.close();
Expand Down
3 changes: 3 additions & 0 deletions src/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class Config {
int mem_tick;
int core_num = 0;
long expected_limit_insts = 0;
long warmup_insts = 0;

public:
Config() {}
Expand Down Expand Up @@ -60,6 +61,8 @@ class Config {
int get_mem_tick() const {return mem_tick;}
int get_core_num() const {return core_num;}
long get_expected_limit_insts() const {return expected_limit_insts;}
long get_warmup_insts() const {return warmup_insts;}

bool has_l3_cache() const {
if (options.find("cache") != options.end()) {
const std::string& cache_option = (options.find("cache"))->second;
Expand Down
2 changes: 1 addition & 1 deletion src/Controller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ void Controller<TLDRAM>::tick(){
/*** 3. Should we schedule writes? ***/
if (!write_mode) {
// yes -- write queue is almost full or read queue is empty
if (writeq.size() >= int(0.8 * writeq.max) || readq.size() == 0)
if (writeq.size() >= int(0.8 * writeq.max) /*|| readq.size() == 0*/)
write_mode = true;
}
else {
Expand Down
51 changes: 46 additions & 5 deletions src/Controller.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ using namespace std;
namespace ramulator
{

extern bool warmup_complete;

template <typename T>
class Controller
{
Expand All @@ -42,6 +44,7 @@ class Controller
VectorStat write_row_hits;
VectorStat write_row_misses;
VectorStat write_row_conflicts;
ScalarStat useless_activates;

ScalarStat read_latency_avg;
ScalarStat read_latency_sum;
Expand Down Expand Up @@ -80,6 +83,11 @@ class Controller

Queue readq; // queue for read requests
Queue writeq; // queue for write requests
Queue actq; // read and write requests for which activate was issued are moved to
// actq, which has higher priority than readq and writeq.
// This is an optimization
// for avoiding useless activations (i.e., PRECHARGE
// after ACTIVATE w/o READ or WRITE command)
Queue otherq; // queue for all "other" requests (e.g., refresh)

deque<Request> pending; // read requests that are about to receive data from DRAM
Expand Down Expand Up @@ -170,6 +178,12 @@ class Controller
.precision(0)
;

useless_activates
.name("useless_activates_"+to_string(channel->id)+ "_core")
.desc("Number of useless activations. E.g, ACT -> PRE w/o RD or WR")
.precision(0)
;

read_transaction_bytes
.name("read_transaction_bytes_"+to_string(channel->id))
.desc("The total byte of read transaction per channel")
Expand Down Expand Up @@ -340,7 +354,10 @@ class Controller
/*** 3. Should we schedule writes? ***/
if (!write_mode) {
// yes -- write queue is almost full or read queue is empty
if (writeq.size() >= int(0.8 * writeq.max) || readq.size() == 0)

This comment has been minimized.

Copy link
@lucjaulmes

lucjaulmes Jul 2, 2018

This change (repeated in Controller.cpp) obviously causes the simulation to hang forever if a program is waiting on a few writes and no reads.

This comment has been minimized.

Copy link
@arthasSin

arthasSin Jul 2, 2018

Member

Thanks for the feedback. As a quick solution, you may implement a counter to track for how many cycles the readq is empty and drain the writes if the counter exceeds some threshold, even if there are not too many write requests in writeq.

if (writeq.size() >= int(0.8 * writeq.max)
/*|| readq.size() == 0*/) // Hasan: Switching to write mode when there are just a few
// write requests, even if the read queue is empty, incurs a lot of overhead.
// Commented out the read request queue empty condition
write_mode = true;
}
else {
Expand All @@ -350,11 +367,21 @@ class Controller
}

/*** 4. Find the best command to schedule, if any ***/
Queue* queue = !write_mode ? &readq : &writeq;
if (otherq.size())
queue = &otherq; // "other" requests are rare, so we give them precedence over reads/writes

// First check the actq (which has higher priority) to see if there
// are requests available to service in this cycle
Queue* queue = &actq;

auto req = scheduler->get_head(queue->q);
if (req == queue->q.end() || !is_ready(req)) {
queue = !write_mode ? &readq : &writeq;

if (otherq.size())
queue = &otherq; // "other" requests are rare, so we give them precedence over reads/writes

req = scheduler->get_head(queue->q);
}

if (req == queue->q.end() || !is_ready(req)) {
// we couldn't find a command to schedule -- let's try to be speculative
auto cmd = T::Command::PRE;
Expand Down Expand Up @@ -404,8 +431,15 @@ class Controller
issue_cmd(cmd, get_addr_vec(cmd, req));

// check whether this is the last command (which finishes the request)
if (cmd != channel->spec->translate[int(req->type)])
if (cmd != channel->spec->translate[int(req->type)]){
if(channel->spec->is_opening(cmd)) {
// promote the request that caused issuing activation to actq
actq.q.push_back(*req);
queue->q.erase(req);
}

return;
}

// set a future completion time for read requests
if (req->type == Request::Type::READ) {
Expand Down Expand Up @@ -492,6 +526,13 @@ class Controller
{
assert(is_ready(cmd, addr_vec));
channel->update(cmd, addr_vec.data(), clk);

if(cmd == T::Command::PRE){
if(rowtable->get_hits(addr_vec, true) == 0){
useless_activates++;
}
}

rowtable->update(cmd, addr_vec, clk);
if (record_cmd_trace){
// select rank
Expand Down
54 changes: 28 additions & 26 deletions src/DDR4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ map<string, enum DDR4::Speed> DDR4::speed_map = {
{"DDR4_1866M", DDR4::Speed::DDR4_1866M}, {"DDR4_1866N", DDR4::Speed::DDR4_1866N},
{"DDR4_2133P", DDR4::Speed::DDR4_2133P}, {"DDR4_2133R", DDR4::Speed::DDR4_2133R},
{"DDR4_2400R", DDR4::Speed::DDR4_2400R}, {"DDR4_2400U", DDR4::Speed::DDR4_2400U},
{"DDR4_3200", DDR4::Speed::DDR4_3200},
};


Expand Down Expand Up @@ -52,40 +53,40 @@ void DDR4::set_rank_number(int rank) {

void DDR4::init_speed()
{
const static int RRDS_TABLE[2][4] = {
{4, 4, 4, 4},
{5, 5, 6, 7}
const static int RRDS_TABLE[2][5] = {
{4, 4, 4, 4, 4},
{5, 5, 6, 7, 9}
};
const static int RRDL_TABLE[2][4] = {
{5, 5, 6, 6},
{6, 6, 7, 8}
const static int RRDL_TABLE[2][5] = {
{5, 5, 6, 6, 8},
{6, 6, 7, 8, 11}
};
const static int FAW_TABLE[3][4] = {
{16, 16, 16, 16},
{20, 22, 23, 26},
{28, 28, 32, 36}
const static int FAW_TABLE[3][5] = {
{16, 16, 16, 16, 16},
{20, 22, 23, 26, 34},
{28, 28, 32, 36, 48}
};
const static int RFC_TABLE[int(RefreshMode::MAX)][3][4] = {{
{128, 150, 171, 192},
{208, 243, 278, 312},
{280, 327, 374, 420}
const static int RFC_TABLE[int(RefreshMode::MAX)][3][5] = {{
{128, 150, 171, 192, 256},
{208, 243, 278, 312, 416},
{280, 327, 374, 420, 560}
},{
{88, 103, 118, 132},
{128, 150, 171, 192},
{208, 243, 278, 312}
{88, 103, 118, 132, 176},
{128, 150, 171, 192, 256},
{208, 243, 278, 312, 416}
},{
{72, 84, 96, 108},
{88, 103, 118, 132},
{128, 150, 171, 192}
{72, 84, 96, 108, 144},
{88, 103, 118, 132, 176},
{128, 150, 171, 192, 256}
}
};
const static int REFI_TABLE[4] = {
6240, 7280, 8320, 9360
const static int REFI_TABLE[5] = {
6240, 7280, 8320, 9360, 12480
};
const static int XS_TABLE[3][4] = {
{136, 159, 182, 204},
{216, 252, 288, 324},
{288, 336, 384, 432}
const static int XS_TABLE[3][5] = {
{136, 159, 182, 204, 272},
{216, 252, 288, 324, 532},

This comment has been minimized.

Copy link
@RSpliet

RSpliet Jul 11, 2018

Contributor

Should be
{216, 252, 288, 324, 432},

{288, 336, 384, 432, 576}
};

int speed = 0, density = 0;
Expand All @@ -94,6 +95,7 @@ void DDR4::init_speed()
case 1866: speed = 1; break;
case 2133: speed = 2; break;
case 2400: speed = 3; break;
case 3200: speed = 4; break;
default: assert(false);
};
switch (org_entry.size >> 10){
Expand Down
5 changes: 4 additions & 1 deletion src/DDR4.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ class DDR4
DDR4_1866M, DDR4_1866N,
DDR4_2133P, DDR4_2133R,
DDR4_2400R, DDR4_2400U,
DDR4_3200,
MAX
};

Expand Down Expand Up @@ -192,7 +193,9 @@ class DDR4
{2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 6, 2, 15, 15, 15, 11, 36, 51, 8, 3, 8, 16, 0, 0, 0, 0, 0, 6, 7, 0, 7, 0, 0},
{2133, (400.0/3)*8, (3/0.4)/8, 4, 4, 6, 2, 16, 16, 16, 11, 36, 52, 8, 3, 8, 16, 0, 0, 0, 0, 0, 6, 7, 0, 7, 0, 0},
{2400, (400.0/3)*9, (3/0.4)/9, 4, 4, 6, 2, 16, 16, 16, 12, 39, 55, 9, 3, 9, 18, 0, 0, 0, 0, 0, 6, 8, 0, 7, 0, 0},
{2400, (400.0/3)*9, (3/0.4)/9, 4, 4, 6, 2, 18, 18, 18, 12, 39, 57, 9, 3, 9, 18, 0, 0, 0, 0, 0, 6, 8, 0, 7, 0, 0}
{2400, (400.0/3)*9, (3/0.4)/9, 4, 4, 6, 2, 18, 18, 18, 12, 39, 57, 9, 3, 9, 18, 0, 0, 0, 0, 0, 6, 8, 0, 7, 0, 0},
{3200, 1600, 0.625, prefetch_size/2/*DDR*/, 4, 10, 2, 22, 22, 22, 16, 56, 78, 12, 4, 12, 24, 8, 10, 40, 0, 0, 8, 10, 0, 8, 0, 0}
//rate, freq, tCK, nBL, nCCDS nCCDL nRTRS nCL nRCD nRP nCWL nRAS nRC nRTP nWTRS nWTRL nWR nRRDS nRRDL nFAW nRFC nREFI nPD nXP nXPDLL nCKESR nXS nXSDLL
}, speed_entry;

int read_latency;
Expand Down
Loading

0 comments on commit cd96ed6

Please sign in to comment.