Skip to content

Commit

Permalink
0.2.1 FastShiftInOut
Browse files Browse the repository at this point in the history
RobTillaart committed Nov 1, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent c10a901 commit e668c02
Showing 7 changed files with 114 additions and 55 deletions.
3 changes: 3 additions & 0 deletions libraries/FastShiftInOut/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).


## [0.2.1] - 2024-10-31
- fix #9, more optimizations

## [0.2.0] - 2024-09-10
- fix #7, loop unroll option, improving performance, kudos to nt314p
- added flag to select LOOP UNROLL (is optional as it gives larger code size)
101 changes: 58 additions & 43 deletions libraries/FastShiftInOut/FastShiftInOut.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//
// FILE: FastShiftInOut.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.2.0
// VERSION: 0.2.1
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
// URL: https://github.com/RobTillaart/FastShiftInOut

@@ -109,8 +109,12 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// See discussion #17 FastShiftOut
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

if ((value & 0x01) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
// *localClockRegister |= cbmask1;
// if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
// *localClockRegister &= cbmask2; // ~_clockBit;
@@ -121,50 +125,50 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
*localClockRegister = r; // reset it

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x02) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x04) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x08) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x10) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x20) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x40) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
*localClockRegister = r; // reset it

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x80) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
@@ -185,16 +189,19 @@ uint8_t FastShiftInOut::writeLSBFIRST(uint8_t data)

uint8_t oldSREG = SREG;
noInterrupts();

uint8_t r = *localClockRegister;


uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

for (uint8_t m = 1; m > 0; m <<= 1)
{
// write one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & m) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
uint8_t r = *localClockRegister;

// clock pulse HIGH
*localClockRegister |= cbmask1;
*localClockRegister = r | cbmask1;
// read one bit
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
@@ -249,8 +256,12 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// See discussion #17 FastShiftOut
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

if ((value & 0x80) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
// *localClockRegister |= cbmask1;
// if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
// *localClockRegister &= cbmask2; // ~_clockBit;
@@ -261,50 +272,50 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
if ((*localDataInRegister & inmask1) > 0) rv |= 0x80;
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x40) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x40;
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x20) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x20;
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x10) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x10;
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x08) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x08;
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x04) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x04;
*localClockRegister = r; // reset it

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x02) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x02;
*localClockRegister = r; // reset it

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x01) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
if ((*localDataInRegister & inmask1) > 0) rv |= 0x01;
@@ -326,14 +337,18 @@ uint8_t FastShiftInOut::writeMSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

uint8_t r = *localClockRegister;
// See discussion #17 FastShiftOut
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

for (uint8_t m = 0x80; m > 0; m >>= 1)
{
// write one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & m) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
uint8_t r = *localClockRegister;
// clock pulse HIGH
*localClockRegister |= cbmask1;
*localClockRegister = r | cbmask1;
// read one bit
if ((*localDataInRegister & inmask1) > 0) rv |= m;
// clock pulse LOW
6 changes: 3 additions & 3 deletions libraries/FastShiftInOut/FastShiftInOut.h
Original file line number Diff line number Diff line change
@@ -2,18 +2,18 @@
//
// FILE: FastShiftInOut.h
// AUTHOR: Rob Tillaart
// VERSION: 0.2.0
// VERSION: 0.2.1
// PURPOSE: Arduino library for (AVR) optimized shiftInOut (simultaneously)
// URL: https://github.com/RobTillaart/FastShiftInOut


#include "Arduino.h"


#define FASTSHIFTINOUT_LIB_VERSION (F("0.2.0"))
#define FASTSHIFTINOUT_LIB_VERSION (F("0.2.1"))

// uncomment next line to get SPEED OPTIMIZED CODE
#define FASTSHIFTINOUT_AVR_LOOP_UNROLLED 1
// #define FASTSHIFTINOUT_AVR_LOOP_UNROLLED 1


class FastShiftInOut
16 changes: 9 additions & 7 deletions libraries/FastShiftInOut/README.md
Original file line number Diff line number Diff line change
@@ -58,22 +58,23 @@ Performance of **write()**

#### Measurements

(0.2.0)
(0.2.1)
Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 calls.
(delta between 2 calls and 1 call to eliminate overhead)

| function | 0.1.3 | 0.2.0 | 0.2.0L |
|:-------------------------|---------:|---------:|----------:|
| write() (reference) | no data | 158.24 | no data |
| write() | 25.52 | 17.61 | 12.26 |
| writeLSBFIRST() | 25.52 | 17.61 | 12.26 |
| writeMSBFIRST() | 25.52 | 17.60 | 12.20 |
| function | 0.1.3 | 0.2.0 | 0.2.0L | 0.2.1 | 0.2.1L |
|:-------------------------|---------:|---------:|----------:|---------:|----------:|
| write() (reference) | no data | 158.24 | no data | 158.24 | no data |
| write() | 25.52 | 17.61 | 12.26 | 16.72 | 11.00 |
| writeLSBFIRST() | 25.52 | 17.61 | 12.26 | 16.72 | 11.00 |
| writeMSBFIRST() | 25.52 | 17.60 | 12.20 | 16.72 | 10.94 |


- Note: 0.1.3 added from old table.
- Note: reference run on AVR by commenting out all optimizations.
- Note: 0.2.0 measured with loop unroll flag disabled.
- Note: 0.2.0L measured with loop unroll flag enabled.
- Note: 0.2.1 / 0.2.1L measured the same way (loop unroll flag disabled / enabled respectively).


### Related
@@ -83,6 +84,7 @@ Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 cal
- https://github.com/RobTillaart/FastShiftOut
- https://github.com/RobTillaart/ShiftInSlow
- https://github.com/RobTillaart/ShiftOutSlow
- https://github.com/RobTillaart/SWSPI (experimental)


## Interface
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
IDE: 1.8.19
Board: UNO

FASTSHIFTINOUT_LIB_VERSION: 0.2.1

Performance - time in us
write: 17.86
write: 34.58
Delta: 16.72

writeLSBFIRST: 16.98
writeLSBFIRST: 33.70
Delta: 16.72

writeMSBFIRST: 16.98
writeMSBFIRST: 33.70
Delta: 16.72


# loop unrolled.

FASTSHIFTINOUT_LIB_VERSION: 0.2.1

Performance - time in us
write: 12.14
write: 23.14
Delta: 11.00

writeLSBFIRST: 11.26
writeLSBFIRST: 22.26
Delta: 11.00

writeMSBFIRST: 11.19
writeMSBFIRST: 22.13
Delta: 10.94


done ...

2 changes: 1 addition & 1 deletion libraries/FastShiftInOut/library.json
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@
"type": "git",
"url": "https://github.com/RobTillaart/FastShiftInOut.git"
},
"version": "0.2.0",
"version": "0.2.1",
"license": "MIT",
"frameworks": "*",
"platforms": "*",
2 changes: 1 addition & 1 deletion libraries/FastShiftInOut/library.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name=FastShiftInOut
version=0.2.0
version=0.2.1
author=Rob Tillaart <[email protected]>
maintainer=Rob Tillaart <[email protected]>
sentence=Arduino library for (AVR) optimized shiftInOut (simultaneously)

0 comments on commit e668c02

Please sign in to comment.