Skip to content

Commit

Permalink
Improved PETSCII conversion when copying and pasting text
Browse files Browse the repository at this point in the history
  • Loading branch information
istvan-v committed May 9, 2018
1 parent e81ce21 commit 549b44c
Show file tree
Hide file tree
Showing 5 changed files with 259 additions and 49 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Changes in version 1.2.11
and used in read-only mode
* added support for comments (beginning with ; or #) in the breakpoint
list
* fixed copying and pasting upper/lower case characters, added limited
support for PETSCII graphic characters
* fixed DigiBlaster bug, SID external input was muted by default
* fixed horizontal scroll bug in multicolor character mode
* minor fixes in 1551 and TED emulation
Expand Down
1 change: 1 addition & 0 deletions SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ if not fltkVersion13:
plus4emuLibSources2 += ['Fl_Native_File_Chooser/Fl_Native_File_Chooser.cxx']
plus4emuLibSources2 += Split('''
src/cfg_db.cpp
src/charconv.cpp
src/compress.cpp
src/comprlib.cpp
src/decompm2.cpp
Expand Down
179 changes: 179 additions & 0 deletions src/charconv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@

// plus4emu -- portable Commodore Plus/4 emulator
// Copyright (C) 2003-2018 Istvan Varga <[email protected]>
// https://github.com/istvan-v/plus4emu/
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

#include "plus4emu.hpp"
#include "charconv.hpp"

namespace Plus4Emu {

  // Unicode code points for the PETSCII graphic character ranges:
  // entries 0..31 are indexed by (c & 0x1F) for PETSCII 0x60..0x7F,
  // entries 32..63 are indexed by (c & 0x3F) for PETSCII 0xA0..0xBF.
  // 0x003F ('?') is stored for codes that have no mapping in this table.
  static const uint16_t petsciiToUnicode[64] = {
    0x2500, 0x2660, 0x2502, 0x2500, 0x003F, 0x003F, 0x003F, 0x003F,   // 0x60
    0x003F, 0x256E, 0x2570, 0x256F, 0x003F, 0x2572, 0x2571, 0x003F,   // 0x68
    0x003F, 0x25CF, 0x003F, 0x2665, 0x003F, 0x256D, 0x2573, 0x25CB,   // 0x70
    0x2663, 0x003F, 0x2666, 0x253C, 0x003F, 0x2502, 0x03C0, 0x25E5,   // 0x78
    0x00A0, 0x258C, 0x2584, 0x2594, 0x2581, 0x258F, 0x2592, 0x2595,   // 0xA0
    0x003F, 0x25E4, 0x003F, 0x251C, 0x2597, 0x2514, 0x2510, 0x2582,   // 0xA8
    0x250C, 0x2534, 0x252C, 0x2524, 0x258E, 0x258D, 0x003F, 0x003F,   // 0xB0
    0x003F, 0x2583, 0x2713, 0x2596, 0x259D, 0x2518, 0x2598, 0x259A    // 0xB8
  };

  // Reverse mapping, stored as (Unicode code point, PETSCII code) pairs.
  // The pairs MUST stay sorted by code point in ascending order, because
  // utf8ToPETSCII() looks them up with a binary search.
  static const uint16_t unicodeToPETSCII[94] = {
    0x00A0, 0xA0,   0x00A3, 0x5C,   0x03C0, 0x7E,   0x2190, 0x5F,
    0x2500, 0x63,   0x2502, 0x62,   0x250C, 0xB0,   0x2510, 0xAE,
    0x2514, 0xAD,   0x2518, 0xBD,   0x251C, 0xAB,   0x2524, 0xB3,
    0x252C, 0xB2,   0x2534, 0xB1,   0x253C, 0x7B,   0x256D, 0x75,
    0x256E, 0x69,   0x256F, 0x6B,   0x2570, 0x6A,   0x2571, 0x6E,
    0x2572, 0x6D,   0x2573, 0x76,   0x2581, 0xA4,   0x2582, 0xAF,
    0x2583, 0xB9,   0x2584, 0xA2,   0x258C, 0xA1,   0x258D, 0xB5,
    0x258E, 0xB4,   0x258F, 0xA5,   0x2592, 0xA6,   0x2594, 0xA3,
    0x2595, 0xA7,   0x2596, 0xBB,   0x2597, 0xAC,   0x2598, 0xBE,
    0x259A, 0xBF,   0x259D, 0xBC,   0x25CB, 0x77,   0x25CF, 0x71,
    0x25E4, 0xA9,   0x25E5, 0x7F,   0x2660, 0x61,   0x2663, 0x78,
    0x2665, 0x73,   0x2666, 0x7A,   0x2713, 0xBA
  };

  /*!
   * Decode one UTF-8 encoded character starting at buf[bufPos] and convert
   * it to PETSCII. 'bufPos' is advanced past all bytes that were consumed.
   *
   * Returns 0x00 only if 'buf' is NULL or the terminating '\0' has been
   * reached (in which case 'bufPos' is not changed), and 0xFF if the
   * character is invalid or has no PETSCII equivalent.
   *
   * ASCII handling: '\n' is converted to '\r', '\t' is returned unchanged,
   * all other control characters are invalid. Lower case letters are always
   * mapped to 0x41..0x5A; upper case letters are mapped to 0x61..0x7A if
   * 'lowerCaseMode' is true, and are left at 0x41..0x5A otherwise.
   */
  uint8_t utf8ToPETSCII(const char *buf, size_t& bufPos, bool lowerCaseMode)
  {
    if (!buf)
      return 0x00;
    uint8_t c = uint8_t(buf[bufPos]);
    if (!c)
      return 0x00;                      // end of input, nothing is consumed
    bufPos++;
    // a single byte (including a stray byte in the range 0x80..0xBF, which
    // is looked up below as if it were the code point itself) is used as is
    uint32_t  w = c;
    int       n = 0;                    // number of continuation bytes
    if (c >= 0xF0) {
      w = c & 0x07;
      n = 3;
    }
    else if (c >= 0xE0) {
      w = c & 0x0F;
      n = 2;
    }
    else if (c >= 0xC0) {
      w = c & 0x1F;
      n = 1;
    }
    const bool  isMultiByte = (n > 0);
    for ( ; n > 0; bufPos++, n--) {
      c = uint8_t(buf[bufPos]);
      if ((c & 0xC0) != 0x80)           // also stops at the terminating '\0'
        break;
      w = (w << 6) | (c & 0x3F);
    }
    // Truncated sequences (n > 0) and overlong encodings of ASCII characters
    // are invalid. In particular, they must not be reported as 0x00, since
    // the caller would interpret that as the end of the input buffer.
    if (n > 0 || (isMultiByte && w < 0x0080U))
      return 0xFF;
    if (w >= 0x0080U) {
      // binary search in the sorted (code point, PETSCII) pair table
      size_t  first = 0;
      size_t  last = (sizeof(unicodeToPETSCII) / sizeof(uint16_t)) >> 1;
      while (last > first) {
        size_t    mid = (first + last) >> 1;
        uint32_t  codePoint = unicodeToPETSCII[mid << 1];
        if (w < codePoint)
          last = mid;
        else if (w > codePoint)
          first = mid + 1;
        else
          return uint8_t(unicodeToPETSCII[(mid << 1) + 1]);
      }
      return 0xFF;                      // no PETSCII equivalent
    }
    c = uint8_t(w);
    switch (c & 0xE0) {
    case 0x00:                          // control characters
      if (c == '\n')
        c = '\r';
      else if (c != '\t')
        c = 0xFF;
      break;
    case 0x40:                          // upper case letters
      if (c >= 0x41 && c <= 0x5A && lowerCaseMode)
        c = c | 0x20;
      break;
    case 0x60:                          // lower case letters
      if (c >= 0x61 && c <= 0x7A)
        c = c & 0xDF;
      break;
    }
    return c;
  }

  /*!
   * Convert PETSCII character 'c' to UTF-8, and append it to 'buf'.
   * Nothing is appended if the resulting code point is zero.
   *
   * Codes 0x5C and 0x5F are converted to U+00A3 and U+2190. Codes in the
   * ranges 0x60..0x7F and 0xA0..0xBF are translated with petsciiToUnicode[],
   * except that 0x61..0x7A become 'A'..'Z' if 'lowerCaseMode' is true.
   * If 'lowerCaseMode' is true, 0x41..0x5A become 'a'..'z'. All other codes
   * are used as the Unicode code point without any translation.
   */
  void petsciiToUTF8(std::string& buf, uint8_t c, bool lowerCaseMode)
  {
    uint32_t  w = c;
    switch (c & 0xE0) {
    case 0x40:
      if (lowerCaseMode && c >= 0x41 && c <= 0x5A)
        w = w | 0x20;
      if (c == 0x5C)
        w = 0x00A3;
      if (c == 0x5F)
        w = 0x2190;
      break;
    case 0x60:
      if (lowerCaseMode && c >= 0x61 && c <= 0x7A)
        w = w & 0xDF;
      else
        w = petsciiToUnicode[c & 0x1F];
      break;
    case 0xA0:
      w = petsciiToUnicode[c & 0x3F];
      break;
    }
    // encode the code point (always less than 0x10000 here) as UTF-8
    if (w < 0x0080U) {
      if (w)
        buf += char(w);
    }
    else if (w < 0x0800U) {
      buf += char((w >> 6) | 0xC0);
      buf += char((w & 0x3F) | 0x80);
    }
    else {
      buf += char((w >> 12) | 0xE0);
      buf += char(((w >> 6) & 0x3F) | 0x80);
      buf += char((w & 0x3F) | 0x80);
    }
  }

  /*!
   * Convert a character from screen memory to UTF-8, and append it to 'buf'.
   * Bit 7 of 'c' is ignored. The remaining 7-bit code is first translated
   * to PETSCII (0x00..0x1F -> 0x40..0x5F, 0x20..0x3F unchanged,
   * 0x40..0x5F -> 0x60..0x7F, 0x60..0x7F -> 0xA0..0xBF), and is then
   * converted with petsciiToUTF8().
   */
  void screenCharToUTF8(std::string& buf, uint8_t c, bool lowerCaseMode)
  {
    c = c & 0x7F;
    switch (c & 0xE0) {
    case 0x00:
      c = c | 0x40;
      break;
    case 0x40:
      c = c | 0x20;
      break;
    case 0x60:
      c = c ^ 0xC0;
      break;
    }
    petsciiToUTF8(buf, c, lowerCaseMode);
  }

}       // namespace Plus4Emu

47 changes: 47 additions & 0 deletions src/charconv.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

// plus4emu -- portable Commodore Plus/4 emulator
// Copyright (C) 2003-2018 Istvan Varga <[email protected]>
// https://github.com/istvan-v/plus4emu/
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

#ifndef PLUS4EMU_CHARCONV_HPP
#define PLUS4EMU_CHARCONV_HPP

#include "plus4emu.hpp"

namespace Plus4Emu {

/*!
 * Convert a single UTF-8 encoded character from 'buf' to PETSCII.
 * The character is read starting at buf[bufPos], and 'bufPos' is advanced
 * past the bytes that were consumed.
 * Returns 0 if 'buf' is NULL or the end of the input buffer (terminating
 * '\0' character) has been reached, 0xFF on invalid character or if the
 * character has no PETSCII equivalent.
 * '\n' is converted to '\r' and '\t' is returned unchanged, any other
 * control character is reported as invalid.
 * Lower case ASCII letters are always converted to 0x41..0x5A, upper case
 * letters are converted to 0x61..0x7A only if 'lowerCaseMode' is true.
 */
uint8_t utf8ToPETSCII(const char *buf, size_t& bufPos, bool lowerCaseMode);

/*!
 * Convert PETSCII character 'c' to UTF-8, and append it to 'buf'
 * (the existing contents of 'buf' are preserved, and nothing is
 * appended if 'c' is zero).
 * If 'lowerCaseMode' is true, 0x41..0x5A are converted to lower case and
 * 0x61..0x7A to upper case ASCII letters; otherwise 0x61..0x7A are
 * converted to graphic characters.
 */
void petsciiToUTF8(std::string& buf, uint8_t c, bool lowerCaseMode);

/*!
 * Convert a character from screen memory to UTF-8, and append it to 'buf'.
 * Bit 7 of 'c' is ignored; the remaining screen code is translated to
 * PETSCII and then converted the same way as by petsciiToUTF8().
 */
void screenCharToUTF8(std::string& buf, uint8_t c, bool lowerCaseMode);

} // namespace Plus4Emu

#endif // PLUS4EMU_CHARCONV_HPP

79 changes: 30 additions & 49 deletions src/plus4vm.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

// plus4emu -- portable Commodore Plus/4 emulator
// Copyright (C) 2003-2017 Istvan Varga <[email protected]>
// Copyright (C) 2003-2018 Istvan Varga <[email protected]>
// https://github.com/istvan-v/plus4emu/
//
// This program is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -37,6 +37,7 @@
#include "vc1581.hpp"
#include "iecdrive.hpp"
#include "system.hpp"
#include "charconv.hpp"

static void writeDemoTimeCnt(Plus4Emu::File::Buffer& buf, uint64_t n)
{
Expand Down Expand Up @@ -657,27 +658,22 @@ namespace Plus4 {
return;
}
int charCnt = 0;
char c = '\0';
uint8_t c = '\0';
do {
c = vm.pasteTextBuffer[vm.pasteTextBufferPos++];
c = Plus4Emu::utf8ToPETSCII(vm.pasteTextBuffer, vm.pasteTextBufferPos,
bool(vm.ted->readMemoryCPU(0xFF13) & 0x04));
if (c == '\0') {
vm.ted->writeMemoryCPU(0x00EF, uint8_t(charCnt));
vm.removePasteTextCallback(); // done pasting all characters
return;
}
if (c == '\n')
c = '\r';
if (c >= char(0x7F) || (c < char(0x20) && !(c == '\t' || c == '\r'))) {
if (c == char(0xFF)) {
// ignore invalid characters
continue;
}
// convert tabs to spaces, and swap upper and lower case
// convert tabs to spaces
if (c == '\t')
c = ' ';
else if (c >= 'A' && c <= 'Z')
c = (c - 'A') + 'a';
else if (c >= 'a' && c <= 'z')
c = (c - 'a') + 'A';
// store characters in keyboard buffer
vm.ted->writeMemoryCPU(uint16_t(0x0527 + charCnt), uint8_t(c));
charCnt++;
Expand Down Expand Up @@ -1277,7 +1273,7 @@ namespace Plus4 {

std::string Plus4VM::copyText(int xPos, int yPos) const
{
std::string s = "";
std::string s;
if (yPos >= 100) {
yPos = (yPos * 288) >> 16;
if (!ted->getIsNTSCMode()) {
Expand Down Expand Up @@ -1334,51 +1330,38 @@ namespace Plus4 {
y++;
lastLine = y;
}
std::string tmpBuf;
for (int y = firstLine; y <= lastLine; y++) {
char tmpBuf[42];
tmpBuf.clear();
for (int x = 0; x < 40; x++) {
uint8_t tmp = ted->readMemoryCPU(0x0C00 | ((y * 40) + x)) & 0x7F;
if (!((tmp >= 0x20 && tmp <= 0x3F) || (tmp >= 0x41 && tmp <= 0x5A))) {
// convert character codes to ASCII:
if (tmp >= 0x01 && tmp <= 0x1A)
tmp = tmp + 0x60;
else if (tmp == 0x00 || tmp == 0x1B || tmp == 0x1D || tmp == 0x1E)
tmp = tmp + 0x40;
else if (tmp == 0x60)
tmp = 0x20;
else
tmp = 0x5F; // replace any invalid characters with underscores
}
tmpBuf[x] = char(tmp);
Plus4Emu::screenCharToUTF8(tmpBuf,
ted->readMemoryCPU(0x0C00 | ((y * 40) + x)),
bool(ted->readMemoryCPU(0xFF13) & 0x04));
}
tmpBuf[40] = '\0';
if (xPos < 0) {
if (yPos >= 0 && !lineContTable[y]) {
// strip leading spaces if this is a first line
while (tmpBuf[0] == ' ') {
for (int i = 0; tmpBuf[i] != '\0'; i++)
tmpBuf[i] = tmpBuf[i + 1];
}
size_t n = 0;
for ( ; n < tmpBuf.length() && tmpBuf[n] == ' '; n++)
;
if (n)
tmpBuf.erase(0, n);
}
if (!lineContTable[y + 1]) {
// strip trailing spaces if this is a last line,
// and append a newline character
int i = 40;
while (--i >= 0) {
if (tmpBuf[i] == ' ')
tmpBuf[i] = '\0';
if (tmpBuf[i] == '\0')
continue;
break;
}
if (s.length() < 1 && i < 0) // skip any leading empty lines
size_t n = tmpBuf.length();
for ( ; n > 0 && tmpBuf[n - 1] == ' '; n--)
;
if (n < tmpBuf.length())
tmpBuf.resize(n);
if (s.empty() && tmpBuf.empty()) // skip any leading empty lines
continue;
tmpBuf[i + 1] = '\n';
tmpBuf[i + 2] = '\0';
tmpBuf += '\n';
}
}
// append to output buffer
s += &(tmpBuf[0]);
s += tmpBuf;
}
// remove trailing newline characters
while (s.length() > 0 && s[s.length() - 1] == '\n')
Expand All @@ -1389,9 +1372,8 @@ namespace Plus4 {
int endPos = xPos;
while (startPos >= 0) {
char c = s[startPos];
if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9') ||
c == '.' || c == '"' || c == '#' || c == '$' || c == '_')) {
if ((unsigned char) c <= (unsigned char) ' ' || c == '[' || c == ']' ||
(c >= '&' && c <= '-') || c == '/' || (c >= ':' && c <= '?')) {
break;
}
startPos--;
Expand All @@ -1400,9 +1382,8 @@ namespace Plus4 {
startPos++;
while (size_t(endPos) < s.length()) {
char c = s[endPos];
if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9') ||
c == '.' || c == '"' || c == '#' || c == '$' || c == '_')) {
if ((unsigned char) c <= (unsigned char) ' ' || c == '[' || c == ']' ||
(c >= '&' && c <= '-') || c == '/' || (c >= ':' && c <= '?')) {
if (endPos == xPos)
endPos++;
break;
Expand Down

0 comments on commit 549b44c

Please sign in to comment.