Skip to content

Commit

Permalink
Merge branch 'feature-auto-detecting-charset'
Browse files Browse the repository at this point in the history
  • Loading branch information
prprhyt committed Oct 29, 2019
2 parents 3c8fc91 + 1df3149 commit 2e5ff38
Show file tree
Hide file tree
Showing 5 changed files with 254 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/main/java/core/packetproxy/gui/GUIData.java
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ public void mousePressed(MouseEvent e) {
}
});

charSetCombo.setSelectedItem(charSetUtility.getInstance().getCharSet());
charSetCombo.setSelectedItem(charSetUtility.getInstance().getCharSetForGUIComponent());

JPanel diff_panel = new JPanel();
diff_panel.add(diff_orig_button);
Expand Down Expand Up @@ -343,7 +343,7 @@ public void updateCharSetCombo(){
for(String charSetName:charSetUtility.getAvailableCharSetList()){
charSetCombo.addItem(charSetName);
}
String charSetName = CharSetUtility.getInstance().getCharSet();
String charSetName = CharSetUtility.getInstance().getCharSetForGUIComponent();
if(charSetUtility.getAvailableCharSetList().contains(charSetName)){
charSetCombo.setSelectedItem(charSetName);
}else{
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/core/packetproxy/gui/GUIHistory.java
Original file line number Diff line number Diff line change
Expand Up @@ -405,9 +405,13 @@ public void actionPerformed(ActionEvent actionEvent) {
try {
Packet packet = gui_packet.getPacket();
Http http = new Http(packet.getSentData());
CharSetUtility charsetutil = CharSetUtility.getInstance();
if(charsetutil.isAuto()){
charsetutil.setGuessedCharSet(http.getBody());
}
String copyData = http.getMethod() + "\t" +
http.getURL(packet.getServerPort()) + "\t" +
new String(http.getBody(), CharSetUtility.getInstance().getCharSet());
new String(http.getBody(), charsetutil.getCharSet());
Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
StringSelection selection = new StringSelection(copyData);
clipboard.setContents(selection, selection);
Expand Down
33 changes: 32 additions & 1 deletion src/main/java/core/packetproxy/gui/RawTextPane.java
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ public void keyPressed(KeyEvent e) {
url_decoder.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String chasetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand All @@ -174,6 +177,10 @@ public void actionPerformed(ActionEvent actionEvent) {
try {
int position_start = getSelectionStart();
int position_end = getSelectionEnd();

if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
byte[] data = new String(getData(), charSetUtility.getCharSet()).substring(position_start, position_end).getBytes();
GUIDecoderDialog dlg = new GUIDecoderDialog();
if (Utils.indexOf(data, 0, data.length, "_".getBytes()) >= 0 ||
Expand All @@ -194,6 +201,9 @@ public void actionPerformed(ActionEvent actionEvent) {
jwt_decoder.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String charSetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand All @@ -215,6 +225,9 @@ public void actionPerformed(ActionEvent actionEvent) {
unicode_unescaper.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String charSetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand All @@ -241,6 +254,9 @@ public void actionPerformed(ActionEvent actionEvent) {
url_encoder.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String charSetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand All @@ -259,6 +275,9 @@ public void actionPerformed(ActionEvent actionEvent) {
base64_encoder.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String charSetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand All @@ -277,6 +296,9 @@ public void actionPerformed(ActionEvent actionEvent) {
base64url_encoder.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String charSetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand All @@ -295,6 +317,9 @@ public void actionPerformed(ActionEvent actionEvent) {
jwt_encoder.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String charSetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand Down Expand Up @@ -324,6 +349,9 @@ public void actionPerformed(ActionEvent actionEvent) {
unicode_escaper.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent actionEvent) {
try {
if(charSetUtility.isAuto()){
charSetUtility.setGuessedCharSet(getData());
}
String charSetName = charSetUtility.getCharSet();
int position_start = getSelectionStart();
int position_end = getSelectionEnd();
Expand Down Expand Up @@ -360,12 +388,15 @@ public void mousePressed(MouseEvent event) {
}

/**
 * Replaces the content of this pane with {@code data}.
 *
 * <p>The charset is looked up only after the optional auto-detection step, so the text is
 * decoded with the charset guessed for this very payload rather than with the value left
 * over from a previous call.
 *
 * @param data raw bytes to show; decoded with the charset reported by {@code charSetUtility}
 * @throws Exception if decoding or updating the pane fails
 */
public void setData(byte[] data) throws Exception {
    // Reset the pane's bookkeeping fields before new content is loaded.
    init_flg = true;
    fin_flg = true;
    init_count = 0;
    prev_text_panel = "";
    raw_data.reset(data);
    if (charSetUtility.isAuto()) {
        // In AUTO mode, re-guess the charset from the payload that was just stored.
        charSetUtility.setGuessedCharSet(getData());
    }
    // Single declaration, read after the guess above (the earlier duplicate read was stale).
    String charSetName = charSetUtility.getCharSet();
    setText(new String(data, charSetName));
    undo_manager.discardAllEdits();
}
Expand Down
181 changes: 181 additions & 0 deletions src/main/java/core/packetproxy/util/CharSetUtility.java
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
package packetproxy.util;

import packetproxy.http.HttpHeader;
import packetproxy.model.CharSet;
import packetproxy.model.CharSets;

import javax.swing.text.html.parser.Parser;
import javax.swing.text.html.parser.ParserDelegator;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

public class CharSetUtility {
private static CharSetUtility instance=null;
private static String DEFAULT_CHARSET = "UTF-8";
private static String AUTO_CHARSET = "AUTO";
private String charSet=DEFAULT_CHARSET;
private boolean isAuto = false;

public static CharSetUtility getInstance(){
if(null==instance){
Expand All @@ -22,10 +30,159 @@ public static CharSetUtility getInstance(){
return instance;
}

/**
 * Extracts the text between the earliest start token found at or after {@code cur} and the
 * earliest end token that follows it.
 *
 * @param data       text to scan
 * @param startToken candidate opening tokens; the one occurring first in {@code data} wins
 * @param endToken   candidate closing tokens; the one occurring first after the opening wins
 * @param cur        index at which the scan starts
 * @param allowEOL   when {@code true}, a missing end token means "read to the end of data"
 * @return a two-element array: {@code [0]} is the extracted {@code String}, {@code [1]} is the
 *         {@code Integer} index just past the end token, or {@code -1} when nothing matched
 */
private Object[] parseDocWithToken(String data, String[] startToken, String[] endToken, int cur, boolean allowEOL){
    return parseDocWithToken(data, data, startToken, endToken, cur, allowEOL);
}

/**
 * Same as the five-argument overload, but tokens are located in {@code searchText} while the
 * returned substring is cut out of {@code data}. Both strings must have the same length; this
 * lets callers search a case-folded copy and still get the original spelling back.
 */
private Object[] parseDocWithToken(String searchText, String data, String[] startToken, String[] endToken, int cur, boolean allowEOL){
    final int length = searchText.length();

    // Pick the opening token that appears first.
    int start = length;
    int startIndex = -1;
    for (int t = 0; t < startToken.length; t++) {
        int found = searchText.indexOf(startToken[t], cur);
        if (found != -1 && found < start) {
            start = found;
            startIndex = t;
        }
    }
    if (startIndex == -1) {
        return new Object[]{"", -1};
    }

    // Pick the closing token that appears first after the opening token.
    final int contentStart = start + startToken[startIndex].length();
    int end = length;
    int endIndex = -1;
    for (int t = 0; t < endToken.length; t++) {
        int found = searchText.indexOf(endToken[t], contentStart);
        if (found != -1 && found < end) {
            end = found;
            endIndex = t;
        }
    }
    if (endIndex == -1 && !allowEOL) {
        return new Object[]{"", -1};
    }

    String content = data.substring(contentStart, end);
    // Resume after the closing token when there is one, otherwise at end of text.
    int next = (endIndex == -1) ? end : end + endToken[endIndex].length();
    return new Object[]{content, next};
}

/**
 * Lower-cases only the ASCII letters A-Z so the result always has the same length as the
 * input (locale-aware lower-casing may change the length of some non-ASCII characters).
 */
private static String asciiLowerCase(String text){
    char[] chars = text.toCharArray();
    for (int i = 0; i < chars.length; i++) {
        char c = chars[i];
        if (c >= 'A' && c <= 'Z') {
            chars[i] = (char) (c + ('a' - 'A'));
        }
    }
    return new String(chars);
}

/**
 * Guesses a charset name from the {@code <meta>} tags of an HTML document.
 *
 * <p>Each meta tag (plain or entity-escaped) is checked, in order, for a quoted
 * {@code charset="..."} attribute and then for a {@code charset=} parameter inside a quoted
 * {@code content="..."} attribute. Tag and attribute names are matched case-insensitively;
 * the charset value is returned with its original spelling, trimmed of surrounding whitespace.
 *
 * @param rawData document bytes; decoded as UTF-8 for scanning. {@code null} yields {@code ""}
 * @return the charset name found, or {@code ""} when no meta tag declares one
 */
public String guessCharSetFromMetatag(byte[] rawData){
    if (rawData == null) {
        return "";
    }
    final String[] startKeywords = new String[]{"<meta","&lt;meta"};
    final String[] endKeywords = new String[]{"/>","</meta>", ">", "&#47;&gt;", "&lt;&#47;meta&gt;", "&gt;"};
    final String[] charsetStartKeywords = new String[]{"charset=\"", "charset=\'"};
    final String[] charsetEndKeywords = new String[]{"\"", "\'"};
    final String[] charsetStartKeywordsHTML4 = new String[]{"content=\"", "content=\'"};
    final String[] charsetEndKeywordsHTML4 = new String[]{"\"", "\'"};
    final String[] charsetStartKeywordsHTML4_2 = new String[]{"charset="};
    final String[] charsetEndKeywordsHTML4_2 = new String[]{";","\n"};

    final String data = new String(rawData, StandardCharsets.UTF_8);
    final String lowered = asciiLowerCase(data);

    int cur = 0;
    while (true) {
        // Next meta tag; the scan ends as soon as no further tag is found.
        Object[] tag = parseDocWithToken(lowered, data, startKeywords, endKeywords, cur, false);
        cur = (int) tag[1];
        if (cur == -1) {
            return "";
        }
        String metaData = (String) tag[0];
        String metaLowered = asciiLowerCase(metaData);

        // HTML5 form: <meta charset="...">
        Object[] attr = parseDocWithToken(metaLowered, metaData, charsetStartKeywords, charsetEndKeywords, 0, false);
        if ((int) attr[1] != -1) {
            return ((String) attr[0]).trim();
        }

        // HTML4 form: <meta http-equiv=... content="...; charset=...">
        attr = parseDocWithToken(metaLowered, metaData, charsetStartKeywordsHTML4, charsetEndKeywordsHTML4, 0, false);
        if ((int) attr[1] == -1) {
            continue;
        }
        String contentAttr = (String) attr[0];
        attr = parseDocWithToken(asciiLowerCase(contentAttr), contentAttr, charsetStartKeywordsHTML4_2, charsetEndKeywordsHTML4_2, 0, true);
        if ((int) attr[1] != -1) {
            return ((String) attr[0]).trim();
        }
    }
}

/**
 * Guesses a charset name from the {@code charset=} parameter of the Content-Type header.
 *
 * <p>The header value is lower-cased before it is scanned, so the result is lower case. The
 * parameter value ends at the earliest {@code ';'} or line feed, or at the end of the value.
 * Surrounding whitespace and one pair of enclosing quotes are removed, so both
 * {@code charset=utf-8} and {@code charset="utf-8"} yield {@code utf-8}.
 *
 * @param rawData bytes handed to {@link HttpHeader} for parsing
 * @return the lower-cased charset name, or {@code ""} when the header or parameter is absent
 */
public String guessCharSetFromHttpHeader(byte[] rawData){
    final String startKeyword = "charset=";
    final String[] endKeywords = new String[]{";","\n"};
    HttpHeader header = new HttpHeader(rawData);
    Optional<String> enc = header.getValue("Content-type");
    if (!enc.isPresent()) {
        return "";
    }

    // Locale.ROOT keeps ASCII folding stable; a Turkish default locale would map 'I' to dotless 'ı'.
    String data = enc.get().toLowerCase(java.util.Locale.ROOT);
    int s = data.indexOf(startKeyword);
    if (s == -1) {
        return "";
    }
    s += startKeyword.length();

    // Stop at whichever delimiter comes first in the text, not first in the array.
    int e = data.length();
    for (String t : endKeywords) {
        int found = data.indexOf(t, s);
        if (found != -1 && found < e) {
            e = found;
        }
    }

    String value = data.substring(s, e).trim();
    if (value.length() >= 2) {
        char first = value.charAt(0);
        char last = value.charAt(value.length() - 1);
        // Parameter values may be quoted; drop one matching pair of quotes.
        if ((first == '"' || first == '\'') && first == last) {
            value = value.substring(1, value.length() - 1).trim();
        }
    }
    return value;
}

/**
 * Reports whether charset auto-detection is currently switched on.
 *
 * @return the current value of the {@code isAuto} flag
 */
public boolean isAuto(){
    return this.isAuto;
}

/**
 * Selects {@code charSet} by delegating to {@link #setCharSet(String, boolean)} with
 * {@code autoStatusUnchanged} set to {@code false}.
 *
 * @param charSet charset name to select
 */
public void setCharSet(String charSet){
    final boolean autoStatusUnchanged = false;
    this.setCharSet(charSet, autoStatusUnchanged);
}

public void setCharSet(String charSet, boolean autoStatusUnchanged){
if (charSet == null) {
return;
}
if (AUTO_CHARSET.equals(charSet)){
isAuto = true;
return;
}
if(!autoStatusUnchanged) {
isAuto = false;
}
if(isAuto){
for(String k: Charset.availableCharsets().keySet()){
String kLower = k.toLowerCase();
if(kLower.equals(charSet.toLowerCase())){
this.charSet = k;
return;
}
}
PacketProxyUtility.getInstance().packetProxyLog(String.format("%s is not supported charset", charSet));
}
if(getAvailableCharSetList().contains(charSet)){
this.charSet = charSet;
}else{
Expand All @@ -38,9 +195,33 @@ public String getCharSet(){
return charSet;
}

/**
 * Returns the charset label for GUI components: the {@code AUTO_CHARSET} marker while
 * auto-detection is on, otherwise the currently selected charset name.
 *
 * @return {@code AUTO_CHARSET} when {@code isAuto} is set, else the value of {@code charSet}
 */
public String getCharSetForGUIComponent(){
    return isAuto ? AUTO_CHARSET : charSet;
}

/**
 * Guesses the charset of {@code rawData}: the HTTP header guess is used first, the meta-tag
 * guess second, and {@code DEFAULT_CHARSET} when both come back empty.
 *
 * @param rawData bytes passed unchanged to both guessing methods
 * @return the first non-empty guess, or {@code DEFAULT_CHARSET}
 */
private String guessedCharSet(byte[] rawData){
    String detected = guessCharSetFromHttpHeader(rawData);
    if ("".equals(detected)) {
        // The header gave nothing, so fall back to the document's meta tags.
        detected = guessCharSetFromMetatag(rawData);
    }
    return "".equals(detected) ? DEFAULT_CHARSET : detected;
}

/**
 * Guesses the charset of {@code rawData} and applies it through
 * {@link #setCharSet(String, boolean)} with {@code autoStatusUnchanged} set to {@code true}.
 *
 * @param rawData bytes to inspect
 */
public void setGuessedCharSet(byte[] rawData){
    final String detected = guessedCharSet(rawData);
    setCharSet(detected, true);
}

public List<String> getAvailableCharSetList(){
List<String> ret = new ArrayList<>();
try {
ret.add(AUTO_CHARSET);
for(CharSet charset: CharSets.getInstance().queryAll()){
ret.add(charset.toString());
}
Expand Down
34 changes: 34 additions & 0 deletions src/test/java/packetproxy/common/CharSetUtilityTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package packetproxy.common;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import packetproxy.util.CharSetUtility;

import static org.junit.Assert.assertEquals;

/**
 * Tests for the charset-guessing methods of {@link CharSetUtility}.
 */
@RunWith(JUnit4.class)
public class CharSetUtilityTest {
    /**
     * Checks charset guessing from an HTTP Content-Type header and from HTML5/HTML4 meta tags.
     */
    @Test
    public void testCountChar() {
        // Header followed by further header lines.
        String header = "HTTP/1.1 302 Moved Temporarily\nContent-Type: text/html; charset=utf-8\nConnection: keep-alive\n";
        String a = CharSetUtility.getInstance().guessCharSetFromHttpHeader(header.getBytes());
        assertEquals("utf-8", a);
        // Header where the charset parameter is the last thing in the input.
        String header2 = "HTTP/1.1 302 Moved Temporarily\nContent-Type: text/html; charset=utf-8";
        String a2 = CharSetUtility.getInstance().guessCharSetFromHttpHeader(header2.getBytes());
        assertEquals("utf-8", a2);

        // HTML5: <meta charset="...">
        String html5 = "<html>\n<head>\n<meta charset=\"UTF-8\">\n<title>test</title></head></html>";
        String b = CharSetUtility.getInstance().guessCharSetFromMetatag(html5.getBytes());
        assertEquals("UTF-8", b);
        // HTML4: charset at the end of the content attribute.
        String html4 = "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n<title>test</title></head></html>";
        String c = CharSetUtility.getInstance().guessCharSetFromMetatag(html4.getBytes());
        assertEquals("UTF-8", c);
        // HTML4: charset followed by a trailing semicolon.
        String html4_2 = "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8;\">\n<title>test</title></head></html>";
        String c2 = CharSetUtility.getInstance().guessCharSetFromMetatag(html4_2.getBytes());
        assertEquals("UTF-8", c2);
        // HTML4: charset placed before the media type. This must use html4_3, not html4_2.
        String html4_3 = "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"charset=UTF-8;text/html\">\n<title>test</title></head></html>";
        String c3 = CharSetUtility.getInstance().guessCharSetFromMetatag(html4_3.getBytes());
        assertEquals("UTF-8", c3);
    }
}

0 comments on commit 2e5ff38

Please sign in to comment.