-
Notifications
You must be signed in to change notification settings - Fork 1
/
fix_mixed_encoding.php
85 lines (82 loc) · 1.54 KB
/
fix_mixed_encoding.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
<?php
// Show usage and stop unless both arguments were supplied and the input file
// exists. ($argv[0] is the script name and is never empty, so it is $argv[2],
// the output path, that has to be validated here.)
if (!isset($argv[1], $argv[2]) || $argv[2] === '' || !is_file($argv[1])) {
    echo "\nUsage: > php fix_mixed_encoding.php inputfile.sql outputfile.sql\n";
    // Non-zero status so calling shell scripts can detect the failed invocation.
    exit(1);
}
/**
 * Repairs "double-encoded" UTF-8 text.
 *
 * Each key below is the byte sequence produced when the UTF-8 encoding of the
 * target character was mis-read as Windows-1252 and then encoded as UTF-8 a
 * second time (for example "ü" turning into the two characters "Ã" + "¼").
 * Keys are written as explicit byte escapes because several of them contain
 * invisible or non-printable characters (no-break space, soft hyphen, C1
 * control characters) that do not survive copy/paste or editor round trips.
 *
 * NOTE(review): for bytes Windows-1252 leaves undefined (0x81, 0x8D, 0x8F) the
 * keys assume the byte was passed through as the matching C1 control character
 * (U+0081, U+008D, U+008F) - confirm against the damaged data.
 *
 * The replacement is done with strtr(), which scans the input once, prefers
 * the longest matching key and never re-examines text it has already
 * replaced, so the result does not depend on the order of the table.
 *
 * @param string $string Text that may contain damaged sequences.
 * @return string The text with every known damaged sequence replaced by the
 *                intended character; everything else is returned unchanged.
 */
function fixMixedCharacters($string) {
    // Built once and reused: this function is called for every chunk of the file.
    static $searchReplace = array(
        "\xC3\x83\xC2\xBC" => 'ü',
        "\xC3\x83\xC2\xA4" => 'ä',
        "\xC3\x83\xC2\xB6" => 'ö',
        "\xC3\x83\xE2\x80\x93" => 'Ö',
        "\xC3\x83\xC5\xB8" => 'ß',
        "\xC3\x83\xC2\xA0" => 'à', // second character is a no-break space
        "\xC3\x83\xC2\xA1" => 'á',
        "\xC3\x83\xC2\xA2" => 'â',
        "\xC3\x83\xC2\xA3" => 'ã',
        "\xC3\x83\xC2\xB9" => 'ù',
        "\xC3\x83\xC2\xBA" => 'ú',
        "\xC3\x83\xC2\xBB" => 'û',
        "\xC3\x83\xE2\x84\xA2" => 'Ù',
        "\xC3\x83\xC5\xA1" => 'Ú',
        "\xC3\x83\xE2\x80\xBA" => 'Û',
        "\xC3\x83\xC5\x93" => 'Ü',
        "\xC3\x83\xC2\xB2" => 'ò',
        "\xC3\x83\xC2\xB3" => 'ó',
        "\xC3\x83\xC2\xB4" => 'ô',
        "\xC3\x83\xC2\xA8" => 'è',
        "\xC3\x83\xC2\xA9" => 'é',
        "\xC3\x83\xC2\xAA" => 'ê',
        "\xC3\x83\xC2\xAB" => 'ë',
        "\xC3\x83\xE2\x82\xAC" => 'À',
        "\xC3\x83\xC2\x81" => 'Á', // second character is U+0081 (undefined in cp1252)
        "\xC3\x83\xE2\x80\x9A" => 'Â',
        "\xC3\x83\xC6\x92" => 'Ã',
        "\xC3\x83\xE2\x80\x9E" => 'Ä',
        "\xC3\x83\xE2\x80\xA6" => 'Å',
        "\xC3\x83\xE2\x80\xA1" => 'Ç',
        "\xC3\x83\xCB\x86" => 'È',
        "\xC3\x83\xE2\x80\xB0" => 'É',
        "\xC3\x83\xC5\xA0" => 'Ê',
        "\xC3\x83\xE2\x80\xB9" => 'Ë',
        "\xC3\x83\xC5\x92" => 'Ì',
        "\xC3\x83\xC2\x8D" => 'Í', // second character is U+008D (undefined in cp1252)
        "\xC3\x83\xC5\xBD" => 'Î',
        "\xC3\x83\xC2\x8F" => 'Ï', // second character is U+008F (undefined in cp1252)
        "\xC3\x83\xE2\x80\x98" => 'Ñ',
        "\xC3\x83\xE2\x80\x99" => 'Ò',
        "\xC3\x83\xE2\x80\x9C" => 'Ó',
        "\xC3\x83\xE2\x80\x9D" => 'Ô',
        "\xC3\x83\xE2\x80\xA2" => 'Õ',
        "\xC3\x83\xCB\x9C" => 'Ø',
        "\xC3\x83\xC2\xA5" => 'å',
        "\xC3\x83\xC2\xA6" => 'æ',
        "\xC3\x83\xC2\xA7" => 'ç',
        "\xC3\x83\xC2\xAC" => 'ì',
        "\xC3\x83\xC2\xAD" => 'í', // second character is a soft hyphen
        "\xC3\x83\xC2\xAE" => 'î',
        "\xC3\x83\xC2\xAF" => 'ï',
        "\xC3\x83\xC2\xB0" => 'ð',
        "\xC3\x83\xC2\xB1" => 'ñ',
        "\xC3\x83\xC2\xB5" => 'õ',
        "\xC3\x83\xC2\xB8" => 'ø',
        "\xC3\x83\xC2\xBD" => 'ý',
        "\xC3\x83\xC2\xBF" => 'ÿ',
        "\xC3\xA2\xE2\x80\x9A\xC2\xAC" => '€',
    );

    return strtr($string, $searchReplace);
}
// Resolve the output path defensively so this section also behaves when the
// script was started with a missing second argument.
$outputPath = isset($argv[2]) ? $argv[2] : '';
if ($outputPath === '') {
    fwrite(STDERR, "No output file given.\n");
    exit(1);
}

// Opening the output truncates it, so writing onto the input file would
// destroy the data before it has been read.
if (is_file($outputPath) && realpath($outputPath) === realpath($argv[1])) {
    fwrite(STDERR, "Input and output file must be different.\n");
    exit(1);
}

// Open the input file; stop here rather than looping on an invalid handle.
$fp = fopen($argv[1], 'rb');
if ($fp === false) {
    fwrite(STDERR, "Cannot open input file: {$argv[1]}\n");
    exit(1);
}

// Open/create the output file.
$fp2 = fopen($outputPath, 'wb');
if ($fp2 === false) {
    fclose($fp);
    fwrite(STDERR, "Cannot open output file: {$outputPath}\n");
    exit(1);
}

// Copy the file in pieces of roughly 4096 bytes, fixing the encoding of each.
$exitCode = 0;
while (!feof($fp)) {
    $fixThisString = fread($fp, 4096);
    if ($fixThisString === false || $fixThisString === '') {
        // Read error or nothing left to read.
        break;
    }

    // A fixed-size read can end in the middle of a damaged multi-byte
    // sequence, which would then never match the replacement table. Extend
    // the piece to the end of the current line: no search key contains a
    // line break, so a piece that ends on one never splits a key. (A file
    // with very long lines is therefore held in memory one line at a time.)
    if (substr($fixThisString, -1) !== "\n") {
        $restOfLine = fgets($fp);
        if ($restOfLine !== false) {
            $fixThisString .= $restOfLine;
        }
    }

    if (fwrite($fp2, fixMixedCharacters($fixThisString)) === false) {
        fwrite(STDERR, "Cannot write to output file: {$outputPath}\n");
        $exitCode = 1;
        break;
    }
}

fclose($fp);
fclose($fp2);

if ($exitCode !== 0) {
    exit($exitCode);
}