diff --git a/src/gb18030/CompareAllRTF.sas b/src/gb18030/CompareAllRTF.sas new file mode 100644 index 0000000..0cf90a2 --- /dev/null +++ b/src/gb18030/CompareAllRTF.sas @@ -0,0 +1,247 @@ +/* +ÏêϸÎĵµÇëÇ°Íù Github ²éÔÄ: https://github.com/Snoopy1866/RTFTools-For-SAS +*/ + +%macro CompareAllRTF(basedir, + comparedir, + ignorecreatim = yes, + ignoreheader = yes, + ignorefooter = yes, + ignorecellstyle = yes, + ignorefonttable = yes, + ignorecolortable = yes, + outdata = diff, + del_temp_data = yes) + / parmbuff; + + /*´ò¿ª°ïÖúÎĵµ*/ + %if %qupcase(&SYSPBUFF) = %bquote((HELP)) or %qupcase(&SYSPBUFF) = %bquote(()) %then %do; + X explorer "https://github.com/Snoopy1866/RTFTools-For-SAS/blob/main/docs/CompareAllRTF.md"; + %goto exit; + %end; + + /*¼ì²éÒÀÀµ*/ + proc sql noprint; + select * from DICTIONARY.CATALOGS where libname = "WORK" and memname = "SASMACR" and objname = "COMPARERTF"; + quit; + %if &SQLOBS = 0 %then %do; + %put ERROR: Ç°ÖÃÒÀÀµÈ±Ê§£¬ÇëÏȼÓÔغê³ÌÐò %nrstr(%%)CompareRTF¡£; + %goto exit; + %end; + + %let reg_dir_expr = %bquote(/^(?:([A-Za-z_][A-Za-z_0-9]{0,7})|[\x22\x27]?((?:[A-Za-z]:\\|\\\\[^\\\/:?\x22\x27<>|]+)[^\\\/:?\x22\x27<>|]+(?:\\[^\\\/:?\x22\x27<>|]+)*)[\x22\x27]?)$/); + %let reg_dir_id = %sysfunc(prxparse(%superq(reg_dir_expr))); + + /*1. 
»ñȡĿ¼·¾¶*/ + /*base*/ + %if %sysfunc(prxmatch(®_dir_id, %superq(basedir))) %then %do; + %let basedirref = %sysfunc(prxposn(®_dir_id, 1, %superq(basedir))); + %let basedirloc = %sysfunc(prxposn(®_dir_id, 2, %superq(basedir))); + + /*Ö¸¶¨µÄÊÇĿ¼ÒýÓÃÃû*/ + %if %bquote(&basedirref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&basedirref)) > 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&basedirref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&basedirref)) < 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&basedirref) Ö¸ÏòµÄĿ¼²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&basedirref)) = 0 %then %do; + %let basedirloc = %qsysfunc(pathname(&basedirref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(basedirloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(basedirloc))) = 0 %then %do; + %put ERROR: Ŀ¼·¾¶ %superq(basedirloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + %put ERROR: Ŀ¼ÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßĿ¼ÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + /*compare*/ + %if %sysfunc(prxmatch(®_dir_id, %superq(comparedir))) %then %do; + %let comparedirref = %sysfunc(prxposn(®_dir_id, 1, %superq(comparedir))); + %let comparedirloc = %sysfunc(prxposn(®_dir_id, 2, %superq(comparedir))); + + /*Ö¸¶¨µÄÊÇĿ¼ÒýÓÃÃû*/ + %if %bquote(&comparedirref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&comparedirref)) > 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&comparedirref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&comparedirref)) < 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&comparedirref) Ö¸ÏòµÄĿ¼²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&comparedirref)) = 0 %then %do; + %let comparedirloc = %qsysfunc(pathname(&comparedirref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(comparedirloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(comparedirloc))) = 0 %then %do; + %put ERROR: Ŀ¼·¾¶ %superq(comparedirloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + 
%put ERROR: Ŀ¼ÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßĿ¼ÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + + /*2. ʹÓà DOS ÃüÁî»ñÈ¡ËùÓÐ RTF Îļþ£¬´æ´¢ÔÚ _tmp_rtf_list_base.txt ºÍ _tmp_rtf_list_compare.txt ÖÐ*/ + X "dir ""&basedirloc\*.rtf"" /b/on > ""&basedirloc\_tmp_rtf_list_base.txt"" & exit"; + X "dir ""&comparedirloc\*.rtf"" /b/on > ""&comparedirloc\_tmp_rtf_list_compare.txt"" & exit"; + + + /*----------------ÁÙʱ¹Ø±ÕÈÕÖ¾Êä³ö------------------*/ + proc printto log=_null_; + run; + + /*3. ¶ÁÈ¡ _tmp_rtf_list.txt Îļþ£¬¹¹½¨ filename Óï¾ä*/ + data _tmp_rtf_list_base; + infile "&basedirloc\_tmp_rtf_list_base.txt" truncover encoding = 'gbke'; + input rtf_name $char1000. rtf_path $char32767.; + rtf_path = cats("&basedirloc\", rtf_name); + + /*¹¹Ôì filename Óï¾ä£¬½¨Á¢ÎļþÒýÓÃ*/ + fileref = 'brtf' || strip(_n_); + fnstm = 'filename ' || strip(fileref) || ' "' || strip(rtf_path) || '";'; + + call execute(fnstm); + run; + + data _tmp_rtf_list_compare; + infile "&comparedirloc\_tmp_rtf_list_compare.txt" truncover encoding = 'gbke'; + input rtf_name $char1000. rtf_path $char32767.; + rtf_path = cats("&comparedirloc\", rtf_name); + + /*¹¹Ôì filename Óï¾ä£¬½¨Á¢ÎļþÒýÓÃ*/ + fileref = 'crtf' || strip(_n_); + fnstm = 'filename ' || strip(fileref) || ' "' || strip(rtf_path) || '";'; + + call execute(fnstm); + run; + + + /*4. ºÏ²¢ _tmp_rtf_list_base ºÍ _tmp_rtf_list_compare*/ + proc sql noprint; + create table _tmp_rtf_list_bc as + select + ifc(not missing(a.rtf_name), a.rtf_name, b.rtf_name) + as rtf_name, + a.rtf_name as base_rtf_name, + a.fileref as baseref, + b.rtf_name as compare_rtf_name, + b.fileref as compareref + from _tmp_rtf_list_base as a full join _tmp_rtf_list_compare as b on a.rtf_name = b.rtf_name; + quit; + + + /*5. 
µ÷Óà %CompareRTF() ±È½Ï RTF Îļþ*/ + data _null_; + set _tmp_rtf_list_bc; + retain n 0; + if not missing(base_rtf_name) and not missing(compare_rtf_name) then do; + n + 1; + call execute('%nrstr(%CompareRTF(base = ' || baseref || + ', compare = ' || compareref || + ', ignorecreatim = ' || "&ignorecreatim" || + ', ignoreheader = ' || "&ignoreheader" || + ', ignorefooter = ' || "&ignorefooter" || + ', ignorecellstyle = ' || "&ignorecellstyle" || + ', ignorefonttable = ' || "&ignorefonttable" || + ', ignorecolortable = ' || "&ignorecolortable" || + ', outdata = _tmp_diff_' || strip(n) || '));'); + end; + call symputx("diff_n_max", n); + run; + + /*----------------»Ö¸´ÈÕÖ¾Êä³ö------------------*/ + proc printto log=log; + run; + + + /*6. Êä³ö²îÒì±È½Ï½á¹û*/ + %if &diff_n_max > 0 %then %do; + proc sql noprint; + create table _tmp_diff as + select * from _tmp_diff_1 + %do i = 2 %to &diff_n_max; + outer union corr select * from _tmp_diff_&i + %end; + ; + quit; + %end; + + proc sql noprint; + create table _tmp_outdata as + select + a.rtf_name, + a.base_rtf_name, + a.compare_rtf_name, + (case when missing(a.base_rtf_name) and not missing(a.compare_rtf_name) then "Y" else "" end) as addyn length = 4, /*base -> compare ÐÂÔö*/ + (case when not missing(a.base_rtf_name) and missing(a.compare_rtf_name) then "Y" else "" end) as delyn length = 4, /*base -> compare ɾ³ý*/ + (case when not missing(a.base_rtf_name) and not missing(a.compare_rtf_name) then + (select diffyn from _tmp_diff as b where a.base_rtf_name = b.base_name and a.compare_rtf_name = b.compare_name) + else "" + end) as diffyn length = 4 /*base -> compare ²îÒì*/ + from _tmp_rtf_list_bc as a; + quit; + + proc sort data = _tmp_outdata sortseq = linguistic(numeric_collation = on) out = _tmp_outdata(drop = rtf_name); + by rtf_name; + run; + + + /*7. 
×îÖÕÊä³ö*/ + data &outdata; + set _tmp_outdata; + label base_rtf_name = "base Îļþ" + compare_rtf_name = "compare Îļþ" + addyn = "compare ÖÐÐÂÔö" + delyn = "base ÖÐɾ³ý" + diffyn = "´æÔÚ²îÒì"; + run; + + + %exit: + /*8. Çå³ýÖмäÊý¾Ý¼¯*/ + %if %upcase(&del_temp_data) = YES %then %do; + proc datasets library = work nowarn noprint; + delete _tmp_rtf_list_base + _tmp_rtf_list_compare + _tmp_rtf_list_bc + _tmp_diff + _tmp_outdata + %if %symexist(diff_n_max) %then %do; + %do i = 1 %to &diff_n_max; + _tmp_diff_&i + %end; + %end; + ; + quit; + %end; + + %if %symexist(basedirloc) and %symexist(comparedirloc) %then %do; + /*ɾ³ý _tmp_rtf_list_base.txt ºÍ _tmp_rtf_list_compare.txt*/ + X "del ""&basedirloc\_tmp_rtf_list_base.txt"" & exit"; + X "del ""&comparedirloc\_tmp_rtf_list_compare.txt"" & exit"; + %end; + + /*ɾ³ý _null_.log Îļþ*/ + X "del _null_.log & exit"; + + %put NOTE: ºê CompareAllRTF ÒѽáÊøÔËÐУ¡; +%mend; diff --git a/src/gb18030/CompareRTF.sas b/src/gb18030/CompareRTF.sas new file mode 100644 index 0000000..efd3f8f --- /dev/null +++ b/src/gb18030/CompareRTF.sas @@ -0,0 +1,363 @@ +/* +ÏêϸÎĵµÇëÇ°Íù Github ²éÔÄ: https://github.com/Snoopy1866/RTFTools-For-SAS +*/ + +%macro CompareRTF(base, compare, outdata = diff, del_temp_data = yes, + ignorecreatim = yes, + ignoreheader = yes, + ignorefooter = yes, + ignorecellstyle = yes, + ignorefonttable = yes, + ignorecolortable = yes) + / parmbuff; + + /*´ò¿ª°ïÖúÎĵµ*/ + %if %qupcase(&SYSPBUFF) = %bquote((HELP)) or %qupcase(&SYSPBUFF) = %bquote(()) %then %do; + X explorer "https://github.com/Snoopy1866/RTFTools-For-SAS/blob/main/docs/CompareRTF.md"; + %goto exit; + %end; + + /*1. 
»ñÈ¡Îļþ·¾¶*/ + %let reg_file_expr = %bquote(/^(?:([A-Za-z_][A-Za-z_0-9]{0,7})|[\x22\x27]?((?:[A-Za-z]:\\|\\\\[^\\\/:?\x22\x27<>|]+)[^\\\/:?\x22\x27<>|]+(?:\\[^\\\/:?\x22\x27<>|]+)*)[\x22\x27]?)$/); + %let reg_file_id = %sysfunc(prxparse(%superq(reg_file_expr))); + + /*base*/ + %if %sysfunc(prxmatch(®_file_id, %superq(base))) %then %do; + %let baseref = %sysfunc(prxposn(®_file_id, 1, %superq(base))); + %let baseloc = %sysfunc(prxposn(®_file_id, 2, %superq(base))); + + /*Ö¸¶¨µÄÊÇÎļþÒýÓÃÃû*/ + %if %bquote(&baseref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&baseref)) > 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&baseref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&baseref)) < 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&baseref) Ö¸ÏòµÄÎļþ²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&baseref)) = 0 %then %do; + %let baseloc = %qsysfunc(pathname(&baseref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(baseloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(baseloc))) = 0 %then %do; + %put ERROR: Îļþ·¾¶ %superq(baseloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + %put ERROR: ÎļþÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßÎļþÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + /*compare*/ + %if %sysfunc(prxmatch(®_file_id, %superq(compare))) %then %do; + %let compareref = %sysfunc(prxposn(®_file_id, 1, %superq(compare))); + %let compareloc = %sysfunc(prxposn(®_file_id, 2, %superq(compare))); + + /*Ö¸¶¨µÄÊÇÎļþÒýÓÃÃû*/ + %if %bquote(&compareref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&compareref)) > 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&compareref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&compareref)) < 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&compareref) Ö¸ÏòµÄÎļþ²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&compareref)) = 0 %then %do; + %let compareloc = %qsysfunc(pathname(&compareref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(compareloc) ^= 
%bquote() %then %do;
+            %if %sysfunc(fileexist(%superq(compareloc))) = 0 %then %do;
+                %put ERROR: Îļþ·¾¶ %superq(compareloc) ²»´æÔÚ£¡;
+                %goto exit;
+            %end;
+        %end;
+    %end;
+    %else %do;
+        %put ERROR: ÎļþÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßÎļþÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡;
+        %goto exit;
+    %end;
+
+
+    /*2. Read both RTF files as plain text: one observation per physical line,
+         stored in the single character variable LINE (up to 32767 bytes).*/
+    data _tmp_rtf_data_base(compress = yes);
+        informat line $32767.;
+        format line $32767.;
+        length line $32767.;
+
+        /*The path is wrapped in single quotes so its content is not macro-resolved again*/
+        infile %unquote(%str(%')%superq(baseloc)%str(%')) truncover;
+        input line $char32767.;
+    run;
+
+    data _tmp_rtf_data_compare(compress = yes);
+        informat line $32767.;
+        format line $32767.;
+        length line $32767.;
+
+        infile %unquote(%str(%')%superq(compareloc)%str(%')) truncover;
+        input line $char32767.;
+    run;
+
+    /*3. Remove the parts that must be excluded from the comparison.
+         Every sub-step applies the same filter to the base and the compare dataset,
+         so both sides keep an identical variable layout for PROC COMPARE.*/
+    /*3.1 Ignore the font table*/
+    %if %upcase(&ignorefonttable) = YES %then %do;
+        %let reg_fonttable_ini_expr = %bquote(/^\{\\fonttbl$/o);
+        %let reg_fonttable_def_expr = %bquote(/^\{\\f\d+\\froman\\fprq\d+\\fcharset\d+\\cpg\d+\s.+\x3B\}$/o);
+
+        data _tmp_rtf_data_base;
+            set _tmp_rtf_data_base;
+            reg_fonttable_ini_id = prxparse("&reg_fonttable_ini_expr");
+            reg_fonttable_def_id = prxparse("&reg_fonttable_def_expr");
+
+            /*Drop the font table opening line and every font definition line*/
+            if prxmatch(reg_fonttable_ini_id, strip(line)) then delete;
+            if prxmatch(reg_fonttable_def_id, strip(line)) then delete;
+        run;
+
+        data _tmp_rtf_data_compare;
+            set _tmp_rtf_data_compare;
+            reg_fonttable_ini_id = prxparse("&reg_fonttable_ini_expr");
+            reg_fonttable_def_id = prxparse("&reg_fonttable_def_expr");
+
+            if prxmatch(reg_fonttable_ini_id, strip(line)) then delete;
+            if prxmatch(reg_fonttable_def_id, strip(line)) then delete;
+        run;
+    %end;
+
+    /*3.2 Ignore the color table*/
+    %if %upcase(&ignorecolortable) = YES %then %do;
+        %let reg_colortable_ini_expr = %bquote(/^\}?\{\\colortbl\x3B$/o);
+        %let reg_colortable_def_expr = %bquote(/^\\red\d+\\green\d+\\blue\d+\x3B$/o);
+
+        data _tmp_rtf_data_base;
+            set _tmp_rtf_data_base;
+            reg_colortable_ini_id = prxparse("&reg_colortable_ini_expr");
+            reg_colortable_def_id = prxparse("&reg_colortable_def_expr");
+
+            /*Matching lines have to be removed (as in 3.1 and 3.3); merely tagging them
+              in a helper variable would leave them in the data and PROC COMPARE would
+              still report color table differences.*/
+            if prxmatch(reg_colortable_ini_id, strip(line)) then delete;
+            if prxmatch(reg_colortable_def_id, strip(line)) then delete;
+        run;
+
+        data _tmp_rtf_data_compare;
+            set _tmp_rtf_data_compare;
+            reg_colortable_ini_id = prxparse("&reg_colortable_ini_expr");
+            reg_colortable_def_id = prxparse("&reg_colortable_def_expr");
+
+            if prxmatch(reg_colortable_ini_id, strip(line)) then delete;
+            if prxmatch(reg_colortable_def_id, strip(line)) then delete;
+        run;
+    %end;
+
+    /*3.3 Ignore the creation time (\creatim control group)*/
+    %if %upcase(&ignorecreatim) = YES %then %do;
+        %let reg_creatim_expr = %bquote(/\\creatim\\yr\d{1,4}\\mo\d{1,2}\\dy\d{1,2}\\hr\d{1,2}\\min\d{1,2}\\sec\d{1,2}/o);
+        data _tmp_rtf_data_base;
+            set _tmp_rtf_data_base;
+            reg_creatim_id = prxparse("&reg_creatim_expr");
+
+            /*The pattern is not anchored: any line containing the group is dropped*/
+            if prxmatch(reg_creatim_id, strip(line)) then delete;
+        run;
+
+        data _tmp_rtf_data_compare;
+            set _tmp_rtf_data_compare;
+            reg_creatim_id = prxparse("&reg_creatim_expr");
+
+            if prxmatch(reg_creatim_id, strip(line)) then delete;
+        run;
+    %end;
+
+    /*3.4 Ignore the page header.
+          The header starts on the line matched by reg_header_expr and ends on the line
+          where the running count of unescaped "{" minus unescaped "}" returns to zero.*/
+    %if %upcase(&ignoreheader) = YES %then %do;
+        %let reg_header_expr = %bquote(/^\{\\header\\pard\\plain\\q[lcr]\{$/o);
+
+        data _tmp_rtf_data_base;
+            set _tmp_rtf_data_base;
+            reg_header_id = prxparse("&reg_header_expr");
+
+            retain header_brace_unclosed; /*number of braces that are still open*/
+            retain header_start_flag 0
+                   header_end_flag 0;
+            if prxmatch(reg_header_id, strip(line)) then do; /*header starts*/
+                /*count("{") also counts the escaped form "\{", hence the subtraction*/
+                header_brace_unclosed = (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}"));
+                header_start_flag = 1;
+                delete;
+            end;
+            else if header_start_flag = 1 and header_end_flag = 0 then do;
+                header_brace_unclosed + (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}"));
+                if header_brace_unclosed = 0 then do; /*header ends*/
+                    header_end_flag = 1;
+                    header_brace_unclosed = .;
+                    delete;
+                end;
+                else do; /*inside the header*/ +
delete; + end; + end; + else if header_brace_unclosed = . then do; + header_start_flag = 0; + header_end_flag = 0; + end; + run; + + data _tmp_rtf_data_compare; + set _tmp_rtf_data_compare; + reg_header_id = prxparse("®_header_expr"); + + retain header_brace_unclosed; /*δ±ÕºÏµÄ´óÀ¨ºÅÊýÁ¿*/ + retain header_start_flag 0 + header_end_flag 0; + if prxmatch(reg_header_id, strip(line)) then do; /*ҳü¿ªÊ¼*/ + header_brace_unclosed = (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + header_start_flag = 1; + delete; + end; + else if header_start_flag = 1 and header_end_flag = 0 then do; + header_brace_unclosed + (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + if header_brace_unclosed = 0 then do; /*ҳü½áÊø*/ + header_end_flag = 1; + header_brace_unclosed = .; + delete; + end; + else do; /*ҳüÖмä*/ + delete; + end; + end; + else if header_brace_unclosed = . then do; + header_start_flag = 0; + header_end_flag = 0; + end; + run; + %end; + + /*3.5 ºöÂÔÒ³½Å*/ + %if %upcase(&ignorefooter) = YES %then %do; + %let reg_footer_expr = %bquote(/^\{\\footer\\pard\\plain\\q[lcr]\{$/o); + + data _tmp_rtf_data_base; + set _tmp_rtf_data_base; + reg_footer_id = prxparse("®_footer_expr"); + + retain footer_brace_unclosed; /*δ±ÕºÏµÄ´óÀ¨ºÅÊýÁ¿*/ + retain footer_start_flag 0 + footer_end_flag 0; + if prxmatch(reg_footer_id, strip(line)) then do; /*Ò³½Å¿ªÊ¼*/ + footer_brace_unclosed = (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + footer_start_flag = 1; + delete; + end; + else if footer_start_flag = 1 and footer_end_flag = 0 then do; + footer_brace_unclosed + (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + if footer_brace_unclosed = 2 and strip(line) = "{\row}" then do; /*Ò³½Å½áÊø*/ + footer_end_flag = 1; + footer_brace_unclosed = .; + delete; + end; + 
else do; /*Ò³½ÅÖмä*/ + delete; + end; + end; + else if footer_brace_unclosed = . then do; + footer_start_flag = 0; + footer_end_flag = 0; + end; + run; + + data _tmp_rtf_data_compare; + set _tmp_rtf_data_compare; + reg_footer_id = prxparse("®_footer_expr"); + + retain footer_brace_unclosed; /*δ±ÕºÏµÄ´óÀ¨ºÅÊýÁ¿*/ + retain footer_start_flag 0 + footer_end_flag 0; + if prxmatch(reg_footer_id, strip(line)) then do; /*Ò³½Å¿ªÊ¼*/ + footer_brace_unclosed = (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + footer_start_flag = 1; + delete; + end; + else if footer_start_flag = 1 and footer_end_flag = 0 then do; + footer_brace_unclosed + (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + if footer_brace_unclosed = 2 and strip(line) = "{\row}" then do; /*Ò³½Å½áÊø*/ + footer_end_flag = 1; + footer_brace_unclosed = .; + delete; + end; + else do; /*Ò³½ÅÖмä*/ + delete; + end; + end; + else if footer_brace_unclosed = . then do; + footer_start_flag = 0; + footer_end_flag = 0; + end; + run; + %end; + + /*3.6 ºöÂÔµ¥Ôª¸ñÑùʽ*/ + %if %upcase(&ignorecellstyle) = YES %then %do; + %let reg_cellstyle_expr = %bquote(/^(?:\\clbrdr[tblr]\\brdrs\\brdrw\d+\\brdrcf\d+)*\\cltxlrtb\\clvertal[tc](?:\\clcbpat\d+)?(?:\\clpadt\d+\\clpadft\d+\\clpadr\d+\\clpadfr\d+)?\\cellx\d+$/o); + + data _tmp_rtf_data_base; + set _tmp_rtf_data_base; + reg_cellstyle_id = prxparse("®_cellstyle_expr"); + + if prxmatch(reg_cellstyle_id, strip(line)) then delete; + run; + + data _tmp_rtf_data_compare; + set _tmp_rtf_data_compare; + reg_cellstyle_id = prxparse("®_cellstyle_expr"); + + if prxmatch(reg_cellstyle_id, strip(line)) then delete; + run; + %end; + + + /*4. ±È½ÏоÉÊý¾Ý¼¯*/ + proc compare base = _tmp_rtf_data_base compare = _tmp_rtf_data_compare noprint; + run; + + + /*5. 
´¢´æ±È½Ï½á¹û*/ + %let _sysinfo = &sysinfo; + data _tmp_outdata; + base_path = "&baseloc"; + compare_path = "&compareloc"; + base_name = scan(base_path, -1, "\"); + compare_name = scan(compare_path, -1, "\"); + diffyn = ifc(&_sysinfo > 0, "Y", ""); + + label base_path = "baseÎļþ·¾¶" + compare_path = "compareÎļþ·¾¶" + base_name = "baseÎļþÃû" + compare_name = "compareÎļþÃû" + diffyn = "´æÔÚ²îÒì"; + run; + + + /*6. ×îÖÕÊä³ö*/ + data &outdata; + set _tmp_outdata; + run; + + + %exit: + /*7. Çå³ýÖмäÊý¾Ý¼¯*/ + %if %upcase(&del_temp_data) = YES %then %do; + proc datasets library = work nowarn noprint; + delete _tmp_rtf_data_base + _tmp_rtf_data_compare + _tmp_outdata + ; + quit; + %end; + + %put NOTE: ºê CompareRTF ÒѽáÊøÔËÐУ¡; +%mend; diff --git a/src/gb18030/CompareRTFWithDataset.sas b/src/gb18030/CompareRTFWithDataset.sas new file mode 100644 index 0000000..ec293c9 --- /dev/null +++ b/src/gb18030/CompareRTFWithDataset.sas @@ -0,0 +1,300 @@ +/* +ÏêϸÎĵµÇëÇ°Íù Github ²éÔÄ: https://github.com/Snoopy1866/RTFTools-For-SAS +*/ + +%macro CompareRTFWithDataset(rtf, dataset, del_temp_data = yes, + ignoreCRLF = yes, + ignoreLeadBlank = yes, + ignoreEmptyColumn = yes, + ignoreHalfOrFullWidth = no, + ignoreEmbeddedBlank = no + ) / parmbuff; + + /*´ò¿ª°ïÖúÎĵµ*/ + %if %qupcase(&SYSPBUFF) = %bquote((HELP)) or %qupcase(&SYSPBUFF) = %bquote(()) %then %do; + X explorer "https://github.com/Snoopy1866/RTFTools-For-SAS/blob/main/docs/CompareRTFWithDataset.md"; + %goto exit; + %end; + + /*¼ì²éÒÀÀµ*/ + %let is_dependency_loaded = 1; + proc sql noprint; + select count(*) into : is_transcode_loaded from DICTIONARY.CATALOGS where libname = "WORK" and memname = "SASMACR" and objname = "_MACRO_TRANSCODE"; + select count(*) into : is_readrtf_loaded from DICTIONARY.CATALOGS where libname = "WORK" and memname = "SASMACR" and objname = "READRTF"; + quit; + %if not &is_transcode_loaded %then %do; + %put ERROR: Ç°ÖÃÒÀÀµÈ±Ê§£¬ÇëÏȼÓÔØÎļþ Transcode.sas¡£; + %let is_dependency_loaded = 0; + %end; + + %if not 
&is_readrtf_loaded %then %do; + %put ERROR: Ç°ÖÃÒÀÀµÈ±Ê§£¬ÇëÏȼÓÔغê³ÌÐò %nrstr(%%)ReadRTF¡£; + %let is_dependency_loaded = 0; + %end; + + %if not &is_dependency_loaded %then %do; + %goto exit; + %end; + + + /*1. »ñÈ¡Îļþ·¾¶*/ + %let reg_file_expr = %bquote(/^(?:([A-Za-z_][A-Za-z_0-9]{0,7})|[\x22\x27]?((?:[A-Za-z]:\\|\\\\[^\\\/:?\x22\x27<>|]+)[^\\\/:?\x22\x27<>|]+(?:\\[^\\\/:?\x22\x27<>|]+)*)[\x22\x27]?)$/); + %let reg_file_id = %sysfunc(prxparse(%superq(reg_file_expr))); + + %if %sysfunc(prxmatch(®_file_id, %superq(rtf))) %then %do; + %let rtfref = %sysfunc(prxposn(®_file_id, 1, %superq(rtf))); + %let rtfloc = %sysfunc(prxposn(®_file_id, 2, %superq(rtf))); + + /*Ö¸¶¨µÄÊÇÎļþÒýÓÃÃû*/ + %if %bquote(&rtfref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&rtfref)) > 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&rtfref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&rtfref)) < 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&rtfref) Ö¸ÏòµÄÎļþ²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&rtfref)) = 0 %then %do; + %let rtfloc = %qsysfunc(pathname(&rtfref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(rtfloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(rtfloc))) = 0 %then %do; + %put ERROR: Îļþ·¾¶ %superq(rtfloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + %put ERROR: ÎļþÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßÎļþÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + + /*2. ¶ÁÈ¡RTFÎļþ*/ + /*2.1 ¸´ÖÆÒ»·ÝÎļþ£¬¹æ±ÜÎļþÒѱ»Íⲿ´ò¿ªµ¼Ö¶ÁÈ¡³åÍ»µÄÎÊÌâ*/ + X "copy ""&rtfloc"" ""&rtfloc.-copy"" & exit"; + + /*2.2 µ÷Óà %ReadRTF ¶ÁÈ¡Îļþ*/ + %ReadRTF(file = "&rtfloc.-copy", outdata = _tmp_rtf(drop = obs_seq), compress = yes, del_rtf_ctrl = yes); + + /*2.3 ɾ³ý¸´ÖƵÄÎļþ*/ + X "del ""&rtfloc.-copy"" & exit"; + + %if &readrtf_exit_with_error = TRUE %then %do; + X mshta vbscript:msgbox("&readrtf_exit_with_error_text",4144,"´íÎóÐÅÏ¢")(window.close); + %goto exit; + %end; + + + /*3. 
¸´ÖÆ dataset£¬Ê¹Óà sql into Óï¾ä´´½¨ºê±äÁ¿*/ + data _tmp_dataset; + set &dataset; + run; + + proc sql noprint; + select name into : dataset_col_1- from DICTIONARY.COLUMNS where libname = "WORK" and memname = "_TMP_DATASET"; /*dataset ±äÁ¿Ãû*/ + %let dataset_col_n = &SQLOBS; + + select type into : dataset_col_type_1- from DICTIONARY.COLUMNS where libname = "WORK" and memname = "_TMP_DATASET"; /*dataset ±äÁ¿ÀàÐÍ*/ + %let dataset_col_type_n = &SQLOBS; + + select ifc(not missing(format), format, "best.") into : dataset_col_format_1- from DICTIONARY.COLUMNS where libname = "WORK" and memname = "_TMP_DATASET"; /*dataset ±äÁ¿Êä³ö¸ñʽ*/ + %let dataset_col_format_n = &SQLOBS; + + select name into : rtf_col_1- from DICTIONARY.COLUMNS where libname = "WORK" and memname = "_TMP_RTF"; /*rtf ±äÁ¿Ãû*/ + %let rtf_col_n = &SQLOBS; + + select name into : rtf_col_eq1_1- from DICTIONARY.COLUMNS where libname = "WORK" and memname = "_TMP_RTF" and length = 1; /*rtf ÒÉËÆ¿ÕÁбäÁ¿Ãû*/ + %let rtf_col_eq1_n = &SQLOBS; + quit; + + + /*4. 
Ô¤´¦Àí*/ + /*4.1 dataset ½«Êýֵת»»³É×Ö·û´®*/ + proc sql noprint; + create table _tmp_dataset_char_ver as + select + %do i = 1 %to &dataset_col_n; + %if &&dataset_col_type_&i = num %then %do; + ifc(not missing(&&dataset_col_&i), strip(put(&&dataset_col_&i, &&dataset_col_format_&i)), '') as &&dataset_col_&i + %end; + %else %do; + "&&dataset_col_&i"n + %end; + + %if &i < &dataset_col_n %then %do; %bquote(,) %end; + %end; + from _tmp_dataset; + quit; + + /*4.2 dataset ºöÂÔCRLF×Ö·û*/ + %if %upcase(&ignoreCRLF) = YES %then %do; + data _tmp_dataset_char_ver; + set _tmp_dataset_char_ver; + %do i = 1 %to &dataset_col_n; + &&dataset_col_&i = kcompress(&&dataset_col_&i, "0D0A"x); + %end; + run; + %end; + + /*4.3 dataset ºöÂÔÇ°Öÿոñ*/ + %if %upcase(&ignoreLeadBlank) = YES %then %do; + data _tmp_dataset_char_ver; + set _tmp_dataset_char_ver; + %do i = 1 %to &dataset_col_n; + &&dataset_col_&i = strip(&&dataset_col_&i); + %end; + run; + %end; + + /*4.4 rtf ºöÂÔÈ«½Ç°ë½Ç·ûºÅ*/ + %if %upcase(&ignoreHalfOrFullWidth) = YES %then %do; + %let HalfOrWidthTranslation = %nrstr(/*±êµã·ûºÅ£¨²»º¬ÒýºÅ£©*/ + ",", "£¬", + ".", "¡£", + "?", "£¿", + "!", "£¡", + ":", "£º", + ";", "£»", + "~", "¡«", + + /*ÒýºÅ*/ + """", "¡°", + """", "¡±", + """", "¨”", + """", "¨•", + """", "£¢", + '''', "¡®", + '''', "¡¯", + '''', "£à", + '''', "£§", + '''', "¡ä", + + /*À¨ºÅ*/ + "(", "£¨", + ")", "£©", + "<", "£¼", + "<", "¡´", + ">", "£¾", + ">", "¡µ", + "[", "£Û", + "]", "£Ý", + "{", "£û", + "}", "£ý", + + /*Êýѧ·ûºÅ*/ + "0", "£°", "1", "£±", "2", "£²", "3", "£³", "4", "£´", + "5", "£µ", "6", "£¶", "7", "£·", "8", "£¸", "9", "£¹", + "+", "£«", "-", "£­", "*", "£ª", "/", "£¯", "\", "£Ü", "^", "£Þ", + "=", "£½", + "%%", "£¥", + + /*À­¶¡×Öĸ*/ + "a", "£á", "b", "£â", "c", "£ã", "d", "£ä", "e", "£å", "f", "£æ", "g", "£ç", "h", "£è", "i", "£é", "j", "£ê", "k", "£ë", "l", "£ì", "m", "£í", + "n", "£î", "o", "£ï", "p", "£ð", "q", "£ñ", "r", "£ò", "s", "£ó", "t", "£ô", "u", "£õ", "v", "£ö", "w", "£÷", "x", "£ø", "y", "£ù", 
"z", "£ú", + "A", "£Á", "B", "£Â", "C", "£Ã", "D", "£Ä", "E", "£Å", "F", "£Æ", "G", "£Ç", "H", "£È", "I", "£É", "J", "£Ê", "K", "£Ë", "L", "£Ì", "M", "£Í", + "N", "£Î", "O", "£Ï", "P", "£Ð", "Q", "£Ñ", "R", "£Ò", "S", "£Ó", "T", "£Ô", "U", "£Õ", "V", "£Ö", "W", "£×", "X", "£Ø", "Y", "£Ù", "Z", "£Ú", + + /*ÌØÊâ·ûºÅ*/ + "&", "£¦", + "@", "£À", + "#", "££", + "$", "¡ç", + "|", "£ü", + "_", "£ß" + ); + + data _tmp_dataset_char_ver; + set _tmp_dataset_char_ver; + %do i = 1 %to &dataset_col_n; + &&dataset_col_&i = ktranslate(&&dataset_col_&i, %unquote(%superq(HalfOrWidthTranslation))); + %end; + run; + + data _tmp_rtf; + set _tmp_rtf; + %do i = 1 %to &rtf_col_n; + &&rtf_col_&i = ktranslate(&&rtf_col_&i, %unquote(%superq(HalfOrWidthTranslation))); + %end; + run; + %end; + + /*4.5 ºöÂÔÄÚǶ¿Õ¸ñ*/ + %if %upcase(&ignoreembeddedblank = yes) %then %do; + data _tmp_dataset_char_ver; + set _tmp_dataset_char_ver; + %do i = 1 %to &dataset_col_n; + &&dataset_col_&i = kcompress(&&dataset_col_&i, , "s"); + %end; + run; + + data _tmp_rtf; + set _tmp_rtf; + %do i = 1 %to &rtf_col_n; + &&rtf_col_&i = kcompress(&&rtf_col_&i, , "s"); + %end; + run; + %end; + + /*4.6 rtf ºöÂÔ¿ÕÁÐ*/ + %if %upcase(&ignoreEmptyColumn) = YES %then %do; + %if &rtf_col_eq1_n > 0 %then %do; + %do i = 1 %to &rtf_col_eq1_n; + proc sql noprint; + select max(lengthn(&&rtf_col_eq1_&i)) into : col_len_max from _tmp_rtf; + + %if &col_len_max = 0 %then %do; + alter table _tmp_rtf drop &&rtf_col_eq1_&i; + %end; + quit; + %end; + %end; + %end; + + + /*5. 
ͬ²½±äÁ¿Ãû*/ + proc sql noprint; + select name into : rtf_col_1- from DICTIONARY.COLUMNS where libname = "WORK" and memname = "_TMP_RTF"; /*rtf ±äÁ¿Ãû*/ + %let rtf_col_n = &SQLOBS; + quit; + + %if &rtf_col_n ^= &dataset_col_n %then %do; + %put ERROR: ±äÁ¿ÊýÁ¿²»Æ¥Å䣡; + %goto exit; + %end; + %else %do; + proc sql noprint; + create table _tmp_rtf_rename as + select + %do i = 1 %to &rtf_col_n; + &&rtf_col_&i as &&dataset_col_&i + %if &i < &rtf_col_n %then %do; %bquote(,) %end; + %end; + from _tmp_rtf; + quit; + %end; + + + /*6. ±È½Ï RTF ÎļþÓëÊý¾Ý¼¯*/ + proc compare base = _tmp_rtf_rename compare = _tmp_dataset_char_ver; + run; + + + %exit: + /*7. Çå³ýÖмäÊý¾Ý¼¯*/ + %if %upcase(&del_temp_data) = YES %then %do; + proc datasets library = work nowarn noprint; + delete _tmp_rtf + _tmp_rtf_rename + _tmp_dataset + _tmp_dataset_char_ver + ; + quit; + %end; + + %put NOTE: ºê CompareRTFWithDataset ÒѽáÊøÔËÐУ¡; +%mend; diff --git a/src/gb18030/MergeRTF.sas b/src/gb18030/MergeRTF.sas new file mode 100644 index 0000000..cb995ec --- /dev/null +++ b/src/gb18030/MergeRTF.sas @@ -0,0 +1,631 @@ +/* +ÏêϸÎĵµÇëÇ°Íù Github ²éÔÄ: https://github.com/Snoopy1866/RTFTools-For-SAS +*/ + +%macro MergeRTF(DIR, + OUT = #AUTO, + RTF_LIST = #NULL, + DEPTH = MAX, + AUTOORDER = YES, + EXCLUDE = #NULL, + VD = #AUTO, + MERGE = YES, + MERGED_FILE_SHOW = SHORT, + LINK_TO_PREV = NO, + DEL_TEMP_DATA = YES) + /des = "ºÏ²¢RTFÎļþ" parmbuff; + + /*´ò¿ª°ïÖúÎĵµ*/ + %if %qupcase(&SYSPBUFF) = %bquote((HELP)) or %qupcase(&SYSPBUFF) = %bquote(()) %then %do; + X explorer "https://github.com/Snoopy1866/RTFTools-For-SAS/blob/main/docs/MergeRTF.md"; + %goto exit; + %end; + + + /*1. 
»ñȡĿ¼·¾¶*/ + %let reg_dir_expr = %bquote(/^(?:([A-Za-z_][A-Za-z_0-9]{0,7})|[\x22\x27]?((?:[A-Za-z]:\\|\\\\[^\\\/:?\x22\x27<>|]+)[^\\\/:?\x22\x27<>|]+(?:\\[^\\\/:?\x22\x27<>|]+)*)[\x22\x27]?)$/); + %let reg_dir_id = %sysfunc(prxparse(%superq(reg_dir_expr))); + %if %sysfunc(prxmatch(®_dir_id, %superq(dir))) %then %do; + %let dirref = %sysfunc(prxposn(®_dir_id, 1, %superq(dir))); + %let dirloc = %sysfunc(prxposn(®_dir_id, 2, %superq(dir))); + + /*Ö¸¶¨µÄÊÇĿ¼ÒýÓÃÃû*/ + %if %bquote(&dirref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&dirref)) > 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&dirref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&dirref)) < 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&dirref) Ö¸ÏòµÄĿ¼²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&dirref)) = 0 %then %do; + %let dirloc = %qsysfunc(pathname(&dirref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(dirloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(dirloc))) = 0 %then %do; + %put ERROR: Ŀ¼·¾¶ %superq(dirloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + %put ERROR: Ŀ¼ÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßĿ¼ÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + /*2. 
½¨Á¢ÐéÄâ´ÅÅÌ*/ + %if %sysmexecname(%sysmexecdepth - 1) ^= MERGERTF %then %do; + %let is_disk_symbol_all_used = FALSE; + filename dlist pipe "wmic logicaldisk get deviceid"; + data _null_; + infile dlist truncover end = end; + input disk_symbol $1.; + retain unused_disk_symbol 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; + unused_disk_symbol = transtrn(unused_disk_symbol, disk_symbol, trimn('')); + if end then do; + if length(unused_disk_symbol) = 0 then do; + call symputx('is_disk_symbol_all_used', 'TRUE'); + end; + else do; + call symputx('unused_disk_symbol', unused_disk_symbol); + end; + end; + run; + + %if &is_disk_symbol_all_used = TRUE %then %do; + %put ERROR: ÎÞÊ£ÓàÅÌ·û¿ÉÓ㬳ÌÐòÎÞ·¨ÔËÐУ¡; + %goto exit_with_error; + %end; + + %if %upcase(&vd) = #AUTO %then %do; + %let vd = %substr(&unused_disk_symbol, 1, 1); + %put NOTE: ×Ô¶¯Ñ¡Ôñ¿ÉÓõÄÅÌ·û %upcase(&vd); + %end; + %else %do; + %if not %sysfunc(find(&unused_disk_symbol, &vd)) %then %do; + %put ERROR: ÅÌ·û %upcase(&vd) ²»ºÏ·¨»ò±»Õ¼Óã¬ÇëÖ¸¶¨ÆäËûºÏ·¨»òδ±»Ê¹ÓõÄÅÌ·û£¡; + %goto exit_with_error; + %end; + %end; + + X "subst &vd: ""&dirloc"" & exit"; + %end; + + + /*3. 
ÊÇ·ñÖ¸¶¨ÍⲿÎļþ×÷Ϊ RTF ºÏ²¢Çåµ¥*/ + %if %upcase(&rtf_list) = #NULL %then %do; /*δָ¶¨ÍⲿÎļþ*/ + + /*ʹÓà DOS ÃüÁî»ñÈ¡ËùÓÐ RTF Îļþ£¬´æ´¢ÔÚ _tmp_rtf_list.txt ÖÐ*/ + X "dir ""&vd:\*.rtf"" /b/on/s > ""&vd:\_tmp_rtf_list.txt"" & exit"; + + + %if %upcase(&autoorder) = YES %then %do; /*×Ô¶¯ÅÅÐò*/ + /*ÌáÈ¡ RTF ÎļþÃûÖеÄÐÅÏ¢*/ + data _tmp_rtf_list; + infile "&vd:\_tmp_rtf_list.txt" truncover encoding = 'gbke'; + input rtf_path $char1000.; + + /*Õæʵ·¾¶*/ + rtf_path_real = cats("&dirloc", substr(rtf_path, 3)); + + /*ʶ±ð±í¸ñºÍÇåµ¥*/ + reg_table_id = prxparse("/^.*(((?:ÁÐ)?±í|Çåµ¥|ͼ)\s*(\d+(?:\.\d+)*)\.?\s*(.*)\.rtf)\s*$/o"); + + /*ɸѡÃüÃû¹æ·¶µÄ rtf Îļþ*/ + if prxmatch(reg_table_id, rtf_path) then do; + rtf_name = prxposn(reg_table_id, 1, rtf_path); /*RTF ÎļþÃû*/ + rtf_type = prxposn(reg_table_id, 2, rtf_path); /*RTF ÀàÐÍ*/ + rtf_seq = prxposn(reg_table_id, 3, rtf_path); /*RTF ±àºÅ*/ + ref_label = prxposn(reg_table_id, 4, rtf_path); /*RTF ÃèÊöÎÄ×Ö*/ + + rtf_filename_valid_flag = "Y"; + end; + + /*ÎļþÀàÐÍÑÜÉú±àÂë*/ + select (rtf_type); + when ("±í") rtf_type_n = 1; + when ("ͼ") rtf_type_n = 2; + when ("Áбí") rtf_type_n = 3; + when ("Çåµ¥") rtf_type_n = 4; + otherwise rtf_type_n = constant("BIG"); + end; + + /*ÎļþËùÔÚÎļþ¼ÐµÄÉî¶È*/ + rtf_dir_depth = count(rtf_path, "\"); + + /*ɸѡָ¶¨Éî¶ÈµÄÎļþ¼ÐµÄ rtf Îļþ*/ + %if %upcase(&depth) = MAX %then %do; + rtf_depth_valid_flag = "Y"; + %end; + %else %do; + if rtf_dir_depth <= &depth then do; + rtf_depth_valid_flag = "Y"; + end; + %end; + run; + + + /*¼ÆËã RTF ÎļþÃû°üº¬µÄÐòºÅ*/ + proc sql noprint; + select max(count(rtf_seq, ".")) + 1 into : lv_max trimmed from _tmp_rtf_list; /*¼ÆËã rtf ÎļþÃûµÄÐòºÅµÄ×î´ó²ã¼¶ÊýÁ¿*/ + + /*Ìí¼Ó´ú±í²ã¼¶ÐòºÅµÄ±äÁ¿£¬×î¶àÓÐ n ²ã£¬¾ÍÌí¼Ó n ¸ö±äÁ¿£¬Ã¿¸ö±äÁ¿´ú±íµ±Ç° rtf ÎļþÔÚij¸ö²ã¼¶µÄ˳Ðò*/ + alter table _tmp_rtf_list + add %do i = 1 %to %eval(&lv_max - 1); + seq_lv_&i num, + %end; + seq_lv_&lv_max num + ; + quit; + + data _tmp_rtf_list_add_lv; + set _tmp_rtf_list; + lv_max_curr_obs = countw(rtf_seq, "."); /*¼ÆË㵱ǰ rtf 
ÎļþÃûµÄÐòºÅµÄ²ã¼¶ÊýÁ¿*/ + + array seq_lv{&lv_max} seq_lv_1-seq_lv_&lv_max; + + do i = 1 to lv_max_curr_obs; + seq_lv{i} = input(scan(rtf_seq, i, "."), 8.); + end; + + drop i lv_max_curr_obs; + run; + + + /*¸ù¾ÝÐòºÅ½øÐÐÅÅÐò*/ + proc sort data = _tmp_rtf_list_add_lv out = _tmp_rtf_list_add_lv_sorted; + by %do i = 1 %to &lv_max; + seq_lv_&i + %end; + rtf_type_n + ref_label + rtf_dir_depth + ; + run; + %end; + %else %if %upcase(&autoorder) = NO %then %do; /*ÊÖ¶¯ÅÅÐò*/ + X explorer "&vd:\_tmp_rtf_list.txt"; + X mshta vbscript:msgbox("ÇëÔÚµ¯³öµÄ´°¿ÚÖÐÊÖ¶¯µ÷Õû RTF ÎļþµÄºÏ²¢Ë³Ðò£¬±£´æºó»Øµ½´Ëµ¯´°£¬°´È·ÈÏ°´Å¥¼ÌÐø¡£¶ÔÓÚÎÞÐèºÏ²¢µÄ RTF Îļþ£¬Äú¿ÉÒÔÔÚ¶ÔÓ¦ÐеĿªÍ·Ê¹Óà '//' ½øÐÐ×¢ÊÍ£¬»òÖ±½Óɾ³ý¶ÔÓ¦ÐУ¬¿ÕÐн«±»ºöÂÔ¡£",4160,"Ìáʾ")(window.close); + + /*ÊÖ¶¯ÅÅÐòºó£¬±£´æÒ»·Ý¸±±¾£¬ÒÔ¹©ºóÐøµ÷ÓÃʱָ¶¨²ÎÊý RTF_LIST = rtf_list_copy.txt*/ + X "copy ""&vd:\_tmp_rtf_list.txt"" ""&vd:\rtf_list_copy.txt"" & exit"; + + /*µÝ¹éµ÷ÓÃ×ÔÉí*/ + %MergeRTF(dir = &dir, + out = &out, + rtf_list = rtf_list_copy.txt, + depth = &depth, + autoorder = &autoorder, + exclude = &exclude, + vd = &vd, + merge = &merge, + merged_file_show = &merged_file_show, + del_temp_data = &del_temp_data); + %goto exit_with_recursive_end; + %end; + %end; + %else %do; + /*Ö±½Ó¶ÁÈ¡ÍⲿÎļþ*/ + data _tmp_rtf_list; + infile "&vd:\&rtf_list" truncover encoding = 'gbke'; + input rtf_path $char1000.; + + if kcompress(rtf_path, , "s") = "" then delete; /*ɾ³ý¿ÕÐÐ*/ + else if substr(strip(rtf_path), 1, 2) = "//" then delete; /*ɾ³ýÒÑ×¢Ê굀 RTF Îļþ*/ + run; + + /*¼æÈÝÐÔ´¦Àí*/ + data _tmp_rtf_list_add_lv_sorted; + set _tmp_rtf_list; + + /*Õæʵ·¾¶*/ + rtf_path_real = cats("&dirloc", substr(rtf_path, 3)); + + /*ÎļþÃû*/ + rtf_name = kscan(rtf_path, -1, "\"); + + rtf_filename_valid_flag = "Y"; + rtf_depth_valid_flag = "Y"; + run; + %end; + + + %let run_start_time = %sysfunc(time()); /*¼Ç¼¿ªÊ¼Ê±¼ä*/ + + + /*4. 
½öÁгö¶ø²»ºÏ²¢ rtf Îļþ£¬ÓÃÓÚµ÷ÊÔºÍÊÔÔËÐÐ*/ + %if %upcase(&merge) = NO %then %do; + data rtf_list; + set _tmp_rtf_list_add_lv_sorted; + label rtf_path = "ÐéÄâ´ÅÅÌ·¾¶" + rtf_path_real = "ÎïÀí´ÅÅÌ·¾¶" + rtf_name = "ÎļþÃû" + rtf_filename_valid_flag = "ÎļþÃûÊÇ·ñ¹æ·¶" + rtf_depth_valid_flag = "ÎļþÊÇ·ñÔÚÖ¸¶¨Éî¶ÈÄÚ"; + keep rtf_name rtf_path rtf_path_real rtf_filename_valid_flag rtf_depth_valid_flag; + run; + %goto exit_with_no_merge; + %end; + + + /*----------------ÁÙʱ¹Ø±ÕÈÕÖ¾Êä³ö------------------*/ + proc printto log=_null_; + run; + + + /*5. ¹¹Ôì filename Óï¾ä£¬½¨Á¢ÎļþÒýÓÃ*/ + data _tmp_rtf_list_fnst; + set _tmp_rtf_list_add_lv_sorted(where = (rtf_filename_valid_flag = "Y" and rtf_depth_valid_flag = "Y")) end = end; + + fileref = 'rtf' || strip(put(_n_, 8.)); + fnstm = 'filename ' || strip(fileref) || ' "' || strip(rtf_path) || '";'; + + call execute(fnstm); + + if end then call symput("rtf_ref_max", put(_n_, 8.)); /*»ñÈ¡ÐèÒªºÏ²¢µÄ rtf ÎļþµÄÒýÓÃ×ÜÊý*/ + run; + + + /*6. ¶ÁÈ¡ rtf Îļþ*/ + %if &rtf_ref_max = 0 %then %do; + %put ERROR: Îļþ¼Ð &dirloc ÄÚûÓзûºÏÒªÇóµÄ rtf Îļþ¿ÉÒԺϲ¢£¡; + %goto exit; + %end; + %else %if &rtf_ref_max = 1 %then %do; + %put ERROR: Îļþ¼Ð &dirloc ÄÚÖ»ÓÐÒ»¸ö·ûºÏÒªÇóµÄ rtf Îļþ£¬ÎÞÐèºÏ²¢£¡; + %goto exit; + %end; + %else %do; + %do i = 1 %to &rtf_ref_max; + %if %sysfunc(fileref(rtf&i)) < 0 %then %do; + X mshta vbscript:msgbox("ºÏ²¢Ê§°Ü£¬Îļþ %qsysfunc(pathname(rtf&i, F)) ²»´æÔÚ£¡",4112,"Ìáʾ")(window.close); + %goto exit_with_no_merge; + %end; + %else %do; + data _tmp_rtf&i(compress = yes); + informat line $32767.; + format line $32767.; + length line $32767.; + + infile rtf&i truncover; + input line $char32767.; + run; + %end; + %end; + %end; + + + /*7. 
¼ì²â rtf ÎļþÊÇ·ñ±» SAS Ö®ÍâµÄÆäËû³ÌÐòÐÞ¸Ä*/ + %do i = 1 %to &rtf_ref_max; + data _null_; + set _tmp_rtf&i(obs = 1); + reg_rtf_file_valid_header_id = prxparse("/^{\\rtf1\\ansi\\ansicpg\d+\\uc\d+\\deff\d\\deflang\d+\\deflangfe\d+$/o"); + if prxmatch(reg_rtf_file_valid_header_id, strip(line)) then do; + call symput("rtf&i._modified_flag", "N"); /*ºê±äÁ¿ rtf&i._modified_flag, ±êʶ rtf ÎļþÊÇ·ñ±»ÆäËû³ÌÐòÐ޸Ĺý*/ + end; + else do; + call symput("rtf&i._modified_flag", "Y"); + end; + run; + %end; + + + + /*8. »ñÈ¡¿ÉºÏ²¢µÄ rtf ÎļþÒýÓÃÁбí*/ + %let mergeable_rtf_list = %bquote(); + %let unmergeable_rtf_index = 0; + %do i = 1 %to &rtf_ref_max; + %if &&rtf&i._modified_flag = N %then %do; + %let mergeable_rtf_list = &mergeable_rtf_list rtf&i; + %end; + %else %do; + %let unmergeable_rtf_index = %eval(&unmergeable_rtf_index + 1); + proc sql noprint; + select %if %upcase(&merged_file_show) = SHORT %then %do; + rtf_name + %end; + %else %if %upcase(&merged_file_show) = FULL %then %do; + rtf_path_real + %end; + %else %if %upcase(&merged_file_show) = VIRTUAL %then %do; + rtf_path + %end; + into : unmergeable_rtf_file_&unmergeable_rtf_index trimmed from _tmp_rtf_list_fnst where fileref = "rtf&i"; + quit; + %end; + %end; + %let unmergeable_rtf_sum = &unmergeable_rtf_index; + + + /*----------------»Ö¸´ÈÕÖ¾Êä³ö------------------*/ + proc printto log=log; + run; + + + %if &mergeable_rtf_list = %bquote() %then %do; + %put ERROR: Îļþ¼Ð &dirloc ÄÚûÓпÉÒԺϲ¢µÄ rtf Îļþ£¡; + %goto exit; + %end; + + %do i = 1 %to &unmergeable_rtf_sum; + %put ERROR: Îļþ %superq(unmergeable_rtf_file_&i) Ëƺõ±»ÐÞ¸ÄÁË£¬ÒÑÌø¹ý¸ÃÎļþ£¡; + %end; + + /*----------------ÁÙʱ¹Ø±ÕÈÕÖ¾Êä³ö------------------*/ + proc printto log=_null_; + run; + + + /*9. 
´¦Àí rtf Îļþ*/ + /*´óÖÂ˼·ÈçÏ£º + + Ô¤´¦Àí£¬É¾³ýµÚ 2 ¸ö¼°Ö®ºóµÄRTFÎļþµÄҳüҳ½Å + + ¿ªÍ·µÄ rtf Îļþ£¬É¾³ýĩβµÄ } + + + ÖмäµÄ rtf Îļþ + - ɾ³ý \sectd ֮ǰµÄËùÓÐÐÐ + - ÔÚ \sectd Ç°ÃæÌí¼Ó \sect + - ɾ³ýĩβµÄ } + + + ½áβµÄ rtf Îļþ£¬±£ÁôĩβµÄ } + */ + %let mergeable_rtf_ref_max = %sysfunc(countw(&mergeable_rtf_list, %bquote( ))); + %do i = 1 %to &mergeable_rtf_ref_max; + %let mergeable_rtf_&i._start_time = %sysfunc(time()); /*¼Ç¼µ¥¸ö rtf Îļþ´¦Àí¿ªÊ¼Ê±¼ä*/ + + %let mergeable_rtf_ref = %scan(&mergeable_rtf_list, &i, %bquote( )); + + /*Ô¤´¦Àí£¬É¾³ýµÚ 2 ¸ö¼°Ö®ºóµÄRTFÎļþµÄҳüҳ½Å*/ + %if %sysevalf(&i >= 2) %then %do; + %if %upcase(&link_to_prev) = YES %then %do; + %let reg_header_expr = %bquote(/^\{\\header\\pard\\plain\\q[lcr]\{$/o); + %let reg_footer_expr = %bquote(/^\{\\footer\\pard\\plain\\q[lcr]\{$/o); + + /*ҳü*/ + data _tmp_&mergeable_rtf_ref(compress = yes); + set _tmp_&mergeable_rtf_ref; + + reg_header_id = prxparse("®_header_expr"); + + retain header_brace_unclosed; /*δ±ÕºÏµÄ´óÀ¨ºÅÊýÁ¿*/ + retain header_start_flag 0 + header_end_flag 0; + if prxmatch(reg_header_id, strip(line)) then do; /*ҳü¿ªÊ¼*/ + header_brace_unclosed = (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + header_start_flag = 1; + delete; + end; + else if header_start_flag = 1 and header_end_flag = 0 then do; + header_brace_unclosed + (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + if header_brace_unclosed = 0 then do; /*ҳü½áÊø*/ + header_end_flag = 1; + header_brace_unclosed = .; + delete; + end; + else do; /*ҳüÖмä*/ + delete; + end; + end; + else if header_brace_unclosed = . 
then do; + header_start_flag = 0; + header_end_flag = 0; + end; + run; + + /*Ò³½Å*/ + data _tmp_&mergeable_rtf_ref(compress = yes); + set _tmp_&mergeable_rtf_ref; + + reg_footer_id = prxparse("®_footer_expr"); + + retain footer_brace_unclosed; /*δ±ÕºÏµÄ´óÀ¨ºÅÊýÁ¿*/ + retain footer_start_flag 0 + footer_end_flag 0; + if prxmatch(reg_footer_id, strip(line)) then do; /*Ò³½Å¿ªÊ¼*/ + footer_brace_unclosed = (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + footer_start_flag = 1; + delete; + end; + else if footer_start_flag = 1 and footer_end_flag = 0 then do; + footer_brace_unclosed + (count(strip(line), "{") - count(strip(line), "\{")) - (count(strip(line), "}") - count(strip(line), "\}")); + if footer_brace_unclosed = 2 and strip(line) = "{\row}" then do; /*Ò³½Å½áÊø*/ + footer_end_flag = 1; + footer_brace_unclosed = -2; + delete; + end; + else do; /*Ò³½ÅÖмä*/ + delete; + end; + end; + else if footer_brace_unclosed = -2 then do; /*ĩβ \pard}} ´¦Àí*/ + if substr(strip(line), 1, 7) = "\pard}}" then do; + line = strip(substr(line, 8)); + footer_brace_unclosed = .; + end; + end; + else if footer_brace_unclosed = . 
then do; + footer_start_flag = 0; + footer_end_flag = 0; + end; + run; + %end; + %end; + + /*Õýʽ´¦Àí*/ + data _tmp_&mergeable_rtf_ref(compress = yes); + set _tmp_&mergeable_rtf_ref end = end; + + %if %sysevalf(&i = 1) %then %do; + retain fst_sectd_found 1; /*¿ªÍ·µÄ rtf Îļþ²»ÐèÒª¿¼ÂÇÊÇ·ñÒѾ­ÕÒµ½µÚÒ»ÐÐ \sectd£¬Òò´Ë¸³ÖµÎª 1*/ + %end; + %else %do; + retain fst_sectd_found 0; /*ºóÐøµÄ rtf ÎļþÐèÒª¿¼ÂÇÊÇ·ñÕÒµ½µÚÒ»ÐÐ \sectd£¬²¢Ìí¼Ó \sect£¬Òò´Ë¸³ÖµÎª 0*/ + %end; + + /*·Ö½Ú·û´¦Àí*/ + reg_sectd_id = prxparse("/^\\sectd\\linex\d\\endnhere\\pgwsxn\d+\\pghsxn\d+\\lndscpsxn\\headery\d+\\footery\d+\\marglsxn\d+\\margrsxn\d+\\margtsxn\d+\\margbsxn\d+$/o"); + if fst_sectd_found = 0 then do; /*Ê״η¢ÏÖ \sectd£¬ÔÚ\sectd Ç°ÃæÌí¼Ó \sect£¬ÒÔ±ãÉú³É rtf ÎļþÖ®¼äµÄ·Ö½Ú·û*/ + if prxmatch(reg_sectd_id, strip(line)) then do; + line = cats("\sect", strip(line)); + fst_sectd_found = 1; + end; + else do; + delete; /*ɾ³ý¶àÓàµÄÔªÐÅÏ¢£¨×ÖÌå±í¡¢ÑÕÉ«±íµÈ£¬ÕâЩÐÅÏ¢ÔÚ¿ªÍ·µÄ rtf ÖÐÒѾ­±»¶¨Ò壬ÎÞÐèÖظ´¶¨Ò壩*/ + end; + end; + + /*´ó¸Ù¼¶±ð±ê¼Ç´¦Àí*/ + length former_outlinelevel_text latter_outlinelevel_text $32.; + retain former_outlinelevel_text ""; /*ÓÃÓڱȽϵĴó¸Ù¼¶±ðÎı¾*/ + reg_outlinelevel_id = prxparse("/\\outlinelevel\d{(.*)}/o"); + reg_outlinelevel_change_id = prxparse("s/\\outlinelevel\d//o"); + if prxmatch(reg_outlinelevel_id, strip(line)) then do; + latter_outlinelevel_text = hashing('MD5', prxposn(reg_outlinelevel_id, 1, strip(line))); + if former_outlinelevel_text = latter_outlinelevel_text then do; + line = prxchange(reg_outlinelevel_change_id, 1, strip(line)); /*ɾ³ýÖظ´µÄ´ó¸Ù¼¶±ð±ê¼Ç*/ + end; + else do; + former_outlinelevel_text = latter_outlinelevel_text; /*¸üÐÂÓÃÓڱȽϵĴó¸Ù¼¶±ðÎı¾*/ + end; + end; + + drop fst_sectd_found reg_sectd_id; + + %if %sysevalf(&i < &mergeable_rtf_ref_max) %then %do; /*ɾ³ýĩβµÄ }£¨½áβµÄ rtf Îļþ±£Áô }£©*/ + if end then delete; + %end; + run; + + /*»ñÈ¡¿ÉºÏ²¢µÄ rtf ÎļþÃû*/ + proc sql noprint; + select %if %upcase(&merged_file_show) = SHORT %then %do; + rtf_name + %end; + %else %if 
%upcase(&merged_file_show) = FULL %then %do; + rtf_path_real + %end; + %else %if %upcase(&merged_file_show) = VIRTUAL %then %do; + rtf_path + %end; + into : merged_rtf_file_&i trimmed from _tmp_rtf_list_fnst where fileref = "&mergeable_rtf_ref"; + quit; + %let mergeable_rtf_&i._end_time = %sysfunc(time()); /*¼Ç¼µ¥¸ö rtf Îļþ´¦Àí½áÊøʱ¼ä*/ + %let mergeable_rtf_&i._spend_time = %sysfunc(putn(%sysevalf(&&mergeable_rtf_&i._end_time - &&mergeable_rtf_&i._start_time), 8.2)); /*¼ÆËãµ¥¸ö rtf Îļþ´¦ÀíºÄʱ*/ + %end; + + + /*10. ºÏ²¢ rtf Îļþ*/ + data _tmp_rtf_merged(compress = yes); + set %do i = 1 %to &mergeable_rtf_ref_max; + _tmp_%scan(&mergeable_rtf_list, &i, %bquote( )) + %end; + ; + run; + + + /*----------------»Ö¸´ÈÕÖ¾Êä³ö------------------*/ + proc printto log=log; + run; + + + %do i = 1 %to &mergeable_rtf_ref_max; + %put NOTE: Îļþ %superq(merged_rtf_file_&i) ºÏ²¢Íê³É£¬ºÄʱ &&mergeable_rtf_&i._spend_time s£¡; + %end; + + + /*11. Êä³ö rtf Îļþ*/ + %if %upcase(&out) = #AUTO %then %do; + %let date = %sysfunc(putn(%sysfunc(today()), yymmdd10.)); + %let time = %sysfunc(time()); + %let hour = %sysfunc(putn(%sysfunc(hour(&time)), z2.)); + %let minu = %sysfunc(putn(%sysfunc(minute(&time)), z2.)); + %let secd = %sysfunc(putn(%sysfunc(second(&time)), z2.)); + %let out = %bquote(merged-&date &hour-&minu-&secd..rtf); + %end; + %else %do; + %let reg_out_id = %sysfunc(prxparse(%bquote(/^[\x22\x27]?(.+?)[\x22\x27]?$/o))); + %if %sysfunc(prxmatch(®_out_id, %superq(out))) %then %do; + %let out = %bquote(%sysfunc(prxposn(®_out_id, 1, %superq(out)))); + %end; + %end; + + data _null_; + set _tmp_rtf_merged; + file "&vd:\&out" lrecl = 32767; + act_length = length(line); + put line $varying32767. act_length; + run; + + + /*12. 
µ¯³öÌáʾ¿ò*/ + %let run_end_time = %sysfunc(time()); /*¼Ç¼½áÊøʱ¼ä*/ + %let run_spend_time = %sysfunc(putn(%sysevalf(&run_end_time - &run_start_time), 8.2)); /*¼ÆËãºÄʱ*/ + + %if %sysevalf(&mergeable_rtf_ref_max < &rtf_ref_max) %then %do; + X mshta vbscript:msgbox("ºÏ²¢³É¹¦£¬ºÄʱ &run_spend_time s£¡²¿·ÖÒѱ»ÐÞ¸ÄµÄ rtf ÎļþδºÏ²¢£¬Çë²é¿´ÈÕÖ¾ÏêÇ飡",4144,"Ìáʾ")(window.close); + %end; + %else %do; + X mshta vbscript:msgbox("ºÏ²¢³É¹¦£¬ºÄʱ &run_spend_time s£¡",4160,"Ìáʾ")(window.close); + %end; + + + %exit: + /*----------------ÁÙʱ¹Ø±ÕÈÕÖ¾Êä³ö------------------*/ + proc printto log=_null_; + run; + + + /*ɾ³ýÁÙʱÊý¾Ý¼¯*/ + %if %upcase(&del_temp_data) = YES and %symexist(rtf_ref_max) %then %do; + proc datasets library = work nowarn noprint; + delete %do i = 1 %to &rtf_ref_max; + _tmp_rtf&i + %end; + ; + quit; + %end; + + + %exit_with_no_merge: + /*----------------ÁÙʱ¹Ø±ÕÈÕÖ¾Êä³ö------------------*/ + proc printto log=_null_; + run; + + + /*ɾ³ýÁÙʱÊý¾Ý¼¯*/ + %if %upcase(&del_temp_data) = YES %then %do; + proc datasets library = work nowarn noprint; + delete _tmp_rtf_list + _tmp_rtf_list_add_lv + _tmp_rtf_list_add_lv_sorted + _tmp_rtf_list_fnst + _tmp_rtf_merged + ; + quit; + %end; + + + /*----------------»Ö¸´ÈÕÖ¾Êä³ö------------------*/ + proc printto log=log; + run; + + + /*ɾ³ý _tmp_rtf_list.txt*/ + X "del ""&vd:\_tmp_rtf_list.txt"" & subst &vd: /D & exit"; + + /*ɾ³ý _null_.log Îļþ*/ + X "del _null_.log & exit"; + + + %put NOTE: ºê MergeRTF ÒѽáÊøÔËÐУ¡; + + %exit_with_recursive_end: + %exit_with_error: +%mend; diff --git a/src/gb18030/MixCWFont.sas b/src/gb18030/MixCWFont.sas new file mode 100644 index 0000000..233c84c --- /dev/null +++ b/src/gb18030/MixCWFont.sas @@ -0,0 +1,351 @@ +/* +ÏêϸÎĵµÇëÇ°Íù Github ²éÔÄ: https://github.com/Snoopy1866/RTFTools-For-SAS +*/ + +%macro MixCWFont(RTF, + OUT = #AUTO, + CFONT = #AUTO, + WFONT = #AUTO, + DEL_TEMP_DATA = YES) + /des = "ÖÐÎ÷ÎÄ×ÖÌå»ìÅÅ" parmbuff; + + /*´ò¿ª°ïÖúÎĵµ*/ + %if %qupcase(&SYSPBUFF) = %bquote((HELP)) or %qupcase(&SYSPBUFF) = 
%bquote(()) %then %do; + X explorer "https://github.com/Snoopy1866/RTFTools-For-SAS/blob/main/docs/MixCWFont.md"; + %goto exit; + %end; + + + /*1. »ñÈ¡Îļþ·¾¶*/ + %let reg_file_expr = %bquote(/^(?:([A-Za-z_][A-Za-z_0-9]{0,7})|[\x22\x27]?((?:[A-Za-z]:\\|\\\\[^\\\/:?\x22\x27<>|]+)[^\\\/:?\x22\x27<>|]+(?:\\[^\\\/:?\x22\x27<>|]+)*)[\x22\x27]?)$/); + %let reg_file_id = %sysfunc(prxparse(%superq(reg_file_expr))); + + %if %sysfunc(prxmatch(®_file_id, %superq(rtf))) %then %do; + %let rtfref = %sysfunc(prxposn(®_file_id, 1, %superq(rtf))); + %let rtfloc = %sysfunc(prxposn(®_file_id, 2, %superq(rtf))); + + /*Ö¸¶¨µÄÊÇÎļþÒýÓÃÃû*/ + %if %bquote(&rtfref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&rtfref)) > 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&rtfref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&rtfref)) < 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&rtfref) Ö¸ÏòµÄÎļþ²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&rtfref)) = 0 %then %do; + %let rtfloc = %qsysfunc(pathname(&rtfref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(rtfloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(rtfloc))) = 0 %then %do; + %put ERROR: Îļþ·¾¶ %superq(rtfloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + %put ERROR: ÎļþÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßÎļþÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + + /*2 ¸´ÖÆÒ»·ÝÎļþ£¬¹æ±ÜÎļþÒѱ»Íⲿ´ò¿ªµ¼Ö¶ÁÈ¡³åÍ»µÄÎÊÌâ*/ + %let file_suffix = %qscan(%superq(rtfloc), -1, %str(.)); + %if %qupcase(&file_suffix) = RTF %then %do; + %let rtfloc_mixed = %qsysfunc(substr(%superq(rtfloc), 1, %length(%superq(rtfloc)) - 4))-mixed.rtf; + %end; + %else %do; + %let rtfloc_mixed = %superq(rtfloc)-mixed.rtf; + %end; + X "copy ""&rtfloc"" ""&rtfloc_mixed"" & exit"; + + + /*3. ¶ÁÈ¡ rtf Îļþ*/ + data _tmp_rtf(compress = yes); + informat line $32767.; + format line $32767.; + length line $32767.; + + infile "&rtfloc_mixed" truncover; + input line $char32767.; + run; + + + /*4. 
ʶ±ð×ÖÌå±í*/ + + /*ÒÑÖªµÄ³£ÓÃ×ÖÌ壬µ± CFONT = #AUTO »ò WFONT = #AUTO£¬ÇÒÔÚ×ÖÌå±íÖз¢ÏÖÕâЩ×ÖÌåʱ£¬»á×Ô¶¯Ó¦Óõ½Îı¾ÖÐ*/ + %let cfont_predefined_list = %upcase('CSongGB18030C-Light', 'CSongGB18030C-LightHWL', 'DengXian', 'FangSong', 'KaiTi', 'Lisu', 'Noto Sans SC Regular', 'SimSun', 'YouYuan'); + %let wfont_predefined_list = %upcase('Arial', 'Calibri', 'Cascadia Code', 'Consolas', 'HelveticaNeueforSAS', 'HelveticaNeueforSAS Light', 'Times', 'Times New Roman'); + + data _tmp_rtf_font_spec(compress = yes); + set _tmp_rtf; + + seq = _n_; + + length is_fonttable_def $1 + is_fonttable_def_start $1 + is_fonttable_def_end $1 + font_name $40 + font_lang $1; + /*ʹÓÃÕýÔòʶ±ð×ÖÌå±íµÄ¶¨Òå*/ + if strip(line) = '{\fonttbl' then is_fonttable_def_start = 'Y'; + if strip(line) = '}{\colortbl;' then is_fonttable_def_end = 'Y'; + + reg_fonttable_def_id = prxparse("/^\{\\f(\d+)\\froman\\fprq\d+\\fcharset\d+\\cpg\d+\s(.+)\x3B\}$/o"); + if prxmatch(reg_fonttable_def_id, strip(line)) then do; + is_fonttable_def = 'Y'; + font_id = input(prxposn(reg_fonttable_def_id, 1, strip(line)), 8.); + font_name = prxposn(reg_fonttable_def_id, 2, strip(line)); + + /*ÖÐÎ÷ÎÄ×ÖÌå·ÖÀà*/ + retain cfont_seq wfont_seq; + if upcase(font_name) in (&cfont_predefined_list) then do; + font_lang = 'C'; + cfont_seq + 1; + end; + else if upcase(font_name) in (&wfont_predefined_list) then do; + font_lang = 'W'; + wfont_seq + 1; + end; + else font_lang = 'O'; + end; + else do; + cfont_seq = .; + wfont_seq = .; + end; + + if font_lang = 'C' then call symputx('is_cfont_found', 'TRUE'); + if font_lang = 'W' then call symputx('is_wfont_found', 'TRUE'); + run; + + /*5. 
ÌáÈ¡»ò²¹³ä×ÖÌå±í*/ + %let is_cw_font_found = TRUE; + %let is_cfont_found = FALSE; + %let is_wfont_found = FALSE; + + %let last_font_id = 0; + + /*¸´ÖÆ´Ó¿ªÍ·µ½×ÖÌå±í¶¨Òå½áÊøλÖÃµÄ RTF ´úÂëÐÐ*/ + proc sql noprint; + select seq into : font_def_end_seq trimmed from _tmp_rtf_font_spec where is_fonttable_def_end = 'Y'; /*×ÖÌå±í¶¨Òå½áÊøµÄÐкÅ*/ + create table _tmp_rtf_font_added as select * from _tmp_rtf_font_spec(firstobs = 1 obs = %eval(&font_def_end_seq - 1)); + quit; + + /*¸ù¾Ý²ÎÊý CFONT ¾ö¶¨ÊÇ·ñ²åÈëÖÐÎÄ×ÖÌ嶨Òå*/ + %if %qupcase(&cfont) = #AUTO %then %do; + proc sql noprint; + select font_id into : cfont_id trimmed from _tmp_rtf_font_spec where cfont_seq = 1; + quit; + + /*×ÖÌå±í䶨ÒåÖÐÎÄ×ÖÌå*/ + %if &SQLOBS = 0 %then %do; + %let is_cw_font_found = FALSE; + X mshta vbscript:msgbox("δÕÒµ½×ÖÌå±íÖеÄÖÐÎÄ×ÖÌ壬ÇëÊÖ¶¯Ö¸¶¨²ÎÊý CFONT Ϊһ¸öºÏÊʵÄÖÐÎÄ×ÖÌåÃû³Æ£¡",4112,"Ìáʾ")(window.close); + %end; + %end; + %else %do; + proc sql noprint; + select ifn(not missing(font_id), font_id + 1, 1) into : last_font_id trimmed from _tmp_rtf_font_spec where seq = &font_def_end_seq - 1; + quit; + + %let cfont_id = &last_font_id; + + proc sql noprint; + insert into _tmp_rtf_font_added + set line = "{\f&cfont_id\froman\fprq2\fcharset134\cpg936 %superq(cfont);}", + is_fonttable_def = 'Y', + font_name = "%superq(cfont)", + font_lang = 'C', + font_id = &cfont_id; + quit; + %end; + + /*¸ù¾Ý²ÎÊý WFONT ¾ö¶¨ÊÇ·ñ²åÈëÎ÷ÎÄ×ÖÌ嶨Òå*/ + %if %qupcase(&wfont) = #AUTO %then %do; + proc sql noprint; + select font_id into : wfont_id trimmed from _tmp_rtf_font_spec where wfont_seq = 1; + quit; + + /*×ÖÌå±í䶨ÒåÎ÷ÎÄ×ÖÌå*/ + %if &SQLOBS = 0 %then %do; + %let is_cw_font_found = FALSE; + X mshta vbscript:msgbox("δÕÒµ½×ÖÌå±íÖеÄÎ÷ÎÄ×ÖÌ壬ÇëÊÖ¶¯Ö¸¶¨²ÎÊý WFONT Ϊһ¸öºÏÊʵÄÎ÷ÎÄ×ÖÌåÃû³Æ£¡",4112,"Ìáʾ")(window.close); + %end; + %end; + %else %do; + %if &last_font_id > 0 %then %do; + %let wfont_id = %eval(&cfont_id + 1); + %end; + %else %do; + proc sql noprint; + select ifn(not missing(font_id), font_id + 1, 1) into : last_font_id trimmed from 
_tmp_rtf_font_spec where seq = &font_def_end_seq - 1; + quit; + + %let wfont_id = &last_font_id; + %end; + + proc sql noprint; + insert into _tmp_rtf_font_added + set line = "{\f&wfont_id\froman\fprq2\fcharset134\cpg936 %superq(wfont);}", + is_fonttable_def = 'Y', + font_name = "%superq(wfont)", + font_lang = 'W', + font_id = &wfont_id; + quit; + %end; + + /*Append the remaining RTF lines (from the end of the font table onward) after the copied and inserted font definitions*/ + data _tmp_rtf_font_added(compress = yes); + set _tmp_rtf_font_added + _tmp_rtf_font_spec(firstobs = &font_def_end_seq); + run; + + %if &is_cw_font_found = FALSE %then %do; + %goto exit; + %end; + + + /*6. Re-join header cells whose text was split over several RTF lines by embedded {\line} breaks: fragments are accumulated in tmp_line and written out as one line once the closing \cell} is reached*/ + data _tmp_rtf_polish(compress = yes); + set _tmp_rtf_font_added; + + reg_header_cell_id = prxparse("/\\pard\\plain\\intbl(?:\\keepn)?\\sb\d*\\sa\d*\\q[lcr]\\f\d*\\fs\d*\\cf\d*\{((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])+)\{\\line\}/o"); + + length tmp_line $32767; + retain tmp_line; + + if missing(tmp_line) then do; + if prxmatch(reg_header_cell_id, trim(line)) then do; + tmp_line = trim(line); + + if substr(trim(line), length(line) - 5) = '\cell}' then do; /*The cell contains {\line} but also ends with \cell} on this same line, so it is already complete: keep the row as is and do not carry tmp_line into the next observation*/ + line = tmp_line; + tmp_line = ''; + end; + /*Only a cell that continues on later lines is held back; a complete cell must stay in the output, otherwise the row would be lost*/ + else delete; + end; + end; + else if not missing(tmp_line) then do; + tmp_line = trim(tmp_line) || trim(line); + + if substr(trim(line), length(line) - 6) = '{\line}' then do; /*Middle fragment of a wrapped cell, ending with {\line}: keep accumulating*/ + delete; + end; + else if substr(trim(line), length(line) - 5) = '\cell}' then do; /*Last fragment of a wrapped cell, ending with \cell}: emit the joined text*/ + line = tmp_line; + tmp_line = ''; + end; + end; + + keep line; + run; + + + /*7. 
ÐÞ¸Ä×ÖÌå*/ + data _tmp_rtf_mixed(compress = yes); + set _tmp_rtf_polish; + length context_mixed $32767; + + /*Ð޸ĵ¥Ôª¸ñÎı¾×ÖÌå*/ + reg_cell_id = prxparse("/\\pard\\plain\\intbl(?:\\keepn)?\\sb\d*\\sa\d*\\q[lcr]\\f\d*\\fs\d*\\cf\d*\{((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])+)\\cell\}/o"); + reg_cell_inside_id = prxparse("/\\animtext\d*\\ul\d*\\strike\d*\\b\d*\\i\d*\\f\d*\\fs\d*\\cf\d*((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])+)/o"); + reg_cell_change_font_id = prxparse("s/(?!<\\f&cfont_id )((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};)+)/\\f&cfont_id $1\\f&wfont_id /o"); + if prxmatch(reg_cell_id, trim(line)) then do; + call prxposn(reg_cell_id, 1, st, len); + context_mixed = substr(trim(line), st, len); + + if prxmatch(reg_cell_inside_id, trim(line)) then do; /*±íÍ·²»Ö¹Ò»ÐУ¬ÐèÒª½øÒ»²½¶¨Î»*/ + call prxposn(reg_cell_inside_id, 1, st, len); + context_mixed = substr(trim(line), st, len); + end; + + /*ÐÞ¸Ä×ÖÌå*/ + call prxchange(reg_cell_change_font_id, -1, trim(context_mixed), context_mixed); + if find(context_mixed, "\f&cfont_id") ^= 1 then do; + context_mixed = "\f&wfont_id " || trim(context_mixed); + end; + + line = substr(line, 1, st - 1) || trim(context_mixed) || substr(line, st + len); + end; + + /*Ð޸ıêÌâÎı¾×ÖÌå*/ + reg_outllv_id = prxparse("/\\outlinelevel\d*\{((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])+)\}/o"); + reg_outlnlv_change_font_id = prxparse("s/(?!<\\f&cfont_id )((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};)+)/\\f&cfont_id $1\\f&wfont_id /o"); + + if prxmatch(reg_outllv_id, trim(line)) then do; + call prxposn(reg_outllv_id, 1, st, len); + context_mixed = substr(trim(line), st, len); + + /*ÐÞ¸Ä×ÖÌå*/ + call prxchange(reg_outlnlv_change_font_id, -1, trim(context_mixed), context_mixed); + if find(context_mixed, "\f&cfont_id") ^= 1 then do; + context_mixed = "\f&wfont_id " || trim(context_mixed); + end; + + line = substr(line, 1, st - 1) || trim(context_mixed) || substr(line, st + len); + end; + + /*Ð޸ĽÅ×¢Îı¾×ÖÌå*/ + reg_ftnt_id = 
prxparse("/\\pard\\b\d*\\i\d*\\chcbpat\d*\\q[lcr]\\f\d*\\fs\d*\\cf\d*\{\}\\q[lcr]\\fs\d*((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])+)\\cf\d*\\chcbpat\d*/o"); + reg_ftnt_change_font_id = prxparse("s/(?!<\\f&cfont_id )((?:\\\x27[0-9A-F]{2}|\\u\d{1,5};)+)/\\f&cfont_id $1\\f&wfont_id /o"); + + if prxmatch(reg_ftnt_id, trim(line)) then do; + call prxposn(reg_ftnt_id, 1, st, len); + context_mixed = substr(trim(line), st, len); + + /*ÐÞ¸Ä×ÖÌå*/ + call prxchange(reg_ftnt_change_font_id, -1, trim(context_mixed), context_mixed); + if find(context_mixed, "\f&cfont_id") ^= 1 then do; + context_mixed = "\f&wfont_id " || trim(context_mixed); + end; + + line = substr(line, 1, st - 1) || trim(context_mixed) || substr(line, st + len); + end; + run; + + + /*8. Êä³öÎļþ*/ + %if %qupcase(&out) = #AUTO %then %do; + %let outloc = %superq(rtfloc_mixed); + %end; + %else %do; + %let reg_out_id = %sysfunc(prxparse(%bquote(/^[\x22\x27]?(.+?)[\x22\x27]?$/o))); + %if %sysfunc(prxmatch(®_out_id, %superq(out))) %then %do; + %let outloc = %bquote(%sysfunc(prxposn(®_out_id, 1, %superq(out)))); + %end; + %end; + + data _null_; + set _tmp_rtf_mixed(keep = line); + file "&outloc" lrecl = 32767; + act_length = length(line); + put line $varying32767. act_length; + run; + + + /*9. 
ɾ³ýÖмäÊý¾Ý¼¯*/ + %if %qupcase(&del_temp_data) = YES %then %do; + proc datasets library = work nowarn noprint; + delete _tmp_rtf + _tmp_rtf_font_spec + _tmp_rtf_font_added + _tmp_rtf_polish + _tmp_rtf_list_fnst + _tmp_rtf_mixed + ; + quit; + %end; + + %if %qupcase(&out) ^= #AUTO %then %do; + X "del ""&rtfloc_mixed"" & exit"; + %end; + + + %exit: + %put NOTE: ºê MixCWFont ÒѽáÊøÔËÐУ¡; +%mend; diff --git a/src/gb18030/ReadAllRTF.sas b/src/gb18030/ReadAllRTF.sas new file mode 100644 index 0000000..34eb5c8 --- /dev/null +++ b/src/gb18030/ReadAllRTF.sas @@ -0,0 +1,186 @@ +/* +For detailed documentation, see GitHub: https://github.com/Snoopy1866/RTFTools-For-SAS +*/ + +options cmplib = sasuser.func; + +%macro ReadAllRTF(dir, + outlib = work, + vd = #AUTO, + compress = yes, + del_rtf_ctrl = yes, + del_temp_data = yes)/ parmbuff; + + /*Reads the RTF files of one directory by calling the ReadRTF macro once per file. dir: directory given as a fileref or a physical path; outlib: library prefix of the generated output dataset names; vd: drive letter used for the temporary subst mapping of the directory, #AUTO picks the first unused letter; compress and del_rtf_ctrl: passed through to ReadRTF; del_temp_data: YES deletes the temporary dataset _tmp_rtf_list at the end*/ + /*Open the online help page when the argument list is HELP or empty*/ + %if %qupcase(&SYSPBUFF) = %bquote((HELP)) or %qupcase(&SYSPBUFF) = %bquote(()) %then %do; + X explorer "https://github.com/Snoopy1866/RTFTools-For-SAS/blob/main/docs/ReadAllRTF.md"; + %goto exit; + %end; + + /*Detect which drive letters are still unused, so that one can be mapped as a virtual drive*/ + %let is_disk_symbol_all_used = FALSE; + filename dlist pipe "wmic logicaldisk get deviceid"; + data _null_; /*only macro variables are produced here, so no dataset is written to WORK*/ + infile dlist truncover end = end; + input disk_symbol $1.; + retain unused_disk_symbol 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; + unused_disk_symbol = transtrn(unused_disk_symbol, disk_symbol, trimn('')); + if end then do; + if lengthn(unused_disk_symbol) = 0 then do; /*lengthn returns 0 for a blank string, whereas length returns 1 and would never report that every letter is taken*/ + call symputx('is_disk_symbol_all_used', 'TRUE'); + end; + else do; + call symputx('unused_disk_symbol', unused_disk_symbol); + end; + end; + run; + + %if &is_disk_symbol_all_used = TRUE %then %do; + %put ERROR: ÎÞÊ£ÓàÅÌ·û¿ÉÓ㬳ÌÐòÎÞ·¨ÔËÐУ¡; + %goto exit_with_error; + %end; + + %if %upcase(&vd) = #AUTO %then %do; + %let vd = %substr(&unused_disk_symbol, 1, 1); + %put NOTE: ×Ô¶¯Ñ¡Ôñ¿ÉÓõÄÅÌ·û %upcase(&vd); + %end; + %else %do; + %if not %sysfunc(find(&unused_disk_symbol, &vd)) %then %do; + %put ERROR: ÅÌ·û %upcase(&vd) 
²»ºÏ·¨»ò±»Õ¼Óã¬ÇëÖ¸¶¨ÆäËûºÏ·¨»òδ±»Ê¹ÓõÄÅÌ·û£¡; + %goto exit_with_error; + %end; + %end; + + + /*1. »ñȡĿ¼·¾¶*/ + %let reg_dir_expr = %bquote(/^(?:([A-Za-z_][A-Za-z_0-9]{0,7})|[\x22\x27]?((?:[A-Za-z]:\\|\\\\[^\\\/:?\x22\x27<>|]+)[^\\\/:?\x22\x27<>|]+(?:\\[^\\\/:?\x22\x27<>|]+)*)[\x22\x27]?)$/); + %let reg_dir_id = %sysfunc(prxparse(%superq(reg_dir_expr))); + %if %sysfunc(prxmatch(®_dir_id, %superq(dir))) %then %do; + %let dirref = %sysfunc(prxposn(®_dir_id, 1, %superq(dir))); + %let dirloc = %sysfunc(prxposn(®_dir_id, 2, %superq(dir))); + + /*Ö¸¶¨µÄÊÇĿ¼ÒýÓÃÃû*/ + %if %bquote(&dirref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&dirref)) > 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&dirref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&dirref)) < 0 %then %do; + %put ERROR: Ŀ¼ÒýÓà %upcase(&dirref) Ö¸ÏòµÄĿ¼²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&dirref)) = 0 %then %do; + %let dirloc = %qsysfunc(pathname(&dirref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(dirloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(dirloc))) = 0 %then %do; + %put ERROR: Ŀ¼·¾¶ %superq(dirloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + %put ERROR: Ŀ¼ÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßĿ¼ÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + + X "subst &vd: ""&dirloc"" & exit"; /*½¨Á¢ÐéÄâ´ÅÅÌ*/ + + /*1. ʹÓà DOS ÃüÁî»ñÈ¡ËùÓÐ RTF Îļþ£¬´æ´¢ÔÚ _tmp_rtf_list.txt ÖÐ*/ + X "dir ""&vd:\*.rtf"" /b/on > ""&vd:\_tmp_rtf_list.txt"" & exit"; + + + /*2. ¶ÁÈ¡ _tmp_rtf_list.txt Îļþ£¬¹¹½¨ filename Óï¾ä*/ + data _tmp_rtf_list; + infile "&vd:\_tmp_rtf_list.txt" truncover encoding = 'gbke'; + input rtf_name $char1000. rtf_path $char32767. 
fnstm $char32767.; + rtf_path = cats("&vd:\", rtf_name); + + /*ʶ±ð±í¸ñºÍÇåµ¥*/ + reg_table_id = prxparse("/^((?:ÁÐ)?±í|Çåµ¥)\s*(\d+(?:\.\d+)*)\.?\s*(.*)\.rtf\s*$/o"); + + if prxmatch(reg_table_id, rtf_name) then do; + rtf_type = prxposn(reg_table_id, 1, rtf_name); + rtf_seq = prxposn(reg_table_id, 2, rtf_name); + ref_label = prxposn(reg_table_id, 3, rtf_name); + + /*¹¹ÔìÊä³öÊý¾Ý¼¯Ãû³Æ*/ + if rtf_type = "±í" then outdata_prefix = "t"; + else if rtf_type in ("Áбí", "Çåµ¥") then outdata_prefix = "l"; + + outdata_seq = transtrn(rtf_seq, ".", "_"); + + outdata_name = "&outlib.." || outdata_prefix || "_" || outdata_seq; + + /*¹¹Ôì filename Óï¾ä£¬½¨Á¢ÎļþÒýÓÃ*/ + fileref = 'rtf' || put(_n_, 8. -L); + fnstm = 'filename ' || strip(fileref) || ' "' || strip(rtf_path) || '";'; + + call execute(fnstm); + + /*±ê¼ÇÃüÃû¹æ·¶µÄ rtf Îļþ*/ + rtf_valid_flag = "Y"; + end; + run; + + + /*3. µ÷Óà %ReadRTF() ½âÎö RTF Îļþ*/ + data _null_; + set _tmp_rtf_list; + retain call_macro_n 0; + + if rtf_valid_flag = "Y" then do; + call_macro_n + 1; + call_macro_expr = '%ReadRTF(file = ' || strip(fileref) || ', outdata = ' || strip(outdata_name) || '(label = "' || strip(ref_label) || '"), compress = ' || "&compress" || ', del_rtf_ctrl = ' || "&del_rtf_ctrl" || ');'; + + call symputx(cats("call_macro_expr_", call_macro_n), call_macro_expr); + end; + call symputx("call_macro_n", call_macro_n); + run; + + /*----------------ÁÙʱ¹Ø±ÕÈÕÖ¾Êä³ö------------------*/ + proc printto log=_null_; + run; + + %do i = 1 %to &call_macro_n; + %unquote(%superq(call_macro_expr_&i)); + + /*²¶×½µ½¶ÁÈ¡´íÎó£¬ÐèÒªÁÙʱÊä³öÈÕÖ¾*/ + %if %bquote(&readrtf_exit_with_error) = TRUE %then %do; + /*----------------ÁÙʱ»Ö¸´ÈÕÖ¾Êä³ö------------------*/ + proc printto log=log; + run; + + %put ERROR: %superq(readrtf_exit_with_error_text); + + /*----------------¹Ø±ÕÈÕÖ¾Êä³ö------------------*/ + proc printto log=_null_; + run; + %end; + %end; + + /*----------------»Ö¸´ÈÕÖ¾Êä³ö------------------*/ + proc printto log=log; + run; + + + /*4. 
Delete temporary datasets*/ + %if %upcase(&del_temp_data) = YES %then %do; + proc datasets library = work nowarn noprint; + delete _tmp_rtf_list + ; + quit; + %end; + + + /*5. Delete _tmp_rtf_list.txt and remove the virtual drive mapping created with subst*/ + X " del ""&vd:\_tmp_rtf_list.txt"" & subst &vd: /D & exit"; + + + %exit: + %put NOTE: ºê ReadAllRTF ÒѽáÊøÔËÐУ¡; /*Target of the exit_with_error jumps raised while validating the drive letter; at that point no virtual drive or temporary data exists yet, so there is nothing to clean up*/ %exit_with_error: +%mend; diff --git a/src/gb18030/ReadRTF.sas b/src/gb18030/ReadRTF.sas new file mode 100644 index 0000000..39ee356 --- /dev/null +++ b/src/gb18030/ReadRTF.sas @@ -0,0 +1,487 @@ +/* +ÏêϸÎĵµÇëÇ°Íù Github ²éÔÄ: https://github.com/Snoopy1866/RTFTools-For-SAS +*/ + + +options cmplib = sasuser.func; + +%macro ReadRTF(file, outdata, compress = yes, del_rtf_ctrl = yes, del_temp_data = yes)/ parmbuff; + + /*´ò¿ª°ïÖúÎĵµ*/ + %if %qupcase(&SYSPBUFF) = %bquote((HELP)) or %qupcase(&SYSPBUFF) = %bquote(()) %then %do; + X explorer "https://github.com/Snoopy1866/RTFTools-For-SAS/blob/main/docs/ReadRTF.md"; + %goto exit; + %end; + + /*¼ì²éÒÀÀµ*/ + proc sql noprint; + select * from DICTIONARY.CATALOGS where libname = "WORK" and memname = "SASMACR" and objname = "_MACRO_TRANSCODE"; + quit; + %if &SQLOBS = 0 %then %do; + %put ERROR: Ç°ÖÃÒÀÀµÈ±Ê§£¬ÇëÏȼÓÔØÎļþ Transcode.sas¡£; + %goto exit; + %end; + + /*ÉùÃ÷¾Ö²¿±äÁ¿*/ + %local i; + + /*ÉùÃ÷È«¾Ö±äÁ¿*/ + %if not %symexist(readrtf_exit_with_error) %then %do; + %global readrtf_exit_with_error; + %end; + + %if not %symexist(readrtf_exit_with_error_text) %then %do; + %global readrtf_exit_with_error_text; + %end; + + %let readrtf_exit_with_error = FALSE; + %let readrtf_exit_with_error_text = %bquote(); + + + /*1. 
»ñÈ¡Îļþ·¾¶*/ + %let reg_file_expr = %bquote(/^(?:([A-Za-z_][A-Za-z_0-9]{0,7})|[\x22\x27]?((?:[A-Za-z]:\\|\\\\[^\\\/:?\x22\x27<>|]+)[^\\\/:?\x22\x27<>|]+(?:\\[^\\\/:?\x22\x27<>|]+)*)[\x22\x27]?)$/); + %let reg_file_id = %sysfunc(prxparse(%superq(reg_file_expr))); + %if %sysfunc(prxmatch(®_file_id, %superq(file))) %then %do; + %let fileref = %sysfunc(prxposn(®_file_id, 1, %superq(file))); + %let fileloc = %sysfunc(prxposn(®_file_id, 2, %superq(file))); + + /*Ö¸¶¨µÄÊÇÎļþÒýÓÃÃû*/ + %if %bquote(&fileref) ^= %bquote() %then %do; + %if %sysfunc(fileref(&fileref)) > 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&fileref) 䶨Ò壡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&fileref)) < 0 %then %do; + %put ERROR: ÎļþÃûÒýÓà %upcase(&fileref) Ö¸ÏòµÄÎļþ²»´æÔÚ£¡; + %goto exit; + %end; + %else %if %sysfunc(fileref(&fileref)) = 0 %then %do; + %let fileloc = %qsysfunc(pathname(&fileref, F)); + %end; + %end; + + /*Ö¸¶¨µÄÊÇÎïÀí·¾¶*/ + %if %superq(fileloc) ^= %bquote() %then %do; + %if %sysfunc(fileexist(%superq(fileloc))) = 0 %then %do; + %put ERROR: Îļþ·¾¶ %superq(fileloc) ²»´æÔÚ£¡; + %goto exit; + %end; + %end; + %end; + %else %do; + %put ERROR: ÎļþÒýÓÃÃû³¬³ö 8 ×Ö½Ú£¬»òÕßÎļþÎïÀíµØÖ·²»·ûºÏ Winodws ¹æ·¶£¡; + %goto exit; + %end; + + + /*2. ÒÔ´¿Îı¾ÐÎʽ¶ÁÈ¡RTFÎļþ*/ + data _tmp_rtf_data(compress = &compress); + informat line $32767.; + format line $32767.; + length line $32767.; + + infile %unquote(%str(%')&fileloc%str(%')) truncover; + input line $char32767.; + run; + + %if &SYSERR > 0 %then %do; + %let readrtf_exit_with_error_text = %superq(SYSERRORTEXT); + %goto exit_with_error; + %end; + + + /*3. 
Repair table-header lines (fixes RTF code that wraps because the header text contains embedded line breaks).
+    Rows that belong to one wrapped header cell are merged into a single row:
+      - a row matching the header-cell opening pattern and ending in {\line} starts a merge;
+      - further rows ending in {\line} are appended, separated by "|";
+      - the row ending in \cell} closes the merge and is replaced by the accumulated text.
+    Rows absorbed into the merge are deleted; all other rows pass through unchanged.*/
+    data _tmp_rtf_data_polish_header(compress = &compress);
+        set _tmp_rtf_data;
+
+        len = length(line); /*stored on the output dataset; not read again inside this step*/
+
+        length break_line $32767.;
+
+        reg_header_break_id = prxparse("/^(\\pard\\plain\\intbl\\keepn\\sb\d*\\sa\d*\\q[lcr]\\f\d*\\fs\d*\\cf\d*\{.*){\\line}$/o"); /*first row of a wrapped header cell*/
+        reg_header_break_continue_id = prxparse("/^(.*){\\line}$/o"); /*any row ending in {\line}*/
+        reg_header_break_end_id = prxparse("/^(.*\\cell})$/o"); /*any row ending in \cell}*/
+
+        retain break_line ""; /*text accumulated so far for the cell being merged*/
+        retain break_line_found 0; /*1 while a merge is in progress*/
+
+        if prxmatch(reg_header_break_id, strip(line)) then do; /*a wrapped header cell starts here*/
+            break_line = catt(break_line, prxposn(reg_header_break_id, 1, strip(line)));
+            break_line_found = 1;
+            delete;
+        end;
+        else if prxmatch(reg_header_break_continue_id, strip(line)) then do; /*another wrapped row follows*/
+            if break_line_found = 1 then do;
+                break_line = catt(break_line, "|", prxposn(reg_header_break_continue_id, 1, strip(line)));
+                delete;
+            end;
+        end;
+        else if prxmatch(reg_header_break_end_id, strip(line)) then do; /*end of the wrapped cell*/
+            if break_line_found = 1 then do;
+                break_line = catt(break_line, "|", prxposn(reg_header_break_end_id, 1, strip(line)));
+                line = break_line; /*emit the merged text in place of the closing row*/
+
+                break_line_found = 0; /*reset the state for the next header cell*/
+                break_line = "";
+            end;
+        end;
+    run;
+
+
+    /*5. 
Repair data lines (fixes RTF code that wraps because a cell holds a very long string).
+    A cell whose text is split over several rows is rebuilt into one row:
+      - a row matching the cell-opening pattern without a closing \cell} starts a merge;
+      - rows made only of cell text are appended while the merge is active;
+      - the row ending in \cell} closes the merge and is replaced by the rebuilt text.*/
+    data _tmp_rtf_data_polish_body(compress = &compress);
+        set _tmp_rtf_data_polish_header;
+
+        length line_data_part $32767 line_data_part_buffer $32767;
+
+        reg_data_line_start_id = prxparse("/^\\pard\\plain\\intbl(?:\\keepn)?\\sb\d*\\sa\d*\\q[lcr]\\f\d*\\fs\d*\\cf\d*\{((?:\\'[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])*)$/o"); /*cell opening followed by text, no \cell} required*/
+        reg_data_line_mid_id = prxparse("/^((?:\\'[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])*)$/o"); /*a row consisting only of cell text*/
+        reg_data_line_end_id = prxparse("/^((?:\\'[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])*)\\cell\}$/o"); /*cell text followed by \cell}*/
+
+        retain line_data_part ""; /*text accumulated so far for the cell being rebuilt*/
+        retain line_data_part_found 0; /*1 while a merge is in progress*/
+
+        if prxmatch(reg_data_line_start_id, strip(line)) then do;
+            line_data_part_buffer = prxposn(reg_data_line_start_id, 1, strip(line));
+            /*The regex uses an ASCII character class, so some non-data rows match by mistake; filter further*/
+            if find(line_data_part_buffer, "\cell}") = 0 then do; /*the control word \cell} cannot appear in the opening row of a wrapped cell*/
+                line_data_part_found = 1;
+                line_data_part = strip(line);
+                delete;
+            end;
+        end;
+
+        if line_data_part_found = 1 then do;
+            if prxmatch(reg_data_line_mid_id, strip(line)) then do;
+                if find(strip(line), "\shppict") > 0 then do; /*\shppict marks a Word 97 picture; it usually occurs in a page-header logo and cannot occur in a data row, so abandon the merge*/
+                    line_data_part_found = 0;
+                    line_data_part = "";
+                end;
+                else do;
+                    line_data_part_buffer = prxposn(reg_data_line_mid_id, 1, strip(line));
+                    /*The regex uses an ASCII character class, so some non-data rows match by mistake; filter further*/
+                    if find(line_data_part_buffer, "\cell}") = 0 and substr(line_data_part_buffer, 1, 5) ^= "\pard" then do; /*the control words \cell} and \pard cannot appear in the middle of a data row*/
+                        if line_data_part_found = 1 then do;
+                            line_data_part = cats(line_data_part, line_data_part_buffer);
+                            delete;
+                        end;
+                    end;
+                end;
+            end;
+
+            if prxmatch(reg_data_line_end_id, strip(line)) then do;
+                line_data_part_buffer = prxposn(reg_data_line_end_id, 1, strip(line));
+                if line_data_part_found = 1 then do;
+                    line_data_part = cats(line_data_part, line_data_part_buffer, "\cell}");
+                    line = line_data_part; /*emit the rebuilt cell in place of the closing row*/
+
+                    line_data_part_found = 0; /*reset the state for the next cell*/
+                    line_data_part = "";
+                end;
+            end;
+        end;
+    run;
+
+
+    /*4. Identify table data.
+    Each row is classified as title, header row start, header cell border definition, cell content
+    or section break; retained counters track the position inside the header and inside each observation.
+    The macro variable is_outlinelevel_found is set to 1 by the step as soon as a title is seen.*/
+    %let is_outlinelevel_found = 0;
+
+    data _tmp_rtf_raw(compress = &compress);
+        set _tmp_rtf_data_polish_body;
+
+        /*Number of variables (columns)*/
+        retain var_n 0;
+
+        /*Variable position (the statement after this comment is an empty statement)*/;
+        retain var_pointer 0;
+
+        /*Whether the table title has been found*/
+        retain is_outlinelevel_found 0;
+
+        /*Whether a table header has been found*/
+        retain is_header_found 0;
+
+        /*Whether a header cell border position definition has been found*/
+        retain is_header_def_found 0;
+
+        /*Header cell level (increasing from top to bottom)*/
+        retain header_cell_level 0;
+
+        /*Left border position of the header cell*/
+        retain header_cell_left_padding 0;
+
+        /*Right border position of the header cell*/
+        retain header_cell_right_padding 0;
+
+        /*Whether table data has been found*/
+        retain is_data_found 0;
+
+        /*
+        Variable position that the current RTF code points to.
+        obs_var_pointer increases with every RTF data row read, never exceeds var_n,
+        and is reset to 0 at the start of the next block of data
+        */
+        retain obs_var_pointer 0;
+
+        /*Observation sequence number*/
+        retain obs_seq 0;
+
+
+        /*Regular expressions used to pick out the header and the data*/
+        reg_outlinelevel_id = prxparse("/\\outlinelevel\d/o");
+        reg_header_line_id = prxparse("/\\trowd\\trkeep\\trhdr\\trq[lcr]/o");
+        reg_header_def_line_id = prxparse("/\\clbrdr[tlbr]\\brdrs\\brdrw\d*\\brdrcf\d*(?:\\clbrdr[tlbr]\\brdrs\\brdrw\d*\\brdrcf\d*)*\\cltxlrt[bl]\\clvertal[tcb](?:\\clcbpat\d*)?\\cellx(\d+)/o");
+        reg_data_line_id = prxparse("/^\\pard\\plain\\intbl(?:\\keepn)?\\sb\d*\\sa\d*\\q[lcr]\\f\d*\\fs\d*\\cf\d*\{((?:\\'[0-9A-F]{2}|\\u\d{1,5};|[\x20-\x7e])*)\\cell\}$/o");
+        reg_sect_line_id = prxparse("/\\sect\\sectd\\linex\d*\\endnhere\\pgwsxn\d*\\pghsxn\d*\\lndscpsxn\\headery\d*\\footery\d*\\marglsxn\d*\\margrsxn\d*\\margtsxn\d*\\margbsxn\d*/o");
+
+
+        length context_raw $32767;
+
+        /*Table title found*/
+        if prxmatch(reg_outlinelevel_id, strip(line)) then do;
+            if is_outlinelevel_found = 0 then do;
+                is_outlinelevel_found = 1;
+                call symputx("is_outlinelevel_found", 1); /*tell the macro layer that a title exists*/
+            end;
+        end;
+
+        /*Table header row found*/
+        else if prxmatch(reg_header_line_id, strip(line)) then do;
+            is_header_found = 1;
+            header_cell_level + 1; /*sum statement: one more header level*/
+        end;
+
+        /*Header cell border position definition found*/
+        else if prxmatch(reg_header_def_line_id, strip(line)) then do;
+            is_header_def_found = 1;
+            header_cell_left_padding = header_cell_right_padding; /*the previous right edge becomes this cell's left edge*/
+            header_cell_right_padding = input(prxposn(reg_header_def_line_id, 1, strip(line)), 8.); /*the \cellx value*/
+
+            var_pointer + 1;
+            var_n = max(var_n, var_pointer);
+        end;
+
+
+        /*Cell content found*/
+        else if prxmatch(reg_data_line_id, strip(line)) then do;
+            if is_outlinelevel_found = 1 then do; /*only rows after the table title count; this excludes data in the page header*/
+                if is_header_found = 1 then do; /*cell rows that follow the control word \trhdr are actually header cells*/
+                    if not prxmatch(reg_header_def_line_id, strip(line)) and is_header_def_found = 1 then do; /*the border position definitions have ended; reset the pointer to 0*/
+                        var_pointer = 0;
+                    end;
+                    flag_header = "Y";
+                    var_pointer + 1;
+                    var_n = max(var_n, var_pointer);
+                    context_raw = prxposn(reg_data_line_id, 1, strip(line));
+                end;
+                else do; /*data row*/
+                    flag_data = "Y";
+                    is_data_found = 1;
+                    obs_var_pointer + 1;
+                    if obs_var_pointer = 1 then do; /*first cell of a new observation*/
+                        obs_seq + 1;
+                    end;
+                    context_raw = prxposn(reg_data_line_id, 1, strip(line));
+
+                    header_cell_level = 0;
+                end;
+            end;
+
+            is_header_def_found = 0;
+            header_cell_left_padding = 0;
+            header_cell_right_padding = 0;
+        end;
+
+        /*Section break found: a new title must be seen before further cells are accepted*/
+        else if prxmatch(reg_sect_line_id, strip(line)) then do;
+            is_outlinelevel_found = 0;
+        end;
+
+        /*Any other row*/
+        else do;
+            if header_cell_right_padding > 0 then do;
+                is_header_def_found = 0;
+                header_cell_left_padding = 0;
+                header_cell_right_padding = 0;
+            end;
+
+            if var_pointer > 0 then do; /*the header definition has ended for now; reset the pointer to 0*/
+                is_header_found = 0;
+                var_pointer = 0;
+            end;
+
+
+            if obs_var_pointer = var_n then do; /*the data row has ended for now; reset the pointer to 0*/
+                obs_var_pointer = 0;
+            end;
+        end;
+    run;
+
+    %if &is_outlinelevel_found = 0 %then %do; /*no \outlinelevel title was seen anywhere in the file*/
+        %put ERROR: ÔÚ RTF ÎļþÖÐδ·¢ÏÖ´ó¸Ù¼¶±ðµÄ±êÌ⣬ÇëʹÓÿØÖÆ×Ö \outlinelevel Éú³É RTF ÎļþµÄ±êÌ⣡;
+        %goto exit;
+    %end;
+
+    /*5. 
Remove RTF control words*/
+    %if %upcase(&del_rtf_ctrl) = YES %then %do;
+        /*Control word: empty group.
+          NOTE(review): the text appears to be truncated right after this point in this copy - the %bquote( below is never closed
+          and the statement runs straight into the tail of a PROC SQL join condition, with steps 6 to 8 absent.
+          Restore the missing part from the original file before using this code.*/
+        %let reg_ctrl_1 = %bquote({\s*}|(?= a&i..header_cell_left_padding and a&max_header_level..header_cell_right_padding <= a&i..header_cell_right_padding
+                                                          %end;
+                                                          ;
+    quit;
+
+    /*Further processing of the labels: strip the leading formatting control words, drop a leading "|" and give empty labels a placeholder text*/
+    data _tmp_rtf_header_expand_polish;
+        set _tmp_rtf_header_expand;
+        reg_header_control_word_id = prxparse("s/\\animtext\d*\\ul\d*\\strike\d*\\b\d*\\i\d*\\f\d*\\fs\d*\\cf\d*\s*//o");
+
+        header_context = prxchange(reg_header_control_word_id, -1, strip(header_context)); /*-1 = replace every occurrence*/
+
+        if substr(header_context, 1, 1) = "|" then do;
+            header_context = substr(header_context, 2);
+        end;
+
+        if header_context = "" then do;
+            header_context = "¿Õ±êÇ©";
+        end;
+    run;
+
+
+    /*9. Modify the attributes of the SAS dataset*/
+    proc sql noprint;
+        /*Number of COLn variables: all variables of _TMP_OUTDATA minus two*/
+        select nvar - 2 into : var_n from DICTIONARY.TABLES where libname = "WORK" and memname = "_TMP_OUTDATA";
+
+        %do i = 1 %to &var_n;
+            /*Length that the variable actually needs*/
+            select max(length(col&i)) into : var_&i._maxlen from _tmp_outdata;
+
+            /*Label of the variable*/
+            select header_context into : var_&i._label trimmed from _tmp_rtf_header_expand_polish where var_pointer = &i;
+        %end;
+
+        alter table _tmp_outdata
+            modify %do i = 1 %to &var_n;
+                       COL&i char(&&var_&i._maxlen) label = "%superq(var_&i._label)",
+                   %end;
+                   OBS_SEQ label = "ÐòºÅ";
+        alter table _tmp_outdata
+            drop _NAME_;
+    quit;
+
+
+    /*10. Final output*/
+    data &outdata;
+        set _tmp_outdata;
+    run;
+
+    %goto exit;
+
+
+    /*Abnormal exit: raise the global error flag, then fall through to the normal exit*/
+    %exit_with_error:
+    %let readrtf_exit_with_error = TRUE;
+
+    /*Normal exit*/
+    %exit:
+    /*11. 
Remove the intermediate datasets*/
+    %if %upcase(&del_temp_data) = YES %then %do;
+        proc datasets library = work nowarn noprint;
+            delete _tmp_outdata
+                   _tmp_rtf_data
+                   _tmp_rtf_data_polish_header
+                   _tmp_rtf_data_polish_body
+                   _tmp_rtf_context
+                   _tmp_rtf_context_sorted
+                   _tmp_rtf_header
+                   _tmp_rtf_header_expand
+                   _tmp_rtf_header_expand_polish
+                   _tmp_rtf_raw
+                   _tmp_rtf_raw_del_ctrl
+                   ;
+        quit;
+    %end;
+
+    %put NOTE: ºê ReadRTF ÒѽáÊøÔËÐУ¡;
+%mend;
+
diff --git a/src/gb18030/Transcode.sas b/src/gb18030/Transcode.sas
new file mode 100644
index 0000000..9f52a06
--- /dev/null
+++ b/src/gb18030/Transcode.sas
@@ -0,0 +1,74 @@
+/*
+For detailed documentation see GitHub: https://github.com/Snoopy1866/RTFTools-For-SAS
+*/
+
+/*Delete the dataset SASUSER.FUNC so that a re-run after switching the session encoding is not blocked from overwriting it*/
+proc datasets library = sasuser noprint nowarn;
+    delete func;
+quit;
+
+
+/*Internal macro, meant to be invoked only through the FCMP function run_macro().
+  Reads the macro variables code_point (hex digits) and raw_encoding, converts the bytes to the
+  session encoding with KCVT, and returns the text in the macro variable char;
+  is_transcode_success is set to 1 at the end.*/
+%macro _macro_transcode;
+    %let code_point = %sysfunc(dequote(%superq(code_point))); /*strip the quotes around the incoming values*/
+    %let raw_encoding = %sysfunc(dequote(&raw_encoding));
+    data _null_(encoding = asciiany);
+        length char $32767;
+        char = kcvt("&code_point"x, "&raw_encoding", getoption('encoding')); /*hex literal -> session encoding*/
+        call symput("char", strip(char));
+    run;
+
+    %let is_transcode_success = 1;
+%mend;
+
+
+/*Custom function transcode(code_point, raw_encoding): decodes code points into text.
+  Returns the decoded text, or an error message string when the macro call fails.*/
+proc fcmp outlib = sasuser.func.rtf;
+    function transcode(code_point $, raw_encoding $) $ 32767;
+        length char $32767;
+
+        is_transcode_success = 0;
+        char = "";
+        if raw_encoding = "utf8" then do; /*UTF-8: call the built-in function directly*/
+            char = unicode(code_point, "NCR");
+            return(char);
+        end;
+        else do;
+            rc = run_macro('_macro_transcode', code_point, raw_encoding, char, is_transcode_success); /*other encodings go through KCVT; because of how KCVT behaves it has to run in a DATA step without a specific encoding*/
+            if rc = 0 and is_transcode_success = 1 then do;
+                return(char);
+            end;
+            else do;
+                return("ERROR: תÂëʧ°Ü£¡");
+            end;
+        end;
+    endsub;
+quit;
+
+
+/*Custom function cell_transcode(str): decodes the text of one RTF cell.
+  Runs of \'XX hex escapes are decoded as GBK one run at a time; otherwise, if the string
+  contains \uN; escapes, the whole string is decoded through the "utf8" branch of transcode().
+  A string with neither kind of escape is returned unchanged.*/
+proc fcmp outlib = sasuser.func.rtf inlib = sasuser.func;
+    function cell_transcode(str $) $32767;
+        reg_code_gbk_id = prxparse("/((?:\\\x27[0-9A-F]{2})+)/o"); /*one or more consecutive \'XX escapes*/
+        reg_code_utf8_id = prxparse("/((?:\\u\d{1,5};)+)/o"); /*one or more consecutive \uN; escapes*/
+
+        length str_decoded $32767 _tmp_str $32767 _tmp_str_nomarkup $32767 _tmp_str_decoded $32767;
+        str_decoded = str;
+        if prxmatch(reg_code_gbk_id, str_decoded) then do;
+            do while(prxmatch(reg_code_gbk_id, str_decoded)); /*repeat until no escape run is left*/
+                _tmp_str = prxposn(reg_code_gbk_id, 1, str_decoded);
+                _tmp_str_nomarkup = compress(_tmp_str, "\'"); /*keep only the hex digits*/
+                _tmp_str_decoded = transcode(_tmp_str_nomarkup, "gbk");
+                reg_code_gbk_chg_id = prxparse("s/((?:\\\x27[0-9A-F]{2})+)/"||trim(_tmp_str_decoded)||"/"); /*NOTE(review): the decoded text is inserted unescaped into the replacement, and a new pattern is compiled on every pass - presumably a "/" in the decoded text would break the substitution; confirm*/
+                str_decoded = prxchange(reg_code_gbk_chg_id, 1, strip(str_decoded)); /*replace the first run only*/
+            end;
+        end;
+        else if prxmatch(reg_code_utf8_id, str_decoded) then do;
+            _tmp_str = str_decoded;
+            _tmp_str_nomarkup = transtrn(_tmp_str, "\u", "&#"); /*turn \uN; into the NCR form &#N;*/
+            _tmp_str_decoded = transcode(_tmp_str_nomarkup, "utf8");
+            str_decoded = _tmp_str_decoded;
+        end;
+        return(str_decoded);
+    endsub;
+quit;
diff --git a/src/utf16/CompareAllRTF.sas b/src/utf16/CompareAllRTF.sas
new file mode 100644
index 0000000..79e29df
Binary files /dev/null and b/src/utf16/CompareAllRTF.sas differ
diff --git a/src/utf16/CompareRTF.sas b/src/utf16/CompareRTF.sas
new file mode 100644
index 0000000..69bd752
Binary files /dev/null and b/src/utf16/CompareRTF.sas differ
diff --git a/src/utf16/CompareRTFWithDataset.sas b/src/utf16/CompareRTFWithDataset.sas
new file mode 100644
index 0000000..497ebbc
Binary files /dev/null and b/src/utf16/CompareRTFWithDataset.sas differ
diff --git a/src/utf16/MergeRTF.sas b/src/utf16/MergeRTF.sas
new file mode 100644
index 0000000..764425b
Binary files /dev/null and b/src/utf16/MergeRTF.sas differ
diff --git a/src/utf16/MixCWFont.sas b/src/utf16/MixCWFont.sas
new file mode 100644
index 0000000..7bd96ce
Binary files /dev/null and b/src/utf16/MixCWFont.sas differ
diff --git a/src/utf16/ReadAllRTF.sas b/src/utf16/ReadAllRTF.sas
new file mode 100644
index 0000000..60dd7cb
Binary files 
/dev/null and b/src/utf16/ReadAllRTF.sas differ diff --git a/src/utf16/ReadRTF.sas b/src/utf16/ReadRTF.sas new file mode 100644 index 0000000..878d70e Binary files /dev/null and b/src/utf16/ReadRTF.sas differ diff --git a/src/utf16/Transcode.sas b/src/utf16/Transcode.sas new file mode 100644 index 0000000..366d7a8 Binary files /dev/null and b/src/utf16/Transcode.sas differ diff --git a/src/utf8/ReadRTF.sas b/src/utf8/ReadRTF.sas index 266104d..77f04de 100644 --- a/src/utf8/ReadRTF.sas +++ b/src/utf8/ReadRTF.sas @@ -94,6 +94,8 @@ options cmplib = sasuser.func; data _tmp_rtf_data_polish_header(compress = &compress); set _tmp_rtf_data; + len = length(line); + length break_line $32767.; reg_header_break_id = prxparse("/^(\\pard\\plain\\intbl\\keepn\\sb\d*\\sa\d*\\q[lcr]\\f\d*\\fs\d*\\cf\d*\{.*){\\line}$/o"); @@ -330,7 +332,7 @@ options cmplib = sasuser.func; %let reg_ctrl_3 = %bquote(\\nosupersub); /*控制字-上标*/ - %let reg_ctrl_4 = %bquote(\{?\\super\s+((?:\\[\\\{\}]|[^\\\{\}])+)\}?); /* + %let reg_ctrl_4 = %bquote(\{?\\super\s*((?:\\[\\\{\}]|[^\\\{\}])+)\}?); /* https://github.com/Snoopy1866/RTFTools-For-SAS/issues/20 https://github.com/Snoopy1866/RTFTools-For-SAS/issues/26 */