You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
For big CSV files, best is to use EXTENSION file_fdw. Complex but works fine, and need only one command after implemented.
CREATESCHEMAcsv; -- a library, easy to drop CREATEFUNCTIONcsv.split_csv2(
linetext, -- the input CSV UTF-8 line
delim_char char(1) =',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) ='"'-- field quotation
) RETURNS setof text[] AS $f$
import csv
return csv.reader(
line.splitlines(),
quotechar=quote_char,
delimiter=delim_char,
skipinitialspace=True,
escapechar='\\'
)
$f$ IMMUTABLE language PLpythonU;
CREATEFUNCTIONcsv.split_csv_line2(
text, char(1) DEFAULT ',', char(1) DEFAULT '"'
) RETURNS text[] AS $f$
SELECT x FROMcsv.split_csv($1,$2,$3) x LIMIT1;
$f$ language SQL IMMUTABLE;
-- not reliable but fast? http://stackoverflow.com/a/42926265/287948-- compare performance with csv.split_csv_line2()-- ideal a C implementationCREATE OR REPLACEFUNCTIONcsv.split_csv_line(
linetext, -- the input CSV string
delim_char char(1) =',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) ='"', -- field quotation
OUT result text[]
) LANGUAGE plpgsql AS
$f$DECLARE
i integer;
t text :='';
c char(1);
/* * 0 means unquoted * 1 means quoted * 2 means quoted, and we just read a quote*/
q integer :=0;
BEGIN/* loop through the characters */
FOR i IN1..length(line) LOOP
/* get i-th character */
c :=substring(lineFROM i FOR 1);
/* end of string is at an unquoted delimiter */
IF c = delim_char AND q <>1 THEN
result := result || t;
t :='';
q :=0;
ELSIF c = quote_char THEN
CASE q
WHEN 0 THEN
q :=1;
WHEN 1 THEN
IF c = quote_char THEN
q :=2;
ELSE
t := t || c;
END IF;
WHEN 2 THEN
q :=1;
t := t || quote_char;
END CASE;
ELSE
IF q =2 THEN
q :=0;
END IF;
t := t || c;
END IF;
END LOOP;
/* add the last string */
result := result || t;
END;$f$;
CREATE EXTENSION file_fdw WITH SCHEMA csv;
CREATE SERVER files FOREIGN DATA WRAPPER file_fdw;
CREATE or replaceFUNCTIONcsv.ftable_drop(tname text) RETURNS void AS $f$
BEGIN
EXECUTE format('DROP VIEW IF EXISTS %s', 'csv.'||$1);
EXECUTE format('DROP FOREIGN TABLE IF EXISTS %s CASCADE', 'csv.fgn_'||$1);
END
$f$ language PLpgSQL;
CREATE or replaceFUNCTIONcsv.ftable_create(
tname text, -- table name for scan text and "csv_tname"
fpath text, -- file path to the input CSV
delim_char char(1) =',', -- can be also E'\t', ';', '|', etc.
quote_char char(1) ='"', -- field quotation
p_check boolean default true -- false to not check-and-drop.
) RETURNS textAS $f$
DECLARE
fgn_tname text;
csv_tname text;
BEGIN
fgn_tname :='csv.fgn_'||$1;
csv_tname :='csv.'||$1;
IF p_check THEN PERFORM csv.ftable_drop($1); END IF;
EXECUTE format(E'CREATE FOREIGN TABLE %s (line text) SERVER files OPTIONS ( filename %L, format \'text\')', fgn_tname, $2);
EXECUTE format('CREATE VIEW %s AS SELECT csv.split_csv_line(line,%L,%L) as c FROM %s', csv_tname, delim_char,quote_char, fgn_tname);
RETURN format(E'\ntables %I and %I created\n',fgn_tname,csv_tname);
END
$f$ language PLpgSQL;
---- TESTING ----SELECTcsv.ftable_create('my','/tmp/TSE2/lix.csv',';');
Small and medium files
big CSV files
For big CSV files, best is to use
EXTENSION file_fdw
. Complex but works fine, and need only one command after implemented.The text was updated successfully, but these errors were encountered: