经过前面多篇博文的测试和对比,我们的GreenPlum集群现在已经准备就绪,就等各位的fire了,吼吼~~
[src.mysql] 源库 mysql 连接信息
host = "192.168.1.1"
port = "3306"
user = "test"
password = "test"
db = "test"
encodingdir = "share"
encoding = "utf8"
[desc.pgsql] 目的库Greenplum连接信息
connect_string = "host=192.168.1.1 dbname=test port=5888 user=test password=pgsql"
table1 : select * from table_big where column1 < '2016-08-05'
table2 :
table3
table4: select column1, column2 from tableX where column1 != 10
table5: select * from table_big where column1 >= '2016-08-05'
[root@hzadg-helf-xxx ~]# ./mysql2pgsql -d
[gpadmin@hzadg-helf-xxx ~]# psql -d helf
helf=# CREATE TABLE part (
P_PARTKEY int4,
P_NAME text,
P_MFGR text,
P_BRAND text,
P_TYPE text,
P_SIZE int4,
P_CONTAINER text,
P_RETAILPRICE numeric,
P_COMMENT text)
with (APPENDONLY=true, ORIENTATION=column, COMPRESSTYPE=zlib, COMPRESSLEVEL=1, BLOCKSIZE=1048576, OIDS=false)
DISTRIBUTED BY (P_PARTKEY)
[root@hzadg-helf-xxx ~]# ./mysql2pgsql -l -s schema
[src.pgsql] 源库 pgsql 连接信息
connect_string = "host=192.168.1.1 dbname=test port=5888 user=test password=pgsql"
[local.pgsql] 本地临时DB pgsql 连接信息
connect_string = "host=192.168.1.1 dbname=test port=5888 user=test2 password=pgsql"
目的库 pgsql 连接信息
[desc.pgsql]
connect_string = "host=192.168.1.1 dbname=test port=5888 user=test3 password=pgsql"
[gpadmin@hzadg-helf-xxx ~]# psql -d helf
helf=# CREATE TABLE part (
P_PARTKEY int4,
P_NAME text,
P_MFGR text,
P_BRAND text,
P_TYPE text,
P_SIZE int4,
P_CONTAINER text,
P_RETAILPRICE numeric,
P_COMMENT text)
with (APPENDONLY=true, ORIENTATION=column, COMPRESSTYPE=zlib, COMPRESSLEVEL=1, BLOCKSIZE=1048576, OIDS=false)
DISTRIBUTED BY (P_PARTKEY)
[root@hzadg-helf-xxx ~]# ./pgsql2pgsql
使用gphdfs协议,以外部表的方式,用psql直接在线导入。
[gpadmin@hzadg-helf-xxx ~]# psql -d helf
helf=# CREATE TABLE ext_tpch.PART (
P_PARTKEY SERIAL,
P_NAME VARCHAR(55),
P_MFGR CHAR(25),
P_BRAND CHAR(10),
P_TYPE VARCHAR(25),
P_SIZE INTEGER,
P_CONTAINER CHAR(10),
P_RETAILPRICE DECIMAL,
P_COMMENT VARCHAR(23)
)
LOCATION ('gphdfs://:/data/tpch/part.tbl')
FORMAT 'TEXT' (DELIMITER '|' NULL AS '' ESCAPE AS E'\\');
[gpadmin@hzadg-helf-xxx ~]# psql -d helf
helf=# CREATE TABLE part (
P_PARTKEY int4,
P_NAME text,
P_MFGR text,
P_BRAND text,
P_TYPE text,
P_SIZE int4,
P_CONTAINER text,
P_RETAILPRICE numeric,
P_COMMENT text)
with (APPENDONLY=true, ORIENTATION=column, COMPRESSTYPE=zlib, COMPRESSLEVEL=1, BLOCKSIZE=1048576, OIDS=false)
DISTRIBUTED BY (P_PARTKEY)
vi /usr/local/greenplum-db/lib/hadoop/hadoop_env.sh
export JAVA_HOME=XXX
export HADOOP_HOME=XXX
for f in /opt/cloudera/parcels/CDH/jars/hadoop-*.jar;do
CLASSPATH=${CLASSPATH}:$f;
done
gpconfig -c gp_hadoop_target_version -v "'XXX'"
gpconfig -c gp_hadoop_home -v "'XXX'"
gpstop -u
[gpadmin@hzadg-helf-xxx ~]# psql -d helf
helf=# insert into tpch.PART select * from ext_tpch.PART;
使用gpfdist,以外部表的方式,用psql直接在线导入。
[root@hzadg-helf-xxx ~]# psql -d helf
helf=# CREATE TABLE ext_tpch.PART (
P_PARTKEY SERIAL,
P_NAME VARCHAR(55),
P_MFGR CHAR(25),
P_BRAND CHAR(10),
P_TYPE VARCHAR(25),
P_SIZE INTEGER,
P_CONTAINER CHAR(10),
P_RETAILPRICE DECIMAL,
P_COMMENT VARCHAR(23)
)
LOCATION ('gpfdist://HOST:PORT/part.tbl')
FORMAT 'TEXT' (DELIMITER '|' NULL AS '' ESCAPE AS E'\\');
[gpadmin@hzadg-helf-xxx ~]# psql -d helf
helf=# CREATE TABLE part (
P_PARTKEY int4,
P_NAME text,
P_MFGR text,
P_BRAND text,
P_TYPE text,
P_SIZE int4,
P_CONTAINER text,
P_RETAILPRICE numeric,
P_COMMENT text)
with (APPENDONLY=true, ORIENTATION=column, COMPRESSTYPE=zlib, COMPRESSLEVEL=1, BLOCKSIZE=1048576, OIDS=false)
DISTRIBUTED BY (P_PARTKEY)
[root@hzadg-helf-xxx ~]# psql -d helf
helf=# insert into tpch.PART select * from ext_tpch.PART;
本文来自网易实践者社区,经作者何李夫授权发布。