ablog

2018-09-20

Confirming that PostgreSQL's query rewriting is rule-based

The rewriter's entry point is pg_rewrite_queries(): it takes a list of query trees and returns a list of query trees. From within pg_rewrite_queries(), the rewrite module's QueryRewrite() is called to process the query trees one at a time.

f:id:yohei-a:20180920133847p:image:w640

In PostgreSQL, VIEWs and RULEs are implemented by rewriting queries. If necessary, the query is rewritten at this stage. This is called rewrite processing, and the module that performs it is called the rewriter. The entry point of rewrite processing is QueryRewrite (rewrite/rewriteHandler.c).

f:id:yohei-a:20180920133632p:image:w360

PostgreSQL の構造とソースツリー | Let's Postgres
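
You can watch the rewriter do this without reading the source: the developer option debug_print_rewritten makes the server log the query tree after rewriting. Below is a minimal sketch, assuming psycopg2 and a reachable instance (the connection string is a placeholder); setting client_min_messages to log routes the LOG output back to the client session.

# Hedged sketch: dump the post-rewrite query tree for a statement.
import psycopg2

conn = psycopg2.connect("host=localhost dbname=mydb user=awsuser")  # placeholder DSN
cur = conn.cursor()
cur.execute("SET client_min_messages = log")    # send LOG-level messages to this client
cur.execute("SET debug_print_rewritten = on")   # developer option: log the rewritten tree
cur.execute("SELECT 1")
for line in conn.notices:                       # psycopg2 collects server messages here
    print(line)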

Check the source code

$ tar xfvj postgresql-10.4.tar.bz2
/*-------------------------------------------------------------------------
 *
 * rewriteHandler.c
 *		Primary module of query rewriter.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/rewrite/rewriteHandler.c
 *
 * NOTES
 *	  Some of the terms used in this file are of historic nature: "retrieve"
 *	  was the PostQUEL keyword for what today is SELECT. "RIR" stands for
 *	  "Retrieve-Instead-Retrieve", that is an ON SELECT DO INSTEAD SELECT rule
 *	  (which has to be unconditional and where only one rule can exist on each
 *	  relation).
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/sysattr.h"
#include "catalog/dependency.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
#include "foreign/fdwapi.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "parser/analyze.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteDefine.h"
#include "rewrite/rewriteHandler.h"
#include "rewrite/rewriteManip.h"
#include "rewrite/rowsecurity.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"


/* We use a list of these to detect recursion in RewriteQuery */
typedef struct rewrite_event
{
	Oid			relation;		/* OID of relation having rules */
	CmdType		event;			/* type of rule being fired */
} rewrite_event;

typedef struct acquireLocksOnSubLinks_context
{
	bool		for_execute;	/* AcquireRewriteLocks' forExecute param */
} acquireLocksOnSubLinks_context;

static bool acquireLocksOnSubLinks(Node *node,
					   acquireLocksOnSubLinks_context *context);
static Query *rewriteRuleAction(Query *parsetree,
				  Query *rule_action,
				  Node *rule_qual,
				  int rt_index,
				  CmdType event,
				  bool *returning_flag);
static List *adjustJoinTreeList(Query *parsetree, bool removert, int rt_index);
static List *rewriteTargetListIU(List *targetList,
					CmdType commandType,
					OverridingKind override,
					Relation target_relation,
					int result_rti,
					List **attrno_list);
static TargetEntry *process_matched_tle(TargetEntry *src_tle,
					TargetEntry *prior_tle,
					const char *attrName);
static Node *get_assignment_input(Node *node);
static void rewriteValuesRTE(RangeTblEntry *rte, Relation target_relation,
				 List *attrnos);
static void markQueryForLocking(Query *qry, Node *jtnode,
					LockClauseStrength strength, LockWaitPolicy waitPolicy,
					bool pushedDown);
static List *matchLocks(CmdType event, RuleLock *rulelocks,
		   int varno, Query *parsetree, bool *hasUpdate);
static Query *fireRIRrules(Query *parsetree, List *activeRIRs,
			 bool forUpdatePushedDown);
static bool view_has_instead_trigger(Relation view, CmdType event);
static Bitmapset *adjust_view_column_set(Bitmapset *cols, List *targetlist);
(rest omitted)
/*-------------------------------------------------------------------------
 *
 * rewriteHandler.h
 *		External interface to query rewriter.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/rewrite/rewriteHandler.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef REWRITEHANDLER_H
#define REWRITEHANDLER_H

#include "utils/relcache.h"
#include "nodes/parsenodes.h"

extern List *QueryRewrite(Query *parsetree);
extern void AcquireRewriteLocks(Query *parsetree,
					bool forExecute,
					bool forUpdatePushedDown);

extern Node *build_column_default(Relation rel, int attrno);
extern void rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
					Relation target_relation);

extern Query *get_view_query(Relation view);
extern const char *view_query_is_auto_updatable(Query *viewquery,
							 bool check_cols);
extern int relation_is_updatable(Oid reloid,
					  bool include_triggers,
					  Bitmapset *include_cols);

#endif							/* REWRITEHANDLER_H */

Verification

$ sudo yum -y install postgresql
$ sudo yum -y install postgresql-contrib
$ pgbench -i -s 100 -U awsuser -h ******.******.ap-northeast-1.rds.amazonaws.com -d mydb
$ psql "host=******.*******.ap-northeast-1.rds.amazonaws.com user=awsuser dbname=mydb port=5432"
select attname, n_distinct, most_common_vals from pg_stats where tablename = 'pgbench_accounts';
analyze pgbench_accounts;
  • Create a view
create view v_pgbench_accounts as select * from pgbench_accounts;
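
To confirm that the view really is implemented as a rewrite rule, you can look in the pg_rewrite catalog: CREATE VIEW stores an unconditional ON SELECT DO INSTEAD rule named "_RETURN" for the view, and this is exactly what QueryRewrite expands. A minimal sketch, again assuming psycopg2 and a placeholder connection string:

import psycopg2

conn = psycopg2.connect("host=localhost dbname=mydb user=awsuser")  # placeholder DSN
cur = conn.cursor()
cur.execute("""
    SELECT c.relname, r.rulename, r.ev_type, r.is_instead
    FROM pg_rewrite r
    JOIN pg_class c ON c.oid = r.ev_class
    WHERE c.relname = 'v_pgbench_accounts'
""")
print(cur.fetchall())
# Expected shape: [('v_pgbench_accounts', '_RETURN', '1', True)]
# ev_type '1' = the rule fires on SELECT; is_instead = an INSTEAD rule.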

2018-09-19

db tech showcase 2018 Day 1

I attended Day 1 of db tech showcase 2018, the database world's annual class-reunion of an event. The photo is the tuna-filleting show at the party afterward.

f:id:yohei-a:20180920032213j:image:w640

Obata-san, thank you for everything!

f:id:yohei-a:20180920042610j:image:w640


Below are my notes from the sessions I attended.

Big data quality management in a DWH for customer understanding

Overview
Summary
  • A talk about how they check data quality in their DWH.
  • Not just the data-processing part: the data sources themselves also need to be tested properly.
  • Test perspectives for the process (the data-processing part); a sketch follows these notes:
    • Accuracy: is the data mapped and transformed according to spec?
    • Non-duplication: are combinations that should be unique actually unique?
    • Completeness: has all the data that should be loaded actually been loaded?
    • Consistency: are the relationships between tables and partitions consistent?
  • Issues
Q&A
  • Couldn't an ETL tool do much the same thing?
  • Couldn't this be checked with RDB features such as constraints?
    • The data is too big; it would be painful in an RDB.
  • What takes the most time?
    • Getting to know the data: where which data lives.
  • Do you maintain a data dictionary?
    • We do, but changes keep coming in, so it never quite stays current.
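
To make the four test perspectives concrete, here is a hedged PySpark sketch; every table and column name below is invented for illustration, not taken from the talk.

from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.table("dwh.orders")  # hypothetical target table

# Non-duplication: the business key should be unique
dups = df.groupBy("order_id").count().filter(F.col("count") > 1).count()

# Completeness: row count should match the source extract
complete = df.count() == spark.table("staging.orders_src").count()

# Accuracy: a derived column should obey its mapping rule
wrong = df.filter(F.col("amount_usd") != F.col("amount") * F.col("fx_rate")).count()

# Consistency: every order should reference an existing customer
orphans = df.join(spark.table("dwh.customers"), "customer_id", "left_anti").count()

print(dups == 0, complete, wrong == 0, orphans == 0)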

Getting started with PostgreSQL cluster operation with Pgpool-II

f:id:yohei-a:20180920042400j:image:w640

Overview
Slides
  • To be uploaded
Notes
  • The diagram on p. 9 is simplified; there are actually multiple server processes.
  • P. 11: temporary tables, strong locks, and the like cannot be used on a standby.
  • Asynchronous replication of the transaction log was implemented in PostgreSQL 9.0 in 2010, synchronous replication followed, and multi-master replication and sharding are under development now.
  • Routing only reads to a standby normally means a lot of application-side work, such as implementing the routing yourself and sending consistency-sensitive reads to the master; Pgpool-II struck me as packed with solid, workmanlike features that handle this without the application having to know.
  • As for the Pgpool-II-plus-replication architectures presented: "replication" tempts an Oracle person to compare with Data Guard, but many of the features are about fault tolerance within a single site rather than disaster recovery, so in Oracle terms RAC is sometimes the apter comparison*1. Not a which-is-better comparison, but one about which problem each feature solves and which Oracle feature would be the corresponding solution.

Impala performance tuning for people who want a blazing-fast data lake

f:id:yohei-a:20180920042139j:image:w640

Overview
Slides

The essence of Impala performance tuning is cutting I/O and inter-node communication, and the same lesson carries over to Hive and Spark. Data layout, not superficial query tweaks, is what matters, and you should analyze where the time goes with PROFILE. A very much to-the-point, easy-to-follow talk; the explanation of Parquet was the clearest I have seen. Impala runs a cost-based optimizer driven by statistics; I assume those statistics are stored in the Hive catalog.


The architecture of the distributed DB Apache Kudu - "HybridTime", the mechanism that reconciles DB performance and consistency

f:id:yohei-a:20180920042256j:image:w640

Overview
  • Speaker: Takahiko Sato (Cloudera K.K. - Sales Engineer)
  • Speaker bio: Researched networking at the Nara Institute of Science and Technology (NAIST) and came away fond of infrastructure and other low-level technology. After graduating he consulted on databases and infrastructure in general at Oracle, encountered NoSQL and distributed systems at Basho Technologies, and now works broadly on Hadoop-related technology at Cloudera. His hobby is climbing. Co-author of 絵で見てわかるITインフラの仕組み.
  • Abstract: Apache Kudu is a columnar distributed database that is strong at analytic queries. Kudu is an HTAP-class DB that can carry both OLTP and OLAP workloads; at last year's #dbts2017 I presented Kudu's "speed". Kudu has a strong image as a BI/DWH analytics database, but it was originally inspired by Google's Spanner paper, among others, and has a mechanism that guarantees consistency even between geographically distant nodes. Behind that mechanism is an internal database clock called "HybridTime". This talk introduces the HybridTime paper, how the mechanism works, what properties it has, and why it also contributes to Kudu's "speed".
Slides
  • To be uploaded
  • Kudu is a storage engine written in C++ that can be used from Impala, Spark, and others: an HTAP DB. Like an Exadata storage server it has push-down capability. It has MVCC, but only single-row operations are covered.

P.S.

I went to an after-after-party with folks from the MySQL community, asked about MySQL's Performance Schema, and heard how various companies run their operations. Good fun.

*1: Data Guard can also be used for fault tolerance within a single site

2018-09-17

Processing a CSV with PySpark from a Zeppelin notebook on AWS Glue

I created a development endpoint in AWS Glue and ran PySpark from a Zeppelin notebook to transform (row-filter) a CSV on S3 and write it back to S3. The CSV read from S3 was converted from a Glue DynamicFrame to a Spark SQL DataFrame, filtered, converted back to a DynamicFrame, and written to S3. A DynamicFrame only has simple methods, so when you want Spark SQL's more advanced data processing, just convert to a DataFrame.


Preparation

Creating test data
  • Create test data (CSV).
% perl -e 'printf(qq/%d,%d,%d,%d\n/,$_,2..4) for 1..100' > number.csv
% wc -l number.csv
     100 number.csv
% head -3 number.csv
1,2,3,4
2,2,3,4
3,2,3,4
% tail -3 number.csv
98,2,3,4
99,2,3,4
100,2,3,4
Put the data in an S3 bucket
Create a development endpoint and a Zeppelin notebook in Glue

Run

  • Open the Zeppelin notebook and run the following PySpark code.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from awsglue.job import Job
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType

glueContext = GlueContext(SparkContext.getOrCreate())

# S3 location for input
input_dir = "s3://az-handson/input"

# S3 location for output
output_dir = "s3://az-handson/output"

# Read CSV
dyf = glueContext.create_dynamic_frame.from_options(connection_type = "s3", 
    connection_options = {"paths": [ input_dir ]}, format="csv", format_options={ "withHeader": False})

# Convert to DataFrame
df = dyf.toDF()

# Filter
filtered_df = df.where(df['col0'] > 50)

# Turn it back to a dynamic frame
output_dyf = DynamicFrame.fromDF(filtered_df, glueContext, "nested")

# Write it out in CSV
glueContext.write_dynamic_frame.from_options(frame = output_dyf, connection_type = "s3", connection_options = {"path": output_dir}, format = "csv")

Results

  • The processed output is written to S3.

f:id:yohei-a:20180917161311p:image:w640

f:id:yohei-a:20180917161512p:image:w640

  • Display the filtered DataFrame in Zeppelin.
filtered_df.show()

f:id:yohei-a:20180917161746p:image:w640


Notes

  • Check the DynamicFrame's schema
dyf.printSchema()
  • Check the DataFrame's summary statistics
df.describe()

References

2018-09-16

The difference between AWS Glue's DynamicFrame and Spark's DataFrame

What is Spark?

P.100

"Apache Spark" is another project being developed to process data more efficiently than MapReduce. Unlike Tez, which is an extension of Hadoop, Spark is an independent project separate from Hadoop. Spark's defining trait is that it exploits large amounts of memory for speed. (snip) When a machine crashes, the intermediate data processed so far is lost; Spark's philosophy is that in that case you simply redo the processing and recreate the lost intermediate data (Figure 3.8).

(snip)

Spark is not a replacement for Hadoop; it is a replacement for MapReduce. The distributed file system HDFS and the resource manager YARN, for example, can still be used as-is from Spark. Configurations without Hadoop are also possible: Amazon S3 can serve as the distributed storage, and data can be read from the distributed database Cassandra.

f:id:yohei-a:20180916181231j:image:w640


The history of Spark

P.5

The Spark project originally started in 2009 as a research project at UC Berkeley's AMPLab. It is positioned as one component of BDAS (the Berkeley Data Analytics Stack), a software stack for big data analytics. The Spark project was open-sourced in early 2010 and accepted into the Apache Incubator in June 2013, becoming "Apache Spark". A full-scale development organization took shape around then, and in October 2013 Spark developers spun out of AMPLab to found Databricks in the US. Many Spark developers belong to Databricks to this day.

AMPLAB is a University of California, Berkeley lab focused on Big data analytics. The name stands for the Algorithms, Machines and People Lab.[1][2] It has been publishing papers since 2008[3] and was officially launched in 2011.[4]

While AMPLab has worked on a wide variety of big data projects, many know it as the lab that invented Apache Spark.[5]

AMPLab - Wikipedia

f:id:yohei-a:20180916184431p:image:w640

About | AMPLab – UC Berkeley

What is a DAG (Directed Acyclic Graph)?

P.202-204

New frameworks to replace MapReduce - internal representation as a DAG

What the new frameworks have in common is a data structure called a DAG (directed acyclic graph) (Figure 5.12); in Japanese, 有向非循環グラフ. A DAG is not itself a technology; it is one of the data models used in mathematics and computer algorithms. A DAG has the following properties:

  • Nodes are connected to each other by arrows (directed)
  • However far you follow the arrows, you never return to the same node (acyclic)

In dataflow, the series of tasks to be executed is represented as a DAG data structure. The arrows in the figure show the execution order of the tasks; by settling on an execution order that respects those dependencies, every task can be completed without omission. What remains is the question of how efficiently it can all be executed.

Traditional MapReduce can also be seen as a simple DAG made of two kinds of nodes, "Map" and "Reduce". The inefficiency was that processing could not move on to the next stage until a node had finished.

In dataflow, by contrast, all the nodes making up the DAG run concurrently. Data that has finished processing is handed along over the network piece by piece, eliminating the waiting that MapReduce had.


DAGs in Spark

The DAG is an internal representation of the system, and users are mostly unaware of its existence. DAGs are adopted not only by dataflow systems but also by query engines such as Hive on Tez and Presto, where the DAG data structure is generated internally from SQL. In a dataflow framework like Spark, on the other hand, you assemble the DAG data structure more directly from a programming language.

(snip)

The hallmark of programming against a DAG is lazy evaluation. Each line of the program merely assembles the DAG data structure; nothing is processed on the spot. Only after the DAG is built, when a result is requested explicitly or implicitly, does data processing finally begin.

Rather than running Map and Reduce steps one at a time the way MapReduce does, assembling the entire data pipeline as a DAG before moving to execution lets the internal scheduler devise an execution plan that suits a distributed system; that is what makes dataflow strong.
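
A toy PySpark illustration of that lazy evaluation (my own sketch, not from the book): the map and filter calls only build lineage, which toDebugString exposes, and nothing executes until the action at the end.

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").appName("dag-demo").getOrCreate()

rdd = spark.sparkContext.parallelize(range(10))
pipeline = rdd.map(lambda x: x * 2).filter(lambda x: x > 5)  # only assembles the DAG

print(pipeline.toDebugString().decode())  # the lineage (DAG) before any work has run
print(pipeline.count())                   # the action: execution starts here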


What is an RDD?

P.14

Apache Spark processes data using a structure called the RDD (Resilient Distributed Dataset). Spark's programming model is: transform an RDD to produce a new RDD, and repeat this until you arrive at the desired result.

(snip)

An RDD is a distributed collection holding large amounts of data as its elements. Picture a huge array or list and you won't be far off. RDDs are designed for distributed processing on a cluster of machines, and internally they are divided into chunks called partitions. In Spark, the partition is the unit of distributed processing. By processing an RDD partition by partition across machines, data too large for any single machine can be handled.

A user loads, say, the contents of a file on a distributed file system such as HDFS into an RDD and processes the data at scale by transforming that RDD. Spark calls this step a "transformation". Finally, a so-called "action" is applied to the RDD's contents to obtain the result (Figure 2.2).

Beyond that, RDDs are immutable (their elements' values cannot be changed), and their creation and transformation are evaluated lazily.

f:id:yohei-a:20180916191822j:image:w640
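
A minimal example of the transformation/action model described above (my own sketch, not from the book):

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").appName("rdd-demo").getOrCreate()
sc = spark.sparkContext

rdd = sc.parallelize(range(1, 101), numSlices=4)  # an RDD split into 4 partitions
squares = rdd.map(lambda x: x * x)                # transformation: yields a new RDD
total = squares.reduce(lambda a, b: a + b)        # action: triggers the computation

print(rdd.getNumPartitions(), total)              # 4 338350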



What is a DataFrame*1?

P.110

To handle datasets of all kinds of formats uniformly from the driver program, Spark SQL uses an abstract data structure called the DataFrame. A DataFrame is a data structure that, like an RDBMS table, has rows and columns with names and data types. Spark SQL supports a variety of data types that can be specified as the data types of DataFrame columns.



What is a DynamicFrame?

  • A wrapper around Spark's DataFrame used when extracting and transforming data with AWS Glue.
  • DynamicFrame has both Python and Scala APIs.
  • A DataFrame needs its schema (column types and so on) defined up front, and a column holding values of mixed types can only be treated as String; a DynamicFrame, presumably, lets you read the data without a schema definition and then do preprocessing such as unifying the types?
  • So the usage pattern would be: preprocess with DynamicFrame, do the advanced work with Spark SQL APIs on a DataFrame, then convert back to a DynamicFrame to write out? (A sketch of this round trip follows.)
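
A hedged sketch of that round trip, using documented DynamicFrame APIs but invented catalog, table, and column names:

from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from pyspark.context import SparkContext

glueContext = GlueContext(SparkContext.getOrCreate())

# Read without a fixed schema; mixed-type columns become choice types
dyf = glueContext.create_dynamic_frame.from_catalog(database="mydb", table_name="raw_events")

# Preprocess with DynamicFrame: resolve the ambiguous column to a single type
dyf = dyf.resolveChoice(specs=[("price", "cast:double")])

# Hand off to Spark SQL for the heavy lifting...
df = dyf.toDF().where("price > 100")

# ...and convert back to a DynamicFrame for writing out
out = DynamicFrame.fromDF(df, glueContext, "out")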

References

  • Documentation for the Python DynamicFrame class

The DynamicFrame class

One of the major abstractions in Apache Spark is the SparkSQL DataFrame, which is similar to the DataFrame construct found in R and Pandas. A DataFrame is similar to a table and supports functional-style (map/reduce/filter/etc.) operations and SQL operations (select, project, aggregate).

DataFrames, powerful and widely used as they are, have limitations with respect to extract, transform, and load (ETL) operations. Most significantly, they require a schema to be specified before any data is loaded. SparkSQL addresses this by making two passes over the data: the first to infer the schema, the second to load the data. However, this inference is limited and doesn't handle the messiness of real data. For example, the same field might be of a different type in different records. Apache Spark often gives up and reports the type as string, using the original field text. This may not be correct, and you may want finer control over how schema discrepancies are resolved. And for large datasets, an additional pass over the source data can be prohibitively expensive.

To address these limitations, AWS Glue introduces the DynamicFrame. A DynamicFrame is similar to a DataFrame, except that each record is self-describing, so no schema is required initially. Instead, AWS Glue computes a schema on the fly when required, and explicitly encodes schema inconsistencies using a choice (or union) type. You can resolve these inconsistencies to make your datasets compatible with data stores that require a fixed schema.

Similarly, a DynamicRecord represents a logical record within a DynamicFrame. It is like a row in a Spark DataFrame, except that it is self-describing and can be used for data that doesn't conform to a fixed schema.

Once the schema inconsistencies are resolved, DynamicFrames can be converted to and from DataFrames.

DynamicFrame クラス - AWS Glue
  • Documentation for the Scala DynamicFrame

A DynamicFrame is a distributed collection of self-describing DynamicRecord objects.

DynamicFrames are designed to provide a flexible data model for ETL (extract, transform, and load) operations. They don't require a schema to create, and you can use them to read and transform data with messy or inconsistent values and types. A schema can be computed on demand for those operations that need one.

DynamicFrames provide a range of transformations for data cleaning and ETL. They also support conversion to and from SparkSQL DataFrames, to integrate with existing code and the many analytics operations that DataFrames provide.

AWS Glue Scala DynamicFrame クラス - AWS Glue

GlueContext

The file context.py contains the GlueContext class. GlueContext extends PySpark's SQLContext class to provide Glue-specific operations. Most Glue programs will start by instantiating a GlueContext and using it to construct a DynamicFrame.

DynamicFrame

The DynamicFrame, defined in dynamicframe.py, is the core data structure used in Glue scripts. DynamicFrames are similar to Spark SQL's DataFrames in that they represent distributed collections of data records, but DynamicFrames provide more flexible handling of data sets with inconsistent schemas. By representing records in a self-describing way, they can be used without specifying a schema up front or requiring a costly schema inference step.

DynamicFrames support many operations, but it is also possible to convert them to DataFrames using the toDF method to make use of existing Spark SQL operations.

https://github.com/awslabs/aws-glue-libs/tree/master/awsglue

— Construction —

  • __init__
  • fromDF
  • toDF

(snip)

— Transforms —

  • apply_mapping
  • drop_fields
  • filter
  • join
  • map
  • relationalize
  • rename_field
  • resolveChoice
  • select_fields
  • spigot
  • split_fields
  • split_rows
  • unbox
  • unnest
  • write

(snip)

— Errors —

  • assertErrorThreshold
  • errorsAsDynamicFrame
  • errorsCount
  • stageErrorsCount
DynamicFrame Class - AWS Glue
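
For a feel of the transforms in this list, a hedged example combining apply_mapping and filter on the headerless number.csv from the Zeppelin entry above (Glue names such columns col0, col1, ...):

from awsglue.context import GlueContext
from pyspark.context import SparkContext

glueContext = GlueContext(SparkContext.getOrCreate())
dyf = glueContext.create_dynamic_frame.from_options(
    connection_type="s3", connection_options={"paths": ["s3://az-handson/input"]},
    format="csv", format_options={"withHeader": False})

# apply_mapping takes (source column, source type, target column, target type)
mapped = dyf.apply_mapping([("col0", "string", "id", "long"),
                            ("col1", "string", "val", "long")])

# filter keeps the DynamicRecords that satisfy a predicate
big = mapped.filter(lambda rec: rec["id"] > 50)
big.printSchema()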
# Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#
#  http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.

import json
from awsglue.utils import makeOptions, callsite
from itertools import imap, ifilter
from awsglue.gluetypes import _deserialize_json_string, _create_dynamic_record, _revert_to_dict, _serialize_schema
from awsglue.utils import _call_site, _as_java_list, _as_scala_option, _as_resolve_choiceOption
from pyspark.rdd import RDD, PipelinedRDD
from pyspark.sql.dataframe import DataFrame
from pyspark.serializers import PickleSerializer, BatchedSerializer


class ResolveOption(object):
    """
    ResolveOption is used for resolve ChoiceType while converting DynamicRecord to DataFrame
    option.action includes "Project", "KeepAsStruct" and "Cast".
    """
    def __init__(self, path, action, target=None):
        """
        :param path: string, path name to ChoiceType
        :param action: string,
        :param target: spark sql Datatype
        """
        self.path = path
        self.action = action
        self.target = target

References

*1: Spark SQL's

2018-09-15

Connecting to Athena via the JDBC driver and issuing a query

I wrote a small Java program and connected to Athena through the JDBC driver. Checking CloudTrail shows that even over a JDBC connection, it is the AWS SDK for Java bundled in the JDBC driver that calls the API.


Preparation

$ wget https://s3.amazonaws.com/athena-downloads/drivers/JDBC/SimbaAthenaJDBC_2.0.5/AthenaJDBC41_2.0.5.jar
import java.sql.*;
import java.util.Properties;

public class AthenaJDBCDemo {
    static final String athenaUrl = "jdbc:awsathena://AwsRegion=ap-northeast-1;";

    public static void main(String[] args) {
        Connection conn = null;
        Statement statement = null;
        try {
            Class.forName("com.simba.athena.jdbc.Driver");
            Properties info = new Properties();
            info.put("S3OutputLocation", "s3://aws-athena-query-results-<Account ID>-ap-northeast-1/");
            info.put("LogPath", "/home/ec2-user");
            info.put("LogLevel", "6");
            info.put("AwsCredentialsProviderClass", "com.simba.athena.amazonaws.auth.PropertiesFileCredentialsProvider");
            info.put("AwsCredentialsProviderArguments", "/home/ec2-user/.athenaCredentials");
            String databaseName = "default";
            System.out.println("Connecting to Athena...");
            conn = DriverManager.getConnection(athenaUrl, info);
            System.out.println("Listing tables...");
            String sql = "show tables in " + databaseName;
            statement = conn.createStatement();
            ResultSet rs = statement.executeQuery(sql);
            while (rs.next()) {
                // Retrieve table column.
                String name = rs.getString("tab_name");
                // Display values.
                System.out.println("Name: " + name);
            }
            rs.close();
            conn.close();
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            try {
                if (statement != null)
                    statement.close();
            } catch (Exception ex) {
            }
            try {
                if (conn != null)
                    conn.close();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        System.out.println("Finished connectivity test.");
    }
}
$ javac -classpath ./AthenaJDBC41_2.0.5.jar AthenaJDBCDemo.java
  • Create the credentials
$ vi .athenaCredentials
accessKey=...
secretKey=...

Run

$ java -cp .:./AthenaJDBC41_2.0.5.jar AthenaJDBCDemo
Connecting to Athena...
log4j:WARN No appenders could be found for logger (com.simba.athena.amazonaws.AmazonWebServiceClient).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Listing tables...
Name: cloudtrail_logs_cloudtrail_269419664770_do_not_delete
Finished connectivity test.

Check the results

f:id:yohei-a:20180916043454p:image:w640

  • Check CloudTrail

f:id:yohei-a:20180916042413p:image:w640

  • CloudTrail event details

f:id:yohei-a:20180916043842p:image:w640

{
    "eventVersion": "1.05",
    "userIdentity": {
        "type": "IAMUser",
        "principalId": "...",
        "arn": "arn:aws:iam::...:user/...",
        "accountId": "...",
        "accessKeyId": "...",
        "userName": "..."
    },
    "eventTime": "2018-09-15T19:17:23Z",
    "eventSource": "athena.amazonaws.com",
    "eventName": "GetQueryExecution",
    "awsRegion": "ap-northeast-1",
    "sourceIPAddress": "**.***.42.228", ★EC2のグローバルIP
    "userAgent": "sbAthenaJDBCDriver/2.0, aws-sdk-java/1.11.335 Linux/4.14.62-65.117.amzn1.x86_64 OpenJDK_64-Bit_Server_VM/24.191-b01 java/1.7.0_191", ★AWS SDK for Java in JDBC Driver からアクセスしている
    "requestParameters": {
        "queryExecutionId": "c7a91af0-8965-41f9-9694-b5996f766b35"
    },
    "responseElements": null,
    "requestID": "bdb5f440-3eda-498f-a254-a668f2c9fffe",
    "eventID": "eae75fbd-37eb-4937-a652-c1e50fe2b5a0",
    "eventType": "AwsApiCall", ★ APIを発行している
    "recipientAccountId": "..."
}

f:id:yohei-a:20180916043656p:image:w640

{
    "eventVersion": "1.05",
    "userIdentity": {
        "type": "IAMUser",
        "principalId": "...",
        "arn": "arn:aws:iam::...:user/...",
        "accountId": "...",
        "accessKeyId": "...",
        "userName": "..."
    },
    "eventTime": "2018-09-15T19:17:24Z",
    "eventSource": "athena.amazonaws.com",
    "eventName": "GetQueryResultsStream",
    "awsRegion": "ap-northeast-1",
    "sourceIPAddress": "**.***.42.228",
    "userAgent": "sbAthenaJDBCDriver/2.0, aws-sdk-java/1.11.335 Linux/4.14.62-65.117.amzn1.x86_64 OpenJDK_64-Bit_Server_VM/24.191-b01 java/1.7.0_191",
    "requestParameters": {
        "queryExecutionId": "c7a91af0-8965-41f9-9694-b5996f766b35",
        "maxResults": 10000
    },
    "responseElements": null,
    "requestID": "62dac377-e0f2-4727-8643-982cf07f18cc",
    "eventID": "bf4426a4-bec2-4d8a-97f8-fbf3e130c9db",
    "eventType": "AwsApiCall",
    "recipientAccountId": "..."
}
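
The CloudTrail records above are ordinary Athena API calls, so the same flow can be driven directly with the AWS SDK. A hedged boto3 sketch (the output bucket reuses the placeholder from the Java code; GetQueryResultsStream is a driver-only streaming variant, so the plain SDK polls with GetQueryExecution and fetches with GetQueryResults):

import time
import boto3

athena = boto3.client("athena", region_name="ap-northeast-1")

qid = athena.start_query_execution(
    QueryString="show tables in default",
    ResultConfiguration={"OutputLocation": "s3://aws-athena-query-results-<Account ID>-ap-northeast-1/"},
)["QueryExecutionId"]

# Poll until the query finishes (GetQueryExecution: the call seen in CloudTrail)
while True:
    state = athena.get_query_execution(QueryExecutionId=qid)["QueryExecution"]["Status"]["State"]
    if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
        break
    time.sleep(1)

for row in athena.get_query_results(QueryExecutionId=qid)["ResultSet"]["Rows"]:
    print(row["Data"][0].get("VarCharValue"))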

Additional notes

$ unzip AthenaJDBC41_2.0.5.jar
$ tree -d com
com
├── amazonaws
│   └── auth
└── simba
    ├── athena
    │   ├── amazonaws
    │   │   ├── adapters
    │   │   ├── annotation
    │   │   ├── auth
    │   │   ├── client
    │   │   ├── event
    │   │   ├── handlers
    │   │   ├── http
    │   │   ├── internal
    │   │   ├── jmx
    │   │   ├── log
    │   │   ├── metrics
    │   │   ├── partitions
    │   │   ├── profile
    │   │   ├── protocol
    │   │   ├── regions
    │   │   ├── retry
    │   │   ├── sdk
    │   │   ├── services
    │   │   │   ├── athena
    │   │   │   ├── athenastreamingservice
    │   │   │   ├── glue
    │   │   │   └── securitytoken
    │   │   ├── transform
    │   │   ├── util
    │   │   └── waiters
(snip)

274 directories
$ ls -l|head -10
total 296
-rw-rw-r--  1 ec2-user ec2-user   916 Jul 13 03:56 AbortedException.class
drwxrwxr-x  3 ec2-user ec2-user  4096 Jul 13 03:56 adapters
-rw-rw-r--  1 ec2-user ec2-user   957 Jul 13 03:56 AmazonClientException.class
-rw-rw-r--  1 ec2-user ec2-user  4172 Jul 13 03:56 AmazonServiceException.class
-rw-rw-r--  1 ec2-user ec2-user  1336 Jul 13 03:56 AmazonServiceException$ErrorType.class
-rw-rw-r--  1 ec2-user ec2-user 21418 Jul 13 03:56 AmazonWebServiceClient.class
-rw-rw-r--  1 ec2-user ec2-user   669 Jul 13 03:56 AmazonWebServiceRequest$1.class
-rw-rw-r--  1 ec2-user ec2-user 10498 Jul 13 03:56 AmazonWebServiceRequest.class
-rw-rw-r--  1 ec2-user ec2-user  1487 Jul 13 03:56 AmazonWebServiceResponse.class
/*
 * Copyright 2010-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws;

import com.amazonaws.annotation.NotThreadSafe;
import com.amazonaws.annotation.SdkInternalApi;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.event.ProgressListener;
import com.amazonaws.handlers.HandlerContextKey;
import com.amazonaws.internal.StaticCredentialsProvider;
import com.amazonaws.metrics.RequestMetricCollector;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * Base class for all user facing web service requests.
 */
@NotThreadSafe
public abstract class AmazonWebServiceRequest implements Cloneable, ReadLimitInfo, HandlerContextAware {

    public static final AmazonWebServiceRequest NOOP = new AmazonWebServiceRequest() {
    };

    /**
     * The optional progress listener for receiving updates about the progress of the request.
     */
    private ProgressListener progressListener = ProgressListener.NOOP;

    /**
     * Arbitrary options storage for individual {@link AmazonWebServiceRequest}s. This field is not
     * intended to be used by clients.
     */
    private final RequestClientOptions requestClientOptions = new RequestClientOptions();

    /**
     * A request metric collector used for this specific service request; or null if there is none.
     * This collector always takes precedence over the ones specified at the http client level and
     * AWS SDK level.
     */
    private RequestMetricCollector requestMetricCollector;

    /**
     * The optional credentials to use for this request - overrides the default credentials set at
     * the client level.
     */
    private AWSCredentialsProvider credentialsProvider;

    /**
     * A map of custom header names to header values.
     */
    private Map<String, String> customRequestHeaders;

    /**
     * Custom query parameters for the request.
     */
    private Map<String, List<String>> customQueryParameters;

    /**
     * User-defined context for the request.
     */
    private transient Map<HandlerContextKey<?>, Object> handlerContext = new HashMap<HandlerContextKey<?>, Object>();

    /**
     * The source object from which the current object was cloned; or null if there isn't one.
     */
    private AmazonWebServiceRequest cloneSource;

    private Integer sdkRequestTimeout = null;

    private Integer sdkClientExecutionTimeout = null;

    /**
     * Sets the optional credentials to use for this request, overriding the default credentials set at the client level.
     *
     * @param credentials The optional AWS security credentials to use for this request, overriding the default credentials set at
     *                    the client level.
     * @deprecated by {@link #setRequestCredentialsProvider(AWSCredentialsProvider)}. If you must use {@link AWSCredentials} you
     * can wrap it with a {@link com.amazonaws.auth.AWSStaticCredentialsProvider}.
     */
    @Deprecated
    public void setRequestCredentials(AWSCredentials credentials) {
        this.credentialsProvider = credentials == null ? null : new StaticCredentialsProvider(credentials);
    }

    /**
     * Returns the optional credentials to use to sign this request, overriding the default
     * credentials set at the client level.
     *
     * @return The optional credentials to use to sign this request, overriding the default
     *         credentials set at the client level.
     *
     * @deprecated by {@link #getRequestCredentialsProvider()}
     */
    @Deprecated
    public AWSCredentials getRequestCredentials() {
        return credentialsProvider == null ? null : credentialsProvider.getCredentials();
    }

    /**
     * Sets the optional credentials provider to use for this request, overriding the default credentials
     * provider at the client level.
     *
     * @param credentialsProvider
     *            The optional AWS security credentials provider to use for this request, overriding the
     *            default credentials provider at the client level.
     */
    public void setRequestCredentialsProvider(AWSCredentialsProvider credentialsProvider) {
        this.credentialsProvider = credentialsProvider;
    }

    /**
     * Returns the optional credentials provider to use to sign this request, overriding the default
     * credentials provider at the client level.
     *
     * @return The optional credentials provider to use to sign this request, overriding the default
     *         credentials provider at the client level.
     */
    public AWSCredentialsProvider getRequestCredentialsProvider() {
        return credentialsProvider;
    }

    /**
     * Sets the optional credentials provider to use for this request, overriding the default credentials
     * provider at the client level.
     *
     * @param credentialsProvider
     *            The optional AWS security credentials provider to use for this request, overriding the
     *            default credentials provider at the client level.
     * @return A reference to this updated object so that method calls can be chained together.
     */
    public <T extends AmazonWebServiceRequest> T withRequestCredentialsProvider(final AWSCredentialsProvider credentialsProvider) {
        setRequestCredentialsProvider(credentialsProvider);
        @SuppressWarnings("unchecked")
        T t = (T) this;
        return t;
    }

    /**
     * Gets the options stored with this request object. Intended for internal use only.
     */
    public RequestClientOptions getRequestClientOptions() {
        return requestClientOptions;
    }

    /**
     * Returns a request level metric collector; or null if not specified.
     */
    public RequestMetricCollector getRequestMetricCollector() {
        return requestMetricCollector;
    }

    /**
     * Sets a request level request metric collector which takes precedence over the ones at the
     * http client level and AWS SDK level.
     */
    public void setRequestMetricCollector(RequestMetricCollector requestMetricCollector) {
        this.requestMetricCollector = requestMetricCollector;
    }

    /**
     * Specifies a request level metric collector which takes precedence over the ones at the http
     * client level and AWS SDK level.
     */
    public <T extends AmazonWebServiceRequest> T withRequestMetricCollector(RequestMetricCollector metricCollector) {
        setRequestMetricCollector(metricCollector);
        @SuppressWarnings("unchecked")
        T t = (T) this;
        return t;
    }

    /**
     * Sets the optional progress listener for receiving updates about the progress of the request.
     *
     * @param progressListener
     *            The new progress listener.
     */
    public void setGeneralProgressListener(ProgressListener progressListener) {
        this.progressListener = progressListener == null ? ProgressListener.NOOP : progressListener;
    }

    /**
     * Returns the optional progress listener for receiving updates about the progress of the
     * request.
     *
     * @return the optional progress listener for receiving updates about the progress of the
     *         request.
     */
    public ProgressListener getGeneralProgressListener() {
        return progressListener;
    }

    /**
     * Sets the optional progress listener for receiving updates about the progress of the request,
     * and returns a reference to this object so that method calls can be chained together.
     *
     * @param progressListener
     *            The new progress listener.
     * @return A reference to this updated object so that method calls can be chained together.
     */
    public <T extends AmazonWebServiceRequest> T withGeneralProgressListener(ProgressListener progressListener) {
        setGeneralProgressListener(progressListener);
        @SuppressWarnings("unchecked")
        T t = (T) this;
        return t;
    }

    /**
     * Returns an immutable map of custom header names to header values.
     *
     * @return The immutable map of custom header names to header values.
     */
    public Map<String, String> getCustomRequestHeaders() {
        if (customRequestHeaders == null) {
            return null;
        }
        return Collections.unmodifiableMap(customRequestHeaders);
    }

    /**
     * Put a new custom header to the map of custom header names to custom header values, and return
     * the previous value if the header has already been set in this map.
     * <p>
     * Any custom headers that are defined are used in the HTTP request to the AWS service. These
     * headers will be silently ignored in the event that AWS does not recognize them.
     * <p>
     * NOTE: Custom header values set via this method will overwrite any conflicting values coming
     * from the request parameters.
     *
     * @param name
     *            The name of the header to add
     * @param value
     *            The value of the header to add
     * @return the previous value for the name if it was set, null otherwise
     */
    public String putCustomRequestHeader(String name, String value) {
        if (customRequestHeaders == null) {
            customRequestHeaders = new HashMap<String, String>();
        }
        return customRequestHeaders.put(name, value);
    }

    /**
     * @return the immutable map of custom query parameters. The parameter value is modeled as a
     *         list of strings because multiple values can be specified for the same parameter name.
     */
    public Map<String, List<String>> getCustomQueryParameters() {
        if (customQueryParameters == null) {
            return null;
        }
        return Collections.unmodifiableMap(customQueryParameters);
    }

    /**
     * Add a custom query parameter for the request. Since multiple values are allowed for the same
     * query parameter, this method does NOT overwrite any existing parameter values in the request.
     * <p>
     * Any custom query parameters that are defined are used in the HTTP request to the AWS service.
     *
     * @param name
     *            The name of the query parameter
     * @param value
     *            The value of the query parameter. Only the parameter name will be added in the URI
     *            if the value is set to null. For example, putCustomQueryParameter("param", null)
     *            will be serialized to "?param", while putCustomQueryParameter("param", "") will be
     *            serialized to "?param=".
     */
    public void putCustomQueryParameter(String name, String value) {
        if (customQueryParameters == null) {
            customQueryParameters = new HashMap<String, List<String>>();
        }
        List<String> paramList = customQueryParameters.get(name);
        if (paramList == null) {
            paramList = new LinkedList<String>();
            customQueryParameters.put(name, paramList);
        }
        paramList.add(value);
    }

    @Override
    public final int getReadLimit() {
        return requestClientOptions.getReadLimit();
    }

    /**
     * Copies the internal state of this base class to that of the target request.
     *
     * @return the target request
     */
    protected final <T extends AmazonWebServiceRequest> T copyBaseTo(T target) {
        if (customRequestHeaders != null) {
            for (Map.Entry<String, String> e : customRequestHeaders.entrySet())
                target.putCustomRequestHeader(e.getKey(), e.getValue());
        }
        if (customQueryParameters != null) {
            for (Map.Entry<String, List<String>> e : customQueryParameters.entrySet()) {
                if (e.getValue() != null) {
                    for (String value : e.getValue()) {
                        target.putCustomQueryParameter(e.getKey(), value);
                    }
                }
            }
        }

        target.setRequestCredentialsProvider(credentialsProvider);
        target.setGeneralProgressListener(progressListener);
        target.setRequestMetricCollector(requestMetricCollector);
        requestClientOptions.copyTo(target.getRequestClientOptions());
        return target;
    }

    /**
     * Returns the source object from which the current object was cloned; or null if there isn't
     * one.
     */
    public AmazonWebServiceRequest getCloneSource() {
        return cloneSource;
    }

    /**
     * Returns the root object from which the current object was cloned; or null if there isn't one.
     */
    public AmazonWebServiceRequest getCloneRoot() {
        AmazonWebServiceRequest cloneRoot = cloneSource;
        if (cloneRoot != null) {
            while (cloneRoot.getCloneSource() != null) {
                cloneRoot = cloneRoot.getCloneSource();
            }
        }
        return cloneRoot;
    }

    private void setCloneSource(AmazonWebServiceRequest cloneSource) {
        this.cloneSource = cloneSource;
    }

    /**
     * Returns the amount of time to wait (in milliseconds) for the request to complete before
     * giving up and timing out. A non-positive value disables this feature.
     * <p>
     * This feature requires buffering the entire response (for non-streaming APIs) into memory to
     * enforce a hard timeout when reading the response. For APIs that return large responses this
     * could be expensive.
     * <p>
     * <p>
     * The request timeout feature doesn't have strict guarantees on how quickly a request is
     * aborted when the timeout is breached. The typical case aborts the request within a few
     * milliseconds but there may occasionally be requests that don't get aborted until several
     * seconds after the timer has been breached. Because of this the request timeout feature should
     * not be used when absolute precision is needed.
     * </p>
     * <p>
     * <b>Note:</b> This feature is not compatible with Java 1.6.
     * </p>
     *
     * @return The amount of time to wait (in milliseconds) for the request to complete before
     *         giving up and timing out. A non-positive value disables the timeout for this request.
     * @see {@link AmazonWebServiceRequest#setSdkClientExecutionTimeout(int)} to enforce a timeout
     *      across all retries
     */
    public Integer getSdkRequestTimeout() {
        return sdkRequestTimeout;
    }

    /**
     * Sets the amount of time to wait (in milliseconds) for the request to complete before giving
     * up and timing out. A non-positive value disables this feature.
     * <p>
     * This feature requires buffering the entire response (for non-streaming APIs) into memory to
     * enforce a hard timeout when reading the response. For APIs that return large responses this
     * could be expensive.
     * <p>
     * <p>
     * The request timeout feature doesn't have strict guarantees on how quickly a request is
     * aborted when the timeout is breached. The typical case aborts the request within a few
     * milliseconds but there may occasionally be requests that don't get aborted until several
     * seconds after the timer has been breached. Because of this the request timeout feature should
     * not be used when absolute precision is needed.
     * </p>
     * <p>
     * <b>Note:</b> This feature is not compatible with Java 1.6.
     * </p>
     *
     * @param sdkRequestTimeout
     *            The amount of time to wait (in milliseconds) for the request to complete before
     *            giving up and timing out. A non-positive value disables the timeout for this
     *            request.
     * @see {@link AmazonWebServiceRequest#setSdkClientExecutionTimeout(int)} to enforce a timeout
     *      across all retries
     */
    public void setSdkRequestTimeout(int sdkRequestTimeout) {
        this.sdkRequestTimeout = sdkRequestTimeout;
    }

    /**
     * Sets the amount of time to wait (in milliseconds) for the request to complete before giving
     * up and timing out. A non-positive value disables this feature. Returns the updated
     * AmazonWebServiceRequest object so that additional method calls may be chained together.
     * <p>
     * This feature requires buffering the entire response (for non-streaming APIs) into memory to
     * enforce a hard timeout when reading the response. For APIs that return large responses this
     * could be expensive.
     * <p>
     * <p>
     * The request timeout feature doesn't have strict guarantees on how quickly a request is
     * aborted when the timeout is breached. The typical case aborts the request within a few
     * milliseconds but there may occasionally be requests that don't get aborted until several
     * seconds after the timer has been breached. Because of this the request timeout feature should
     * not be used when absolute precision is needed.
     * </p>
     * <p>
     * <b>Note:</b> This feature is not compatible with Java 1.6.
     * </p>
     *
     * @param sdkRequestTimeout
     *            The amount of time to wait (in milliseconds) for the request to complete before
     *            giving up and timing out. A non-positive value disables the timeout for this
     *            request.
     * @return The updated {@link AmazonWebServiceRequest} object.
     * @see {@link AmazonWebServiceRequest#setSdkClientExecutionTimeout(int)} to enforce a timeout
     *      across all retries
     */
    public <T extends AmazonWebServiceRequest> T withSdkRequestTimeout(int sdkRequestTimeout) {
        setSdkRequestTimeout(sdkRequestTimeout);
        @SuppressWarnings("unchecked")
        T t = (T) this;
        return t;
    }

    /**
     * Returns the amount of time (in milliseconds) to allow the client to complete the execution of
     * an API call. This timeout covers the entire client execution except for marshalling. This
     * includes request handler execution, all HTTP request including retries, unmarshalling, etc.
     * <p>
     * This feature requires buffering the entire response (for non-streaming APIs) into memory to
     * enforce a hard timeout when reading the response. For APIs that return large responses this
     * could be expensive.
     * <p>
     * The client execution timeout feature doesn't have strict guarantees on how quickly a
     * request is aborted when the timeout is breached. The typical case aborts the request within
     * a few milliseconds, but there may occasionally be requests that don't get aborted until
     * several seconds after the timeout has been breached. Because of this, the client execution
     * timeout feature should not be used when absolute precision is needed.
     * </p>
     * <p>
     * This may be used together with {@link AmazonWebServiceRequest#setSdkRequestTimeout(int)} to
     * enforce both a timeout on each individual HTTP request (i.e. each retry) and the total time
     * spent on all requests across retries (i.e. the 'client execution' time). A non-positive value
     * disables this feature.
     * </p>
     * <p>
     * <b>Note:</b> This feature is not compatible with Java 1.6.
     * </p>
     *
     * @return The amount of time (in milliseconds) to allow the client to complete the execution of
     *         an API call. A non-positive value disables the timeout for this request.
     * @see AmazonWebServiceRequest#setSdkRequestTimeout(int) to enforce a timeout per HTTP
     *      request
     */
    public Integer getSdkClientExecutionTimeout() {
        return this.sdkClientExecutionTimeout;
    }

    /**
     * Sets the amount of time (in milliseconds) to allow the client to complete the execution of
     * an API call. This timeout covers the entire client execution except for marshalling. This
     * includes request handler execution, all HTTP requests including retries, unmarshalling, etc.
     * <p>
     * This feature requires buffering the entire response (for non-streaming APIs) into memory to
     * enforce a hard timeout when reading the response. For APIs that return large responses this
     * could be expensive.
     * <p>
     * The client execution timeout feature doesn't have strict guarantees on how quickly a
     * request is aborted when the timeout is breached. The typical case aborts the request within
     * a few milliseconds, but there may occasionally be requests that don't get aborted until
     * several seconds after the timeout has been breached. Because of this, the client execution
     * timeout feature should not be used when absolute precision is needed.
     * </p>
     * <p>
     * This may be used together with {@link AmazonWebServiceRequest#setSdkRequestTimeout(int)} to
     * enforce both a timeout on each individual HTTP request (i.e. each retry) and the total time
     * spent on all requests across retries (i.e. the 'client execution' time). A non-positive value
     * disables this feature.
     * </p>
     * <p>
     * <b>Note:</b> This feature is not compatible with Java 1.6.
     * </p>
     *
     * @param sdkClientExecutionTimeout
     *            The amount of time (in milliseconds) to allow the client to complete the execution
     *            of an API call. A non-positive value disables the timeout for this request.
     * @see AmazonWebServiceRequest#setSdkRequestTimeout(int) to enforce a timeout per HTTP
     *      request
     */
    public void setSdkClientExecutionTimeout(int sdkClientExecutionTimeout) {
        this.sdkClientExecutionTimeout = sdkClientExecutionTimeout;
    }

    /**
     * Sets the amount of time (in milliseconds) to allow the client to complete the execution of
     * an API call. This timeout covers the entire client execution except for marshalling. This
     * includes request handler execution, all HTTP requests including retries, unmarshalling, etc.
     * <p>
     * This feature requires buffering the entire response (for non-streaming APIs) into memory to
     * enforce a hard timeout when reading the response. For APIs that return large responses this
     * could be expensive.
     * <p>
     * The client execution timeout feature doesn't have strict guarantees on how quickly a
     * request is aborted when the timeout is breached. The typical case aborts the request within
     * a few milliseconds, but there may occasionally be requests that don't get aborted until
     * several seconds after the timeout has been breached. Because of this, the client execution
     * timeout feature should not be used when absolute precision is needed.
     * </p>
     * <p>
     * This may be used together with {@link AmazonWebServiceRequest#setSdkRequestTimeout(int)} to
     * enforce both a timeout on each individual HTTP request (i.e. each retry) and the total time
     * spent on all requests across retries (i.e. the 'client execution' time). A non-positive value
     * disables this feature.
     * </p>
     * <p>
     * <b>Note:</b> This feature is not compatible with Java 1.6.
     * </p>
     *
     * @param sdkClientExecutionTimeout
     *            The amount of time (in milliseconds) to allow the client to complete the execution
     *            of an API call. A non-positive value disables the timeout for this request.
     * @return The updated {@link AmazonWebServiceRequest} object for method chaining.
     * @see AmazonWebServiceRequest#setSdkRequestTimeout(int) to enforce a timeout per HTTP
     *      request
     */
    public <T extends AmazonWebServiceRequest> T withSdkClientExecutionTimeout(int sdkClientExecutionTimeout) {
        setSdkClientExecutionTimeout(sdkClientExecutionTimeout);
        @SuppressWarnings("unchecked")
        T t = (T) this;
        return t;
    }

    @Override
    public <X> void addHandlerContext(HandlerContextKey<X> key, X value) {
        this.handlerContext.put(key, value);
    }

    @Override
    @SuppressWarnings("unchecked")
    public <X> X getHandlerContext(HandlerContextKey<X> key) {
        return (X) this.handlerContext.get(key);
    }
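
    // Usage sketch (my addition, not part of the SDK source): the key carries the
    // value's type, so lookups need no manual cast. With a hypothetical key:
    //
    //   HandlerContextKey<String> TRACE_ID = new HandlerContextKey<String>("TraceId");
    //   request.addHandlerContext(TRACE_ID, "abc-123");
    //   String traceId = request.getHandlerContext(TRACE_ID); // inferred as String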

    /**
     * Retrieve an unmodifiable collection of all handler context objects. This allows a {@link Request} derived from a
     * {@link AmazonWebServiceRequest} to inherit its context. This does not protect the objects within the map from being
     * modified.
     *
     * <p>This should not be used by customers.</p>
     */
    @SdkInternalApi
    Map<HandlerContextKey<?>, Object> getHandlerContext() {
        return Collections.unmodifiableMap(this.handlerContext);
    }

    /**
     * Creates a shallow clone of this object for all fields except the handler context. Explicitly does <em>not</em> clone the
     * deep structure of the other fields in the message.
     *
     * @see Object#clone()
     */
    @Override
    public AmazonWebServiceRequest clone() {
        try {
            AmazonWebServiceRequest cloned = (AmazonWebServiceRequest) super.clone();
            cloned.setCloneSource(this);

            // Deep-copy context to ensure modifications made by the handlers do not leak back to the caller or other uses of the
            // same request.
            cloned.handlerContext = new HashMap<HandlerContextKey<?>, Object>(cloned.handlerContext);
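            // (Note, my addition: only the map itself is copied here; the values it
            // holds are still shared with the original, as the getHandlerContext()
            // Javadoc above points out.)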

            return cloned;
        } catch (CloneNotSupportedException e) {
            throw new IllegalStateException(
                    "Got a CloneNotSupportedException from Object.clone() even though we're Cloneable!", e);
        }
    }
}
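
To see how the request-level settings above fit together, here is a minimal usage sketch. This is my own code, not part of the SDK source; MyRequest and TRACE_ID are hypothetical stand-ins for a concrete request subclass and an application-defined context key.

import com.amazonaws.AmazonWebServiceRequest;
import com.amazonaws.handlers.HandlerContextKey;

public class RequestConfigSketch {

    // Hypothetical request type; real SDK clients ship their own model subclasses.
    static class MyRequest extends AmazonWebServiceRequest {
    }

    // Hypothetical type-safe context key.
    static final HandlerContextKey<String> TRACE_ID =
            new HandlerContextKey<String>("TraceId");

    public static void main(String[] args) {
        MyRequest request = new MyRequest();
        request.setSdkRequestTimeout(5000);          // limit per HTTP attempt (ms)
        request.setSdkClientExecutionTimeout(30000); // total budget across retries (ms)

        // Attach per-request context that request handlers can read back later.
        request.addHandlerContext(TRACE_ID, "abc-123");

        // clone() copies the context map, so the clone's context can diverge
        // without leaking changes back to the original request.
        AmazonWebServiceRequest copy = request.clone();
        copy.addHandlerContext(TRACE_ID, "def-456");

        System.out.println(request.getHandlerContext(TRACE_ID)); // abc-123
        System.out.println(copy.getHandlerContext(TRACE_ID));    // def-456
    }
}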

References