We can't find the internet
Attempting to reconnect
Something went wrong!
Hang in there while we get back on track
Using AWS
(2024-03-24)
Recently I had to switch from ExAWS to AWS. I’m going to quickly run through how I set it up with AWS, but you can read the original post below.
TLDR: We want to use RDS IAM to connect to our DB, so we have to generate a short-lived token based on the current AWS role (e.g. ECS instance) and use it as the database password.
First, add the dependencies:
# Excerpt of the project's `deps/0` for the `AWS`-based setup; unrelated
# entries are elided by the `# ...` placeholder.
defp deps do
# ...
# To generate the presigned url and for working with AWS
{:aws, "~> 0.14.0"},
# Alternatively you can just include `:aws_signature` if you are not
# going to use `AWS` for anything.
# {:aws_signature, "~> 0.3"}
# To fetch AWS credentials
#
# We don't want `:aws_credentials` to start in dev and test since
# the startup will fail if no credentials can be fetched.
{:aws_credentials, "~> 0.2.1", runtime: Mix.env() == :prod},
# CAStore for connecting to RDS with TLS
{:aws_rds_castore, "~> 1.1"},
end
We only start :aws_credentials
in production as :aws_credentials
halts the application startup if it can’t fetch any AWS credentials.
Now set up config/runtime.exs
to fetch the required arguments:
# The `:configure` option takes `{module, function, args}`; the connection
# options are prepended to `args` when the function is called. The credentials
# keyword list is therefore wrapped in an outer list so that it arrives as the
# single second argument of `MyApp.Repo.configure_with_auth_token/2` instead
# of being spread into one argument per key.
#
# `host`, `username`, `dbname`, `port` and `region` must be bound earlier in
# this file.
config :my_app, MyApp.Repo,
  configure: {
    MyApp.Repo,
    :configure_with_auth_token,
    [
      [
        host: host,
        username: username,
        dbname: dbname,
        port: port,
        region: region
      ]
    ]
  }
I store these arguments in a single environment variable as JSON and decode it before setting them here.
Also make sure :aws_credentials
is started when running migrations in config/runtime.exs
:
# Append `:aws_credentials` to the apps that are already configured to start
# before migrations run.
existing_apps =
  Keyword.fetch!(Application.fetch_env!(:my_app, MyApp.Repo), :start_apps_before_migration)

start_apps_before_migration = existing_apps ++ [:aws_credentials]

config :my_app, MyApp.Repo, start_apps_before_migration: start_apps_before_migration
We put this in config/runtime.exs
as :aws_credentials
will fail startup when it can’t fetch the AWS credentials. We don’t want it to run in the development and test environments.
Finally set up configure_with_auth_token/2
in your repo module:
defmodule MyApp.Repo do
  use Ecto.Repo,
    otp_app: :my_app,
    adapter: Ecto.Adapters.Postgres

  # `:configure` callback for the repo.
  #
  # Takes the connection options (`opts`) and the database `credentials`
  # keyword list (`:host`, `:username`, `:port`, `:dbname` and `:region`, all
  # required) and returns `opts` merged with the connection settings: a freshly
  # signed auth token as the password and SSL enabled with the RDS CA store.
  def configure_with_auth_token(opts, credentials) do
    [host, user, port, database, region] =
      Enum.map([:host, :username, :port, :dbname, :region], &Keyword.fetch!(credentials, &1))

    token = rds_auth_token(:aws_credentials.get_credentials(), host, port, user, region)

    connection_opts = [
      hostname: host,
      port: port,
      username: user,
      password: token,
      database: database,
      ssl: true,
      ssl_opts: AwsRdsCAStore.ssl_opts(host)
    ]

    Keyword.merge(opts, connection_opts)
  end

  # Builds the auth token: presigns a `connect` URL for the `rds-db` service
  # with the given credentials and strips the `https://` scheme from the
  # result. `opts` is handed to `:aws_signature.sign_v4_query_params/7`
  # unchanged; the default `ttl: 900` matches the 15 minutes the token is
  # valid for.
  defp rds_auth_token(aws_credentials, hostname, port, username, region, opts \\ [ttl: 900]) do
    access_key_id = Map.fetch!(aws_credentials, :access_key_id)
    secret_access_key = Map.fetch!(aws_credentials, :secret_access_key)
    now = :erlang.universaltime()
    endpoint = "https://#{hostname}:#{port}/?Action=connect&DBUser=#{username}"

    access_key_id
    |> :aws_signature.sign_v4_query_params(
      secret_access_key,
      region,
      "rds-db",
      now,
      endpoint,
      opts
    )
    |> String.trim_leading("https://")
  end
end
I still had to keep the wait_for_connection
logic in place from further down to prevent flakey deployments.
Assuming role
To connect to a database cross-account you must assume a role before generating the token. This wasn’t too bad using AWS
, but did take a minute to get right. Instead of calling :aws_credentials.get_credentials/0
in MyApp.Repo.configure_with_auth_token/2
, we’ll assume the role first:
defmodule MyApp.Repo do
  # ...

  # Returns the AWS credentials used to sign the RDS auth token.
  #
  # Starts from the credentials reported by `:aws_credentials`. When the
  # database credentials contain a `:role_arn`, that role is assumed and the
  # credentials of the assumed role are returned instead.
  defp get_aws_credentials(db_credentials, region) do
    aws_credentials = :aws_credentials.get_credentials()

    case Keyword.get(db_credentials, :role_arn) do
      nil -> aws_credentials
      role_arn -> assume_role(aws_credentials, region, role_arn)
    end
  end

  # Assumes `role_arn` through STS using `aws_credentials` and returns a map
  # with `:access_key_id`, `:secret_access_key` and `:token` for the assumed
  # role (the same keys this function reads from `aws_credentials`).
  #
  # `opts` is a keyword list; `:ttl` is sent as `DurationSeconds`.
  #
  # Raises if the STS call returns an error.
  defp assume_role(aws_credentials, region, role_arn, opts \\ [ttl: 900]) do
    access_key = Map.fetch!(aws_credentials, :access_key_id)
    secret_key = Map.fetch!(aws_credentials, :secret_access_key)
    session_token = Map.fetch!(aws_credentials, :token)

    client = AWS.Client.create(access_key, secret_key, session_token, region)

    input = %{
      # `opts` is a keyword list, so it must be read with `Keyword.fetch!/2`;
      # `Map.fetch!/2` raises `BadMapError` on a list.
      "DurationSeconds" => Keyword.fetch!(opts, :ttl),
      "RoleArn" => role_arn,
      "RoleSessionName" => to_string(__MODULE__)
    }

    case AWS.STS.assume_role(client, input) do
      {:ok,
       %{
         "AssumeRoleResponse" => %{
           "AssumeRoleResult" => %{
             "Credentials" => %{
               "AccessKeyId" => access_key_id,
               "SecretAccessKey" => secret_access_key,
               "SessionToken" => session_token
             }
           }
         }
       }, _response} ->
        %{
          access_key_id: access_key_id,
          secret_access_key: secret_access_key,
          token: session_token
        }

      {:error, error} when is_exception(error) ->
        raise error

      {:error, error} ->
        # `raise/1` only accepts an exception, a module or a message, so an
        # arbitrary error term is turned into a readable message instead of
        # triggering an unrelated `ArgumentError`.
        raise "Could not assume role #{role_arn}: #{inspect(error)}"
    end
  end
end
Now in rds_auth_token/6 you must pull out the session token as well:
# Excerpt of `rds_auth_token/6`: only the session-token handling is shown, the
# rest of the body is elided by the `# ...` placeholders.
defp rds_auth_token(aws_credentials, hostname, port, username, region, opts \\ [ttl: 900]) do
# ...
# Percent-encode the session token from the credentials map and add it to the
# signing options under `:session_token`.
session_token = :uri_string.quote(Map.fetch!(aws_credentials, :token))
opts = Keyword.put(opts, :session_token, session_token)
# ...
end
Remember to add :role_arn
to your MyApp.Repo
config.
Logging startup errors
Any exception raised in the configure_with_auth_token/2 callback didn’t show up in the logs. To understand what went wrong when the app starts to fail, I had to catch any exceptions and print them before reraising to halt the application startup:
defmodule MyApp.Repo do
# Same callback as before; the body is elided (`# ...`) and wrapped in a
# function-level `rescue` so that failures are printed before they propagate.
def configure_with_auth_token(opts, credentials) do
# ...
rescue
error ->
# If there are any issues with starting the supervisor the whole app
# will be shut down, so we want to print early here.
IO.warn(Exception.format_banner(:error, error, __STACKTRACE__))
# Re-raise with the original stacktrace so the failure is not swallowed.
reraise error, __STACKTRACE__
end
end
ExAWS
(2023-11-28)
AWS RDS supports IAM database authentication. This means that we don’t have to deal with password rotation and can instead use short-lived tokens as database passwords!
To set up RDS IAM database authentication you need to enable RDS IAM authentication first and ensure that your database user has RDS IAM authentication enabled.
In postgres this requires running a GRANT rds_iam TO REPLACE_WITH_DB_USERNAME;
query.
The token must be generated and used as the database password each time Ecto sets up a connection as the token will only be valid for 15 minutes.
We’ll use ExAWS
in the example below.
First we’ll add the dependencies:
# Excerpt of the project's `deps/0` for the ExAWS-based setup; unrelated
# entries are elided by the `# ...` placeholder.
defp deps do
# ...
# To generate the token
{:ex_aws, "~> 2.4"},
# CAStore for connecting to RDS with TLS
{:aws_rds_castore, "~> 1.1"},
end
Now we’ll update the repo config. Since the URL is generated on demand we don’t need to set the :url
option. Instead, we will use the :configure
callback. This is what I have in my config/runtime.exs
:
# The `:configure` option takes `{module, function, args}`; the connection
# options are prepended to `args` when the function is called. The credentials
# keyword list is therefore wrapped in an outer list so that it arrives as the
# single second argument of `MyApp.Repo.configure_with_auth_token/2` instead
# of being spread into one argument per key.
#
# `host`, `username`, `dbname` and `port` must be bound earlier in this file.
config :my_app, MyApp.Repo,
  configure: {
    MyApp.Repo,
    :configure_with_auth_token,
    [
      [
        host: host,
        username: username,
        dbname: dbname,
        port: port
      ]
    ]
  }
And you should also make sure that ExAws
is started when running migrations in config/config.exs
:
# ExAWS generates the IAM DB password token in prod, so it is listed among
# the apps started before migrations run.
config :my_app, MyApp.Repo, start_apps_before_migration: ~w(ssl logger ex_aws)a
The last piece is to implement the configure_with_auth_token/2
function:
# NOTE(review): this snippet is line-for-line the same as the `AWS` version of
# `MyApp.Repo` earlier in the post. It calls `:aws_credentials` and
# `:aws_signature`, neither of which is in the ExAWS dependency list, and it
# requires a `:region` key that the ExAWS `:configure` arguments do not pass
# (so `Keyword.fetch!/2` would raise). Presumably the original ExAws-based
# implementation was meant here — TODO confirm and restore it.
defmodule MyApp.Repo do
use Ecto.Repo,
otp_app: :my_app,
adapter: Ecto.Adapters.Postgres
# Helper function to configure the connection with dynamically generated
# auth token for the IAM instance role
def configure_with_auth_token(opts, credentials) do
hostname = Keyword.fetch!(credentials, :host)
username = Keyword.fetch!(credentials, :username)
port = Keyword.fetch!(credentials, :port)
dbname = Keyword.fetch!(credentials, :dbname)
region = Keyword.fetch!(credentials, :region)
aws_credentials = :aws_credentials.get_credentials()
auth_token = rds_auth_token(aws_credentials, hostname, port, username, region)
Keyword.merge(opts, [
hostname: hostname,
port: port,
username: username,
password: auth_token,
database: dbname,
ssl: true,
ssl_opts: AwsRdsCAStore.ssl_opts(hostname)
])
end
# Presigns a `connect` URL for the `rds-db` service and returns it without
# the `https://` scheme; `opts` is passed to the signer unchanged.
defp rds_auth_token(aws_credentials, hostname, port, username, region, opts \\ [ttl: 900]) do
access_key = Map.fetch!(aws_credentials, :access_key_id)
secret_key = Map.fetch!(aws_credentials, :secret_access_key)
datetime = :erlang.universaltime()
url = "https://#{hostname}:#{port}/?Action=connect&DBUser=#{username}"
signed_url =
:aws_signature.sign_v4_query_params(
access_key,
secret_key,
region,
"rds-db",
datetime,
url,
opts
)
String.trim_leading(signed_url, "https://")
end
end
Now you’ve got RDS IAM database authentication running!
Flakey deployments during migrations with ExAws
I saw flakey deployments when I first implemented this in my ECS cluster. Migrations were running in an ECS task and frequently failed after a few seconds. There were no helpful error messages in the log, all it explained was:
[error] Could not create schema migrations table. This error usually happens due to the following:
...
** (DBConnection.ConnectionError) connection not available and request was dropped from queue after 2967ms. This means requests are coming in and your connection pool cannot serve them fast enough. You can address this by:
...
I found that this was a combination of both ExAws and the Ecto.Migrator.run/3 call. ExAws takes a bit before it has the instance credentials available in the ExAws.Config.AuthCache GenServer, and Ecto.Migrator.run/3 won’t wait for a connection to be established with the :configure callback before running the query.
What we have to do is to force the migration task to wait until we have an established connection. We could increase :queue_target
and :queue_interval
, but I felt it was better to just wait until the connection had been established.
First, we’ll update our release module to call wait_for_connection/1
:
defmodule MyApp.Release do
  # ...

  # Runs all pending migrations for every repo returned by `repos/0`.
  def migrate do
    load_app()
    Enum.map(repos(), &run_migrator(&1, :up, all: true))
  end

  # Rolls `repo` back down to `version`.
  def rollback(repo, version) do
    load_app()
    run_migrator(repo, :down, to: version)
  end

  # Starts `repo` through `Ecto.Migrator.with_repo/2`, waits until a
  # connection is available and only then runs the migrator in the given
  # direction. Anything other than an `{:ok, _, _}` result fails the match.
  defp run_migrator(repo, direction, migrator_opts) do
    {:ok, _, _} =
      Ecto.Migrator.with_repo(repo, fn started_repo ->
        wait_for_connection(started_repo)
        Ecto.Migrator.run(started_repo, direction, migrator_opts)
      end)
  end

  # ...
end
Now we implement wait_for_connection/1
that will run a query every interval (every 50ms) until it gets a connection or times out (after 30s):
defmodule MyApp.Release do
  # ...

  # Pause between two connection probes, in milliseconds.
  @interval 50
  # Give up once this much time (in milliseconds) has been measured.
  @timeout :timer.seconds(30)

  # The credentials may not be available right away, so the migration task
  # probes the repo until a query succeeds instead of failing on the first
  # attempt.
  defp wait_for_connection(repo), do: wait_for_connection(repo, System.monotonic_time(), 0)

  # `started_at` is the native monotonic time of the first attempt and
  # `elapsed_ms` the time measured after the previous failed probe.
  defp wait_for_connection(repo, started_at, elapsed_ms) do
    cond do
      elapsed_ms >= @timeout ->
        raise "Could not establish a connection with #{inspect(repo)} after #{elapsed_ms}ms"

      canary(repo) == :ok ->
        :ok

      true ->
        # Measure before sleeping, then retry after the interval.
        now = System.monotonic_time()
        elapsed_ms = System.convert_time_unit(now - started_at, :native, :millisecond)
        :timer.sleep(@interval)
        wait_for_connection(repo, started_at, elapsed_ms)
    end
  end

  # Probes the repo with a trivial query. Returns `:ok` only for the expected
  # result, and `:error` for any other result or a connection error.
  defp canary(repo) do
    if match?({:ok, %{rows: [[1]]}}, repo.query("SELECT 1")), do: :ok, else: :error
  rescue
    DBConnection.ConnectionError -> :error
  end

  # ...
end
This resolved the flakey deployments.
Hi, I'm Dan Schultzer, I write in this blog, work a lot in Elixir, maintain several open source projects, and help companies streamline their development process