Search DSL using NimbleParsec

I have wanted to use NimbleParsec for a while, and finally got a good use case when I had to implement a search DSL in a project.

The requirement was to parse a query string into Flop filters. The search query would look like this:

inserted_at>=1702744783 AND amount<10
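
The goal is to turn that string into the list of filters Flop accepts. For the example above, the end result should look something like this (building exactly this shape is what the rest of the post covers):

[
  %{field: "inserted_at", op: :>=, value: DateTime.from_unix!(1_702_744_783)},
  %{field: "amount", op: :<, value: 10}
]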

As this was implemented in an API, we will start at the controller level by setting up this plug function:

defp parse_flop_filters(%{params: %{"query" => query}} = conn, _opts) do
  case QueryParser.parse(query, inserted_at: :utc_datetime, amount: :integer) do
    {:ok, filters} ->
      assign(conn, :flop_options, Map.put(conn.assigns.flop_options, :filters, filters))

    {:error, error} ->
      conn
      |> put_status(422)
      |> json(%{error: "Invalid query: #{error}"})
      |> halt()
  end
end

We should be explicit here, only allowing fields we want to handle in the API rather than passing everything on to Flop. In the above, we’re passing in the search fields and their types. The type is necessary because we want to cast the unix epoch timestamps used in the API into the DateTime structs that Flop expects.
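
For reference, the plug is hooked up in the controller like any other function plug. A minimal sketch, assuming :flop_options was assigned by an earlier plug and that we only want filtering on the index action:

plug :parse_flop_filters when action in [:index]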

Now we’ll implement our parse/2 function:

defmodule MyAppWeb.QueryParser do
  import NimbleParsec
  import __MODULE__.Helpers

  defparsec :do_parse, query()

  def parse(query, query_fields) do
    with {:ok, filters, "", _, {_, _}, _} <- do_parse(query),
         :ok <- validate_fields(filters, query_fields),
         {:ok, filters} <- parse_timestamps(filters, query_fields) do
      {:ok, filters}
    else
      {:ok, _filters, string, _, _, _} -> {:error, "couldn't process all of the query, got #{inspect string}"}
      {:error, error, _, _, _, _} -> {:error, error}
      {:error, error} -> {:error, error}
    end
  end

  defp validate_fields(filters, query_fields) do
    expected_fields = Enum.map(query_fields, fn {key, _type} -> to_string(key) end)
    current_fields = Enum.map(filters, & &1.field)

    case current_fields -- expected_fields do
      [] -> :ok
      unexpected_fields -> {:error, "unknown filter fields, got #{Enum.join(unexpected_fields, ", ")}"}
    end
  end

  # ...
end

In the above, we first parse the query string with NimbleParsec and then validate the search fields. Finally, we post-process the filters, casting the unix timestamps into DateTime structs. The validation and timestamp casting look like this:

defmodule MyAppWeb.QueryParser do
  # ...

  defp parse_timestamps(filters, query_fields) do
    fields =
      query_fields
      |> Enum.filter(fn {_field, type} -> type == :utc_datetime end)
      |> Enum.map(fn {field, _type} -> to_string(field) end)

    Enum.reduce_while(filters, {:ok, []}, fn filter, {:ok, filters} ->
      case update_timestamp(filter, fields) do
        {:ok, filter} -> {:cont, {:ok, filters ++ [filter]}}
        {:error, error} -> {:halt, {:error, error}}
      end
    end)
  end

  defp update_timestamp(%{field: field, value: value} = filter, fields) do
    case Enum.member?(fields, field) do
      true ->
        case from_unix_epoch(value) do
          {:ok, datetime} -> {:ok, %{filter | value: datetime}}
          _error -> {:error, "unexpected timestamp value for #{field}, got #{value}"}
        end

      false ->
        {:ok, filter}
    end
  end

  defp from_unix_epoch(value) when is_integer(value) do
    DateTime.from_unix(value)
  end

  defp from_unix_epoch(_value), do: :error
end

Now all that’s left to do is implement the parser itself! I highly recommend checking out Logflare’s parser, which gave me what I needed to set this up.

We begin by setting up the query combinator:

defmodule MyAppWeb.QueryParser.Helpers do
  import NimbleParsec

  def query do
    field_clause()
    |> optional(
      ignore(whitespace())
      |> concat(query_operator())
      |> concat(ignore(whitespace())))
    |> times(min: 1, max: 100)
    |> post_traverse({:to_filters, []})
  end

  # ...

  def to_filters(rest, args, context, _line, _offset) do
    filters = for %{field: _} = filter <- args, do: filter
    {rest, filters, context}
  end
end

The query combinator matches field clauses, each optionally followed by a query operator. The post traverse was necessary because I couldn’t just use ignore for the query operator, since I want to verify that it’s a valid operator (as you’ll see later). So instead I let it match and filter it out at the end.
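
To make that concrete, for a query like a<1 AND b>2 the results handed to to_filters contain both the filter maps and the tagged query operator (NimbleParsec passes them in reverse order), and we keep only the maps. Roughly:

# what to_filters receives in args (reverse order)
[
  %{field: "b", op: :>, value: 2},
  {:operator, "AND"},
  %{field: "a", op: :<, value: 1}
]

# what it returns, so only filters end up in the final result
[
  %{field: "b", op: :>, value: 2},
  %{field: "a", op: :<, value: 1}
]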

Now we’ll implement the field clause combinator:

defmodule MyAppWeb.QueryParser.Helpers do
  # ...

  def field_clause do
    any_field()
    |> concat(operator())
    |> concat(field_value())
    |> reduce({:to_filter, []})
    |> map({:check_for_no_invalid_field_values!, []})
    |> label("field filter clause")
  end

  def any_field do
    ascii_string([?a..?z, ?A..?Z, ?_], min: 1)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:field)
    |> label("filter field")
  end

  def operator do
    choice([
      string(">=") |> replace(:>=),
      string("<=") |> replace(:<=),
      string(">") |> replace(:>),
      string("<") |> replace(:<),
    ])
    |> unwrap_and_tag(:operator)
    |> label("filter operator")
  end

  def field_value do
    choice([
      number(),
      ascii_string([?a..?z, ?A..?Z, ?_, ?0..?9], min: 1),
      invalid_match_all_value()
    ])
    |> unwrap_and_tag(:value)
    |> label("valid filter value")
  end

  def number do
    integer(min: 1)
    |> label("number")
  end

  def invalid_match_all_value do
    choice([
      ascii_string([33..255], min: 1),
      empty() |> replace(~S|""|)
    ])
    |> unwrap_and_tag(:invalid_field_value)
  end

  def check_for_no_invalid_field_values!(%{value: {:invalid_field_value, value}}) do
    raise "invalid filter value: #{value}"
  end

  def check_for_no_invalid_field_values!(filter), do: filter

  def to_filter(args) do
    %{
      field: Keyword.fetch!(args, :field),
      op: Keyword.fetch!(args, :operator),
      value: Keyword.fetch!(args, :value)
    }
  end

  # ...
end

Here we match a field clause with the field name, field operator, and field value. The field must only contain letters or _. The operator can be any of the <, >, <=, >= comparisons. The value can be alphanumeric and will be parsed as either an integer or a string. We’ll also match invalid values since we want to catch everything up to the next whitespace and raise an error if there are invalid characters. Finally, it all gets mapped into a filter that can be passed on to Flop.
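
If you want to poke at the field clause combinator on its own, you can wire it into a throwaway parser. A quick sketch for experimenting in IEx (the Playground module is only for illustration, it’s not part of the implementation):

defmodule Playground do
  import NimbleParsec

  # compile the field clause combinator into a standalone parser function
  defparsec :field_clause, MyAppWeb.QueryParser.Helpers.field_clause()
end

{:ok, [filter], "", _, _, _} = Playground.field_clause("amount<10")
# filter == %{field: "amount", op: :<, value: 10}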

The last thing is to match the query operator that goes in between the field clauses:

defmodule MyAppWeb.QueryParser.Helpers do
  # ...

  def whitespace do
    ascii_string([?\s, ?\n], min: 1)
  end

  def query_operator do
    choice([
      string("AND"),
      invalid_match_all_query_operator()
    ])
    |> unwrap_and_tag(:operator)
    |> map({:check_for_no_invalid_query_operator_values!, []})
    |> label("query clause operator")
  end

  def invalid_match_all_query_operator do
    ascii_string([33..255], min: 1)
    |> unwrap_and_tag(:invalid_query_operator)
  end

  def check_for_no_invalid_query_operator_values!({:operator, {:invalid_query_operator, operator}}) do
    raise "invalid query operator: #{operator}"
  end

  def check_for_no_invalid_query_operator_values!(filter), do: filter

  # ...
end

We’ll only support AND since that’s all we can do with Flop. And again we are matching any characters so we can raise an error for an invalid operator.

Here we are, our first search DSL, ready to be extended!

There is a lot that can (and should) be added to this: more field operators (see the sketch below), ensuring that a query operator has a field clause on its right side, nested clauses. But this gives us a great starting point for our search DSL!
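
As an example of the first of those, supporting equality and inequality is mostly a matter of adding entries to the operator combinator. A sketch of what that could look like (:== and :!= are the operators Flop uses for those comparisons):

def operator do
  choice([
    string(">=") |> replace(:>=),
    string("<=") |> replace(:<=),
    string("!=") |> replace(:!=),
    string(">") |> replace(:>),
    string("<") |> replace(:<),
    string("=") |> replace(:==)
  ])
  |> unwrap_and_tag(:operator)
  |> label("filter operator")
end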

To wrap up, here are the tests for MyAppWeb.QueryParser:
defmodule MyAppWeb.QueryParserTest do
  use MyAppWeb.ConnCase

  alias MyAppWeb.QueryParser

  describe "parse/2" do
    test "with no field" do
      assert {:error, error} = QueryParser.parse("", [field: :string])
      assert error == "expected filter field while processing field filter clause"
    end

    test "with no operator" do
      assert {:error, error} = QueryParser.parse("field", [field: :string])
      assert error == "expected filter operator while processing field filter clause"
    end

    test "with no value" do
      assert_raise RuntimeError, "invalid filter value: \"\"", fn ->
        QueryParser.parse("field<", [field: :string])
      end
    end

    test "with invalid field" do
      assert {:error, error} = QueryParser.parse("$$$$=value", [field: :string])
      assert error == "expected filter field while processing field filter clause"
    end

    test "with invalid operator" do
      assert {:error, error} = QueryParser.parse("field=value", [field: :string])
      assert error == "expected filter operator while processing field filter clause"
    end

    test "with invalid value" do
      assert_raise RuntimeError, "invalid filter value: $$$$", fn ->
        QueryParser.parse("field<$$$$", [field: :string])
      end
    end

    test "with missing field in opts" do
      assert {:error, error} = QueryParser.parse("field<value", [other_field: :string])
      assert error == "unknown filter fields, got field"
    end

    test "with valid expression" do
      assert {:ok, [filter]} = QueryParser.parse("field<value", [field: :string])
      assert filter == %{field: "field", op: :<, value: "value"}
    end

    test "with invalid datetime type" do
      assert {:error, error} = QueryParser.parse("field<invalid", [field: :utc_datetime_usec])
      assert error == "unexpected timestamp value for field, got invalid"
    end

    test "with invalid datetime value" do
      assert {:error, error} = QueryParser.parse("field<#{253_402_300_800}", [field: :utc_datetime_usec])
      assert error == "unexpected timestamp value for field, got #{253_402_300_800}"
    end

    test "with datetime value" do
      assert {:ok, [filter]} = QueryParser.parse("field<#{1_464_096_368}", [field: :utc_datetime_usec])
      assert filter == %{field: "field", op: :<, value: ~U[2016-05-24 13:26:08.000Z]}
    end

    test "with invalid part" do
      assert {:error, error} = QueryParser.parse("field<value ", [field: :string])
      assert error == "couldn't process all of the query, got \" \""
    end

    test "with multiple missing fields in opts" do
      assert {:error, error} = QueryParser.parse("first_field<test AND second_field<test", [field: :string])
      assert error == "unknown filter fields, got first_field, second_field"
    end

    test "with invalid clause operator" do
      expr = "first_field<value1 OR second_field>value2"
      opts = [first_field: :string, second_field: :string]

      assert_raise RuntimeError, "invalid query operator: OR", fn ->
        QueryParser.parse(expr, opts)
      end
    end

    test "with multiple valid expressions" do
      expr = "first_field<value1 AND second_field>value2"
      opts = [first_field: :string, second_field: :string]

      assert {:ok, [filter_1, filter_2]} = QueryParser.parse(expr, opts)
      assert filter_1 == %{field: "first_field", op: :<, value: "value1"}
      assert filter_2 == %{field: "second_field", op: :>, value: "value2"}
    end
  end
end
