Skip to content

Commit

Permalink
Fix UnicodeConversionError (#5)
Browse files Browse the repository at this point in the history
* Fix UnicodeConversionError

* Test unexpected inputs

* Speed up compilation
  • Loading branch information
adriankumpf authored Mar 10, 2022
1 parent 090c050 commit 325adb6
Show file tree
Hide file tree
Showing 7 changed files with 343 additions and 2 deletions.
28 changes: 26 additions & 2 deletions lib/matchers/text.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ defmodule Infer.Text do
iex> Infer.Text.html?("<")
false
iex> binary = File.read!("test/archives/sample.zip")
iex> Infer.Text.html?(binary)
false
"""
@spec html?(binary()) :: boolean()
def html?(binary) do
Expand All @@ -45,7 +49,7 @@ defmodule Infer.Text do
char_list =
binary
|> String.trim()
|> String.to_charlist()
|> :binary.bin_to_list()

Enum.any?(values, fn val ->
if starts_with_ignore_ascii_case(char_list, val) do
Expand All @@ -62,19 +66,39 @@ defmodule Infer.Text do
Takes the binary file contents as arguments. Returns `true` if it's xml.
See: https://mimesniff.spec.whatwg.org/
## Examples
iex> Infer.Text.xml?(~s(<?xml version="1.0" encoding="ISO-8859-1"?>))
true
iex> binary = File.read!("test/archives/sample.zip")
iex> Infer.Text.xml?(binary)
false
"""
@spec xml?(binary()) :: boolean()
def xml?(binary) do
  # Work on raw bytes instead of Unicode code points: the input is arbitrary
  # file content (e.g. a zip archive) and is not guaranteed to be valid UTF-8.
  # `String.to_charlist/1` raises `UnicodeConversionError` on such input, so
  # the binary is converted byte-by-byte with `:binary.bin_to_list/1` only.
  bytes =
    binary
    |> String.trim()
    |> :binary.bin_to_list()

  # The prefix check is delegated to the module's ASCII case-insensitive helper.
  starts_with_ignore_ascii_case(bytes, '<?xml')
end

@doc """
Takes the binary file contents as arguments. Returns `true` if it's a shell script.
## Examples
iex> Infer.Text.shell_script?("#!/bin/sh")
true
iex> binary = File.read!("test/archives/sample.zip")
iex> Infer.Text.shell_script?(binary)
false
"""
@spec shell_script?(binary()) :: boolean()
def shell_script?(<<"#!", _rest::binary>>), do: true
Expand Down
4 changes: 4 additions & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ defmodule Infer.MixProject do
app: :infer,
version: "0.2.0",
elixir: "~> 1.10",
elixirc_paths: elixirc_paths(Mix.env()),
start_permanent: Mix.env() == :prod,
deps: deps(),
package: package(),
Expand All @@ -29,6 +30,9 @@ defmodule Infer.MixProject do
]
end

# Source directories to compile for the given Mix environment.
# Only the :test environment additionally compiles the helpers in "test/support".
defp elixirc_paths(env) do
  case env do
    :test -> ["lib", "test/support"]
    _other -> ["lib"]
  end
end

# Run "mix help deps" to learn about dependencies.
defp deps do
[
Expand Down
163 changes: 163 additions & 0 deletions test/infer_test.exs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
defmodule InferTest do
use ExUnit.Case

doctest Infer
doctest Infer.App
doctest Infer.Archive
Expand All @@ -10,4 +11,166 @@ defmodule InferTest do
doctest Infer.Font
doctest Infer.Text
doctest Infer.Video

describe "Infer.App" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:app`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :app end)

  test "handles app files" do
    # Every app fixture must be accepted by at least one app matcher.
    Enum.each(TestFiles.list(only: :app), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-app files" do
      Enum.each(TestFiles.list(except: :app), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end

describe "Infer.Book" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:book`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :book end)

  test "handles book files" do
    # Every book fixture must be accepted by at least one book matcher.
    Enum.each(TestFiles.list(only: :books), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-book files" do
      Enum.each(TestFiles.list(except: :books), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end

describe "Infer.Image" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:image`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :image end)

  test "handles image files" do
    # Every image fixture must be accepted by at least one image matcher.
    Enum.each(TestFiles.list(only: :images), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-image files" do
      Enum.each(TestFiles.list(except: :images), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end

describe "Infer.Video" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:video`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :video end)

  test "handles video files" do
    # Every video fixture must be accepted by at least one video matcher.
    Enum.each(TestFiles.list(only: :videos), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-video files" do
      Enum.each(TestFiles.list(except: :videos), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end

describe "Infer.Audio" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:audio`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :audio end)

  test "handles audio files" do
    # Every audio fixture must be accepted by at least one audio matcher.
    Enum.each(TestFiles.list(only: :audio), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-audio files" do
      Enum.each(TestFiles.list(except: :audio), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end

describe "Infer.Font" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:font`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :font end)

  test "handles font files" do
    # Every font fixture must be accepted by at least one font matcher.
    Enum.each(TestFiles.list(only: :fonts), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-font files" do
      Enum.each(TestFiles.list(except: :fonts), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end

describe "Infer.Doc" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:doc`.
  @matchers Infer.Matchers.list() |> Enum.filter(&(&1.matcher_type == :doc))

  # Test name spelling fixed ("documnet" -> "document") so it reads correctly
  # in ExUnit output and matches the sibling "non-document" tests.
  test "handles document files" do
    # Every document fixture must be accepted by at least one doc matcher.
    for {_path, binary} <- TestFiles.list(only: :docs) do
      assert Enum.find(@matchers, & &1.matcher.(binary))
    end
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-document files" do
      for {path, binary} <- TestFiles.list(except: :docs) do
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {path, unquote(matcher).(binary)}
      end
    end
  end
end

describe "Infer.Archive" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:archive`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :archive end)

  test "handles archive files" do
    # Every archive fixture must be accepted by at least one archive matcher.
    Enum.each(TestFiles.list(only: :archives), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject the fixtures of other types.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-archive files" do
      # NOTE(review): :docs and :books are skipped here too, presumably because
      # some of those fixtures are archive containers that an archive matcher
      # may legitimately accept; confirm against the fixtures.
      Enum.each(TestFiles.list(except: [:archives, :docs, :books]), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end

describe "Infer.Text" do
  # Entries of `Infer.Matchers.list/0` whose `matcher_type` is `:text`.
  @matchers Enum.filter(Infer.Matchers.list(), fn type -> type.matcher_type == :text end)

  test "handles text files" do
    # Every text fixture must be accepted by at least one text matcher.
    Enum.each(TestFiles.list(only: :text), fn {_name, contents} ->
      assert Enum.find(@matchers, fn type -> type.matcher.(contents) end)
    end)
  end

  # One generated test per matcher: it must reject every fixture of another type.
  for %Infer.Type{matcher: matcher} <- @matchers do
    test "#{inspect(matcher)} handles non-text files" do
      Enum.each(TestFiles.list(except: :text), fn {name, contents} ->
        # The file name is part of the match so a failure shows the offending fixture.
        assert {_, false} = {name, unquote(matcher).(contents)}
      end)
    end
  end
end
end
20 changes: 20 additions & 0 deletions test/support/test_files.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
defmodule TestFiles do
  @moduledoc """
  Test-support helper that enumerates the fixture files found under
  `test/<type>/`.

  The fixture paths are collected once, when this module is compiled, so files
  added afterwards are only picked up after a recompile.
  """

  @types [:app, :archives, :audio, :books, :docs, :fonts, :images, :videos, :text]
  @paths Map.new(@types, &{&1, Path.wildcard("test/#{&1}/*")})

  @doc """
  Lazily lists fixture files as `{basename, contents}` tuples.

  ## Options

    * `:only` - a type or a list of types to include. Defaults to all types.
    * `:except` - a type or a list of types to leave out. It is applied on top
      of `:only` when both options are given.

  The files are read from disk while the returned stream is consumed.
  """
  @spec list(keyword()) :: Enumerable.t()
  def list(opts \\ []) do
    excluded = List.wrap(opts[:except])

    # An absent (or empty) :only means "start from every known type".
    candidates =
      case List.wrap(opts[:only]) do
        [] -> @types
        only -> only
      end

    # Apply :except in both cases so it is never silently ignored.
    requested_types = Enum.reject(candidates, &(&1 in excluded))

    Stream.flat_map(@paths, fn {type, paths} ->
      if type in requested_types do
        Enum.map(paths, &{Path.basename(&1), File.read!(&1)})
      else
        []
      end
    end)
  end
end
7 changes: 7 additions & 0 deletions test/text/sample.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE html>
<html>
<body>
<h1>My First Heading</h1>
<p>My first paragraph.</p>
</body>
</html>
3 changes: 3 additions & 0 deletions test/text/sample.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh

echo "Hello World!"
Loading

0 comments on commit 325adb6

Please sign in to comment.