Erlang Central

Difference between revisions of "List Generators"

From ErlangCentral Wiki

m (Added newlines)
(8 intermediate revisions by 3 users not shown)
Line 5:Line 5:
 
(This is a living page, please add your own list based generators here!)
 
(This is a living page, please add your own list based generators here!)
  
[[Adam]] Lindberg
+
[[User:Adam|Adam Lindberg]]
 +
[[User:Thomas|Thomas Arts]]
 +
[[User:John Hughes]]
  
== Non-empty list ==
+
== Non-Empty List ==
  
 
If you want to generate lists which can never be empty, it's fairly simple to wrap the standard '''list()''' generator in QuickCheck:
 
If you want to generate lists which can never be empty, it's fairly simple to wrap the standard '''list()''' generator in QuickCheck:
 
<code>
 
<code>
 
nelist(G) ->
 
nelist(G) ->
     ?LET({E, L}, {G, list(G)},
+
     [G|list(G)].
        [E|L]).
+
 
</code>
 
</code>
 +
This generator will generate one extra element and prepend it to the list to ensure that the list is never empty. However, shrinking this list is sub-optimal, since it will not always discard the first element, even if a minimal counterexample with only one element exists.
  
 +
A better solution, which is in QuickCheck 1.19.2 and later, is
 +
<code>
 +
nelist(G) ->
 +
    non_empty(list(G)).
 +
</code>
 +
The built-in non_empty generator is defined with ?SUCHTHAT, which behaves much better when shrinking:
 +
<code>
 +
non_empty(ListG) ->
 +
    ?SUCHTHAT(List,ListG,List /= []).
 +
</code>
 +
In addition the non_empty generator also works for binaries.
  
== Regexp POSIX Character Classes ==
+
== List Without Duplicates ==
  
Generators for all the [http://en.wikipedia.org/wiki/Regular_expression#POSIX_character_classes POSIX character classes] in regexp. ''Note: some of them do not generate printable strings in Erlang.''
+
The easiest way to generate a list without duplicates, is to generate a list that may contain duplicates, and then remove them. For example, if a sorted list will do, then a list without duplicates can be generated by
 +
<code caption="A generator for duplicate-free lists">
 +
ulist(G) ->
 +
  ?LET(L,list(G),lists:usort(L))
 +
</code>
  
 +
Then you can generate a list of unique integers, for example, using
 
<code>
 
<code>
 +
ulist(int())
 +
</code>
 +
 +
If you don't want all your lists to be sorted, then you could use the slightly more complex
 +
<code caption="A generator for randomly ordered duplicate-free lists">
 +
ulist(G) ->
 +
  ?LET(L,list(G),L--(L--lists:usort(L)))
 +
</code>
 +
 +
If you want to generate, for example, a list of pairs with different first components, then you should generate the list of first components first, and generate the list of pairs from it. For example,
 +
<code caption="Generate a list of key-value pairs with unique keys">
 +
?LET(Keys,ulist(key()),
 +
    [{K,value()} || K <- Keys]).
 +
</code>
 +
 +
 +
== Regexp POSIX Character Class Strings ==
 +
 +
String generators for all the [http://en.wikipedia.org/wiki/Regular_expression#POSIX_character_classes POSIX character classes] in regexp. ''Note: some of them do not generate printable strings in Erlang.''
 +
 +
Also note that these generators are designed in such a way that the chosen list shrinks in length, but not in content. This avoids enormously large shrinking trees and gives rather quick shrinking results. However, it also may give rise to rather random looking output without knowing what actually is the cause of an error. In such cases, it is recommended to add a default value for shrinking.
 +
 +
<code>
 +
alnum() -> default("0",list(oneof(seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9)))).
 +
</code>
 +
 +
If you happen to want shrinking in your content, use the '''choose''' generator.
 +
 +
<code>
 +
-import(lists, [seq/2]).
 +
 
%%% Alphanumeric characters ([a-zA-Z0-9]).
 
%%% Alphanumeric characters ([a-zA-Z0-9]).
alnum() -> oneof(seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9)).
+
alnum() -> list(oneof(seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9))).
  
 
%%% Alphabetic characters ([a-zA-Z]).
 
%%% Alphabetic characters ([a-zA-Z]).
alpha() -> oneof(seq($a, $z) ++ seq($A, $Z)).
+
alpha() -> list(oneof(seq($a, $z) ++ seq($A, $Z))).
  
 
%%% ASCII characters ([\x00-\x7F]).
 
%%% ASCII characters ([\x00-\x7F]).
ascii() -> oneof(seq(0, 127)).
+
ascii() -> list(oneof(seq(0, 127))).
  
 
%%% Space and tab ([ \t]).
 
%%% Space and tab ([ \t]).
blank() -> oneof([$ , $\t]).
+
blank() -> list(oneof([$ , $\t])).
  
 
%%% Control characters ([\x00-\x1F\x7F]).
 
%%% Control characters ([\x00-\x1F\x7F]).
cntrl() -> oneof(seq(0, 31) ++ [127]).
+
cntrl() -> list(oneof(seq(0, 31) ++ [127])).
  
 
%%% Digits ([0-9]).
 
%%% Digits ([0-9]).
digit() -> oneof(seq($0, $9)).
+
digit() -> list(oneof(seq($0, $9))).
  
 
%%% Visible characters (i.e. anything except spaces, control
 
%%% Visible characters (i.e. anything except spaces, control
 
%%% characters, etc.) ([\x21-\x7E]).
 
%%% characters, etc.) ([\x21-\x7E]).
graph() -> oneof(seq(33, 126)).
+
graph() -> list(oneof(seq(33, 126))).
  
 
%%% Lowercase letters ([a-z]).
 
%%% Lowercase letters ([a-z]).
lower() -> oneof(seq($a, $z)).
+
lower() -> list(oneof(seq($a, $z))).
  
 
%%% Visible characters and spaces (i.e. anything except control
 
%%% Visible characters and spaces (i.e. anything except control
 
%%% characters, etc.) ([\x20-\x7E]).
 
%%% characters, etc.) ([\x20-\x7E]).
print() -> oneof(seq(32, 126)).
+
print() -> list(oneof(seq(32, 126))).
  
 
%%% Punctuation and symbols ([!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]).
 
%%% Punctuation and symbols ([!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]).
punct() -> oneof("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}\~").
+
punct() -> list(oneof("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}\~")).
  
%%% @doc All whitespace characters, including line breaks ([ \t\r\n\v\f]).
+
%%% All whitespace characters, including line breaks ([ \t\r\n\v\f]).
space() -> oneof([$ , $\t, $\r, $\n, $\v, $\f]).
+
space() -> list(oneof([$ , $\t, $\r, $\n, $\v, $\f])).
  
 
%%% Uppercase letters ([A-Z]).
 
%%% Uppercase letters ([A-Z]).
upper() -> oneof(seq($A, $Z)).
+
upper() -> list(oneof(seq($A, $Z))).
  
 
%%% Word characters (letters, numbers and underscores) ([A-Za-z0-9_]).
 
%%% Word characters (letters, numbers and underscores) ([A-Za-z0-9_]).
word() ->  oneof(seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9) ++ "_").
+
word() ->  list(oneof(seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9) ++ "_")).
  
%%% @doc Hexadecimal digits ([A-Fa-f0-9]).
+
%%% Hexadecimal digits ([A-Fa-f0-9]).
xdigit() -> oneof(seq($A, $F) ++ seq($a, $f) ++ seq($0, $9)).
+
xdigit() -> list(oneof(seq($A, $F) ++ seq($a, $f) ++ seq($0, $9))).
 
</code>
 
</code>

Revision as of 13:44, 22 March 2010


Contents

Authors

(This is a living page, please add your own list based generators here!)

Adam Lindberg, Thomas Arts, John Hughes

Non-Empty List

If you want to generate lists which can never be empty, it's fairly simple to wrap the standard list() generator in QuickCheck:

%% Non-empty list generator: one value from G consed onto a list(G) tail,
%% so the result always has at least one element.
nelist(G) ->
    [G|list(G)].

This generator will generate one extra element and prepend it to the list to ensure that the list is never empty. However, shrinking this list is sub-optimal, since it will not always discard the first element, even if a minimal counterexample with only one element exists.

A better solution, which is in QuickCheck 1.19.2 and later, is

%% Non-empty list generator built by wrapping list(G) in non_empty/1
%% (available in QuickCheck 1.19.2 and later).
nelist(G) ->
    non_empty(list(G)).

The built-in non_empty generator is defined with ?SUCHTHAT, which behaves much better when shrinking:

%% Restricts the generator ListG to values that are not equal to the empty
%% list; ?SUCHTHAT keeps only generated values for which the condition holds.
non_empty(ListG) ->
     ?SUCHTHAT(List,ListG,List /= []).

In addition the non_empty generator also works for binaries.

List Without Duplicates

The easiest way to generate a list without duplicates is to generate a list that may contain duplicates, and then remove them. For example, if a sorted list will do, then a list without duplicates can be generated by

%% Generator for sorted, duplicate-free lists whose elements come from G.
%% lists:usort/1 both sorts the generated list and drops repeated elements.
ulist(G) ->
    ?LET(L, list(G), lists:usort(L)).

Then you can generate a list of unique integers, for example, using

%% Generator for duplicate-free lists of integers.
ulist(int())

If you don't want all your lists to be sorted, then you could use the slightly more complex

%% Generator for duplicate-free lists that keep their generated order
%% (no sorting). L -- lists:usort(L) leaves only the surplus copies of
%% repeated elements; subtracting those from L removes the earlier copies,
%% so exactly one occurrence of each element (its last one) remains.
ulist(G) ->
    ?LET(L, list(G), L -- (L -- lists:usort(L))).

If you want to generate, for example, a list of pairs with different first components, then you should generate the list of first components first, and generate the list of pairs from it. For example,

%% Generate the duplicate-free key list first, then pair every key with its
%% own value() generator, so no two pairs share a first component.
?LET(Keys,ulist(key()),
     [{K,value()} || K <- Keys]).


Regexp POSIX Character Class Strings

String generators for all the POSIX character classes in regexp. Note: some of them do not generate printable strings in Erlang.

Also note that these generators are designed so that the generated list shrinks in length, but not in content. This avoids enormously large shrinking trees and gives rather quick shrinking results. However, it may also produce rather random-looking counterexamples that do not make the actual cause of an error obvious. In such cases, it is recommended to add a default value for shrinking:

%% Strings over [a-zA-Z0-9], with "0" passed to default/2 as the default value.
alnum() ->
    Chars = seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9),
    default("0", list(oneof(Chars))).

If you happen to want shrinking in your content, use the choose generator.

-import(lists, [seq/2]).

%%% Strings of alphanumeric characters ([a-zA-Z0-9]).
alnum() ->
    Chars = seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9),
    list(oneof(Chars)).

%%% Strings of alphabetic characters ([a-zA-Z]).
alpha() ->
    Letters = seq($a, $z) ++ seq($A, $Z),
    list(oneof(Letters)).

%%% Strings of ASCII characters ([\x00-\x7F]).
ascii() ->
    Codes = seq(0, 127),
    list(oneof(Codes)).

%%% Strings made of spaces and tabs ([ \t]).
blank() ->
    %% $\s is the space character, spelled out so it cannot be overlooked.
    Blanks = [$\s, $\t],
    list(oneof(Blanks)).

%%% Strings of control characters ([\x00-\x1F\x7F]).
cntrl() ->
    %% Code points 0..31 plus 127, matching the class above.
    Controls = seq(0, 31) ++ [127],
    list(oneof(Controls)).

%%% Strings of decimal digits ([0-9]).
digit() ->
    Digits = seq($0, $9),
    list(oneof(Digits)).

%%% Strings of visible characters, i.e. anything except spaces and
%%% control characters ([\x21-\x7E]).
graph() ->
    %% $! is 33 and $~ is 126, the bounds of the class above.
    Visible = seq($!, $~),
    list(oneof(Visible)).

%%% Strings of lowercase letters ([a-z]).
lower() ->
    Lowercase = seq($a, $z),
    list(oneof(Lowercase)).

%%% Strings of visible characters and spaces, i.e. anything except
%%% control characters ([\x20-\x7E]).
print() ->
    %% $\s is 32 (space) and $~ is 126, the bounds of the class above.
    Printable = seq($\s, $~),
    list(oneof(Printable)).

%%% Strings of punctuation and symbols
%%% ([!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]).
punct() ->
    Symbols = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}\~",
    list(oneof(Symbols)).

%%% Strings of whitespace characters, including line breaks ([ \t\r\n\v\f]).
space() ->
    %% $\s is the space character.
    Whitespace = [$\s, $\t, $\r, $\n, $\v, $\f],
    list(oneof(Whitespace)).

%%% Strings of uppercase letters ([A-Z]).
upper() ->
    Uppercase = seq($A, $Z),
    list(oneof(Uppercase)).

%%% Strings of word characters: letters, numbers and underscores
%%% ([A-Za-z0-9_]).
word() ->
    WordChars = seq($a, $z) ++ seq($A, $Z) ++ seq($0, $9) ++ "_",
    list(oneof(WordChars)).

%%% Strings of hexadecimal digits ([A-Fa-f0-9]).
xdigit() ->
    HexDigits = seq($A, $F) ++ seq($a, $f) ++ seq($0, $9),
    list(oneof(HexDigits)).