One of the problems given to solve in HTDP was to write a program that solved the 8 queens problem, that is, to place 8 queens on a standard chess board so that none of them are attacking each other. I quite enjoyed working through this, and came up with something that used a straight size 64 scheme vector to represent the board which I thought would be reasonably efficient.
First of all, as encouraged by HTDP, there were the functions to manipulate and display the board:
(define-struct board (squares size))
(define (display-board b) …)
(define (coord->index x y n) …)
(define (board-ref b x y) …)
;; Constructors
(define (build-board n f) …)
(define (empty-board n) …)
and then the methods suggested within the book to solve the problem.
I’ve attached the code at the end along with fairly naive translations into Ocaml and C++.
The surprising thing to me though is that vast differences in speed. The Ocaml runs around 15 times faster than the scheme, and the C++ runs around 20 times faster than the Ocaml. Now, to some people, this might suggest that C++ is much faster than Ocaml which in turn is much faster than PLT Scheme. However, the wealth of information around on the net leads me to believe that this is not true. I therefore conclude that I am not very good at writing efficient scheme or efficient Ocaml.
I can see that there are minor algorithmic differences between the code. In particular, the C++ uses out-of-bound data rather than a Some / None type as in the Ocaml. Additionally, the C++ uses simple machine integers to represent the square types. It is difficult to believe that this accounts for the difference though, in which case, what am I doing wrong?
$ time mzscheme -qu queens.ss
cpu time: 1235 real time: 1281 gc time: 453
51886 recursive calls to (placements …)
mzscheme -qu queens.ss 0.00s user 0.00s system 0% cpu 1.468 total
$ ocamlopt -noassert -unsafe -ccopt -O3 -ccopt -fomit-frame-pointer q.ml -o q.exe
$ time ./q.exe
n==10
./q.exe 0.00s user 0.00s system 0% cpu 1.000 total
$ time ./a.exe 8 1000 | grep -v Qu
./a.exe 8 1000 0.00s user 0.00s system 0% cpu 4.938 total
mzscheme code
(module queens mzscheme
(require (lib "42.ss" "srfi"))
(define-struct board (squares size))
;; 28.2.1
(define (display-board b)
(printf "~n")
(let ((s (board-squares b))
(n (board-size b)))
(do-ec (: y n)
(: x n)
(begin
(display (if (= x 0) "(" " "))
(display (vector-ref s (coord->index x y n)))
(when (= x 7) (printf ")~n"))))))
;; 28.2.2
(define (coord->index x y n) (+ (* y n) x))
(define (board-ref b x y)
(vector-ref (board-squares b) (coord->index x y (board-size b))))
(define (build-board n f)
(let ((size (* n n)))
(make-board
(vector-of-length-ec size
(:range i size)
(f (remainder i n) (quotient i n)))
n)))
(define (empty-board n)
(build-board n (lambda (x y) #f)))
;; 28.2.3
(define-struct posn (x y))
(define (threatened? queen square)
(let ((x1 (posn-x queen))
(y1 (posn-y queen))
(x2 (posn-x square))
(y2 (posn-y square)))
(or (= x1 x2)
(= y1 y2)
(= (- x2 x1) (- y2 y1))
(= (- x1 y2) (- x2 y1)))))
;; 28.2.4
(define init-pos (make-posn 0 0))
(define (add-queen board p)
(let ((s (board-size board))
(qx (posn-x p))
(qy (posn-y p)))
(build-board s (lambda (x y)
(let ((c (board-ref board x y)))
(cond ((and (= x qx) (= y qy)) 'Qu)
(c c)
((threatened? p (make-posn x y)) 'At)
(else #f)))))))
(define (invalid-posn? board p)
(not (eq? (board-ref board (posn-x p) (posn-y p)) #f)))
(define (next-valid-posn board p)
(let ((s (board-size board)))
(let loop ((x (+ 1 (posn-x p)))
(y (posn-y p)))
(cond ((>= x s) (loop 0 (+ y 1)))
((>= y s) #f)
(else (let ((new-p (make-posn x y)))
(if (invalid-posn? board new-p)
(loop (+ x 1) y)
new-p)))))))
(define (display-posn p)
(if (posn? p)
(printf "(~a, ~a)~n" (posn-x p) (posn-y p))
(printf "Not posn~n")))
(define *count-placements* 0)
(define (placement board p rem)
(set! *count-placements* (+ *count-placements* 1))
(cond ((= rem 0) board)
((not p) #f)
((invalid-posn? board p) (placement board (next-valid-posn board p) rem))
(else (let loop ((cur-pos p))
(if (not cur-pos) #f
(let ((possible-board (placement (add-queen board cur-pos) init-pos (- rem 1))))
(if possible-board possible-board
(loop (next-valid-posn board cur-pos)))))))))
(define (solve-for-queens n)
(set! *count-placements* 0)
(time (display-board (placement (empty-board n) init-pos n)))
(printf "~a recursive calls to (placements ...)~n"
*count-placements*))
(solve-for-queens 8 )
(provide (all-defined)))
;; (require "queens.ss")
Ocaml code
type square = Queen | Safe | Attacked;;
type posn = Posn of int * int;;
type board = Board of square array * int;;
let string_of_square = function
| Queen -> "Qu"
| Safe -> "#f"
| Attacked -> "At";;
let index_of_coord x y n = (y*n) + x;;
let coord_of_index i n =
let y = i / n in (i - y*n, y);;
let board_ref b x y =
match b with
Board (squares, size) -> Array.get squares (index_of_coord x y size);;
let print_board board =
match board with
None -> print_string "Nonen"
| Some (Board (_, s) as b) ->
let b_x = (s - 1) in
let rec loop x y =
if y >= s then ()
else if x >= s then loop 0 (y+1)
else begin
print_string (if x = 0 then "(" else " ");
let i = index_of_coord x y s in
print_string (string_of_square (board_ref b x y));
if x = b_x then print_string ")n";
loop (x+1) y
end
in loop 0 0;;
let build_board n f =
Board (Array.init (n * n) f, n);;
let empty_board n =
build_board n (fun i -> Safe);;
let is_threatened queen square =
match queen, square with
(x1, y1), (x2, y2) ->
x1 == x2 ||
y1 == y2 ||
(x2 - x1) == (y2 - y1) ||
(x1 - y2) == (x2 - y1);;
let add_queen board p =
match board, p with
Board (squares, size), (qx, qy)
-> build_board size (fun i ->
let c, qi = (Array.get squares i), index_of_coord qx qy size in
if i == qi then Queen
else if c != Safe then c
else if (is_threatened p (coord_of_index i size)) then Attacked
else Safe);;
let is_invalid_posn board p =
match p with
None -> true
| Some (Posn (x, y) as posn) -> board_ref board x y != Safe;;
let next_valid_posn board p =
match board, p with
_, None -> None
| Board (_, size), Some (Posn (x, y)) ->
let rec loop x y =
if x >= size then loop 0 (y+1)
else if y >= size then None
else let new_posn = Posn (x, y) in
if is_invalid_posn board (Some new_posn) then loop (x+1) y
else Some new_posn
in loop (x+1) y;;
let valid_board b =
match b with
Some (Board (_, _)) -> true
| _ -> false;;
let init_pos = Some (Posn (0,0));;
let rec placement board posn rem =
match board, posn with
None, _ -> None
| _, None -> None
| Some (Board (squares, size) as b), Some (Posn (x, y) as p) ->
if rem = 0 then board
else if is_invalid_posn b posn then placement board (next_valid_posn b posn) rem
else let rec loop curpos =
match curpos with
None -> None
| Some (Posn (x, y)) ->
let possible_board = (placement
(Some (add_queen b (x, y)))
init_pos
(rem - 1)) in
if valid_board possible_board then possible_board
else loop (next_valid_posn b curpos)
in loop posn;;
let do_times f n =
Printf.printf "n==%dn" n;
let rec loop n =
f ();
if n != 1 then loop (n - 1)
in loop n;;
let solve_queens n =
(placement (Some (empty_board n)) init_pos n);;
do_times (fun () -> (solve_queens 8 )) 10;;
print_board (solve_queens 8 );;
C++ code
#include <vector>
#include <string>
#include <iostream>
using namespace std;
namespace Status {
static const int Qu = 0;
static const int Safe = 1;
static const int At = 2;
static const int Invalid = 3;
static string mapping[] = { "Qu", "#f", "At", "I" };
string string_of_square(int i) { return mapping[i]; }
}
using namespace Status;
typedef std::vector<int> Squares;
struct Posn
{
int x_;
int y_;
Posn(int x, int y) : x_(x), y_(y) { }
bool is_invalid() const { return (x_ < 0) && (y_ < 0); }
};
const Posn init_pos(0, 0);
class Board
{
private:
Squares squares_;
int size_;
int index_of_coord(int x, int y) const { return y * size_ + x; }
public:
Board(int size) : squares_((size * size), Safe), size_(size) { }
int ref(int x, int y) const { return squares_[index_of_coord(x, y)]; }
int ref(const Posn& p) const { return ref(p.x_, p.y_); }
void display() const {
for (int y = 0; y < size_; ++y) {
for (int x = 0; x < size_; ++x) {
cout << ((x == 0) ? "(" : " ");
cout << string_of_square(ref(x, y));
}
cout << ")n";
}
}
template<typename T>
static Board build_board(int size, T f) {
Board b(size);
for (int y = 0; y < size; ++y) {
for (int x = 0; x < size; ++x) {
b.squares_[b.index_of_coord(x, y)] = f(x, y);
}
}
return b;
}
Board add_queen(const Posn& p) const;
Posn next_valid_posn(const Posn& p) const {
for (int y = p.y_; y < size_; y++) {
for (int x = p.x_ + 1; x < size_; x++) {
if (ref(x, y) == Safe) { return Posn(x, y); }
}
}
return Posn(-1, -1);
}
static Board invalid_board() {
static Board b(1);
b.squares_[0] = Invalid;
return b;
}
bool is_invalid() const { return squares_[0] == Invalid; }
static Board placement(const Board& b, const Posn& p, int rem) {
if (b.is_invalid()) { return b; }
else if (p.is_invalid()) { return invalid_board(); }
else if (rem == 0) { return b; }
else if (b.ref(p) != Safe) {
return placement(b, b.next_valid_posn(p), rem);
} else {
Posn curpos = p;
while (! curpos.is_invalid()) {
Board b1 = placement(b.add_queen(curpos), init_pos, rem - 1);
if (b1.is_invalid()) {
curpos = b.next_valid_posn(curpos);
} else {
return b1;
}
}
return invalid_board();
}
}
};
bool is_threatened(int x1, int y1, int x2, int y2)
{
return x1 == x2 ||
y1 == y2 ||
(x2 - x1) == (y2 - y1) ||
(x1 - y2) == (x2 - y1);
}
class add_queen_fobj
{
public:
add_queen_fobj(const Board& b, const Posn& p) : b_(b), p_(p) { }
int operator()(int x2, int y2) {
int c = b_.ref(x2, y2);
int x1 = p_.x_;
int y1 = p_.y_;
if ((x1 == x2) && (y1 == y2)) { return Qu; }
else if (c != Safe) { return c; }
else if (is_threatened(x1, y1, x2, y2)) { return At; }
else { return Safe; }
}
private:
const Board& b_;
const Posn& p_;
};
Board Board::add_queen(const Posn& p) const {
return build_board(size_, add_queen_fobj(*this, p));
}
void usage(const char* prog)
{
cout << "usage: " << prog << " [size] [times]n";
exit(1);
}
int main(int argc, char* argv[])
{
if (argc != 3) { usage(argv[0]); }
int size = atoi(argv[1]);
int times = atoi(argv[2]);
Board b(size);
Board result(1);
for (int i = 0; i < times; ++i) {
result = Board::placement(b, init_pos, 8 );
}
result.display();
}
Did you compile the Ocaml to native code — that is, did you use “ocamlopt” instead of “ocamlc” or the repl? If you’re using the interpreter, a factor of 15 or 20 relative to C++ sounds about right, actually.
I’d expect a difference of at most 4x for compiled Ocaml versus compiled C++, for floating point or bit bashing code. If your app involves allocating lots of small short-lived objects, then I’d expect Ocaml to be marginally faster than C++.
Inlining a few of the functions speeds things up a bit.
With minimal changes this can be done as follows:
(module queens mzscheme
(require (lib “42.ss” “srfi”))
(define-syntax define-inline
(syntax-rules ()
[(_ (name arg ...) body)
(define-syntax name
(syntax-rules ()
[(_ arg ...) body]))]))
(define-struct board (squares size))
;; 28.2.1
(define (display-board b)
(printf “~n”)
(let ((s (board-squares b))
(n (board-size b)))
(do-ec (: y n)
(: x n)
(begin
(display (if (= x 0) “(” ” “))
(display (vector-ref s (coord->index x y n)))
(when (= x 7) (printf “)~n”))))))
;; 28.2.2
(define-inline (coord->index x y n)
(+ (* y n) x))
(define-inline (board-ref b x y)
(vector-ref (board-squares b) (coord->index x y (board-size b))))
(define (build-board n f)
(let ((size (* n n)))
(make-board
(vector-of-length-ec size
(:range i size)
(f (remainder i n) (quotient i n)))
n)))
(define-inline (empty-board n)
(build-board n (lambda (x y) #f)))
;; 28.2.3
(define-struct posn (x y))
(define (threatened? queen square)
(let ((x1 (posn-x queen))
(y1 (posn-y queen))
(x2 (posn-x square))
(y2 (posn-y square)))
(or (= x1 x2)
(= y1 y2)
(= (- x2 x1) (- y2 y1))
(= (- x1 y2) (- x2 y1)))))
;; 28.2.4
(define init-pos (make-posn 0 0))
(define (add-queen board p)
(let ((s (board-size board))
(qx (posn-x p))
(qy (posn-y p)))
(build-board s (lambda (x y)
(let ((c (board-ref board x y)))
(cond ((and (= x qx) (= y qy)) ‘Qu)
(c c)
((threatened? p (make-posn x y)) ‘At)
(else #f)))))))
(define (invalid-posn? board p)
(not (eq? (board-ref board (posn-x p) (posn-y p)) #f)))
(define (next-valid-posn board p)
(let ((s (board-size board)))
(let loop ((x (+ 1 (posn-x p)))
(y (posn-y p)))
(cond ((>= x s) (loop 0 (+ y 1)))
((>= y s) #f)
(else (let ((new-p (make-posn x y)))
(if (invalid-posn? board new-p)
(loop (+ x 1) y)
new-p)))))))
(define (display-posn p)
(if (posn? p)
(printf “(~a, ~a)~n” (posn-x p) (posn-y p))
(printf “Not posn~n”)))
(define *count-placements* 0)
(define (placement board p rem)
(set! *count-placements* (+ *count-placements* 1))
(cond ((= rem 0) board)
((not p) #f)
((invalid-posn? board p) (placement board (next-valid-posn board p) rem))
(else (let loop ((cur-pos p))
(if (not cur-pos) #f
(let ((possible-board (placement (add-queen board cur-pos) init-pos (- rem 1))))
(if possible-board possible-board
(loop (next-valid-posn board cur-pos)))))))))
(define (solve-for-queens n)
(set! *count-placements* 0)
(time (display-board (placement (empty-board n) init-pos n)))
(printf “~a recursive calls to (placements …)~n”
*count-placements*))
(solve-for-queens 8 )
(provide (all-defined)))
I know the point of the post is to compare the speed of similar approaches in different languages, but you can save a *lot* of time by changing the representation of a board.
At the moment add-queen creates a new array each time a new board configuration is visited.
The representation in
avoids this copying, and is much faster. Try it!
The link disappeared:
Queens implementation using Galore Heaps also in PLT Scheme.
Is using all that pattern matching in the OCaml code really necessary?
Are you compiling the Scheme version? Have you tried different compilers/interpreters (e.g. Chicken, Scheme 48, etc…)
The fastest solution I have seen the the 8-queens problem is written in MCPL by Martin Richards (designer of BCPL and MCPL). It uses clever bit-pattern tricks and only four bytes are needed to represent the board. See: http://citeseer.ist.psu.edu/richards97backtracking.html
Hi Shaurz,
Richards solution is is clever. However, it only counts the number of solutions, but the three bytes ld, cols, and rd doesn’t contain enough information to print out the board when a solution is found (when cols=all).
Here is a Richards solution in Scheme:
(define all #b11111111)
(define (queens n)
(let ([all (- (expt 2 n) 1)])
(let ([count 0])
(define (try ld cols rd)
(if (= cols all)
(set! count (+ count 1))
(let loop ([poss (bitwise-and all (bitwise-not (bitwise-ior ld cols rd)))])
(unless (zero? poss)
(let ([bit (bitwise-and poss (- poss))])
(try (* 2 (bitwise-ior ld bit))
(bitwise-ior cols bit)
(quotient (bitwise-ior rd bit) 2))
(loop (- poss bit)))))))
(try 0 0 0)
(printf "There was ~a solutions for a ~a board" count n))))
Welcome to DrScheme, version 369.2-svn21dec2006.
Language: Pretty Big (includes MrEd and Advanced Student).
> (queens 8)
There was 92 solutions for a 8 board
Can you try implementing your solution in Common Lisp? It would be interesting to see how it goes.
Try Scheme or Lush.
> Neel Krishnaswami Says:
> December 22nd, 2006 at 4:54 pm
>
> Did you compile the Ocaml to native code – that is, did you use “ocamlopt” instead of “ocamlc” or the repl? If you’re
> using the interpreter, a factor of 15 or 20 relative to C++ sounds about right, actually.
Yes I did. Here are the commands executed – I took the arguments to the compiler from the shootout
(q.exe is the compiled ocaml, a.exe is the compiled C++).
$ ocamlopt -noassert -unsafe -ccopt -O3 -ccopt -fomit-frame-pointer q.ml -o q.exe
$ time ./q.exe
n==10
./q.exe 0.00s user 0.00s system 0% cpu 1.000 total
$ time ./a.exe 8 1000 | grep -v Qu
./a.exe 8 1000 0.00s user 0.00s system 0% cpu 4.938 total
> Jens Axel Søgaard Says:
> December 22nd, 2006 at 5:19 pm
>
> Inlining a few of the functions speeds things up a bit.
> With minimal changes this can be done as follows:
[snip]
This gave a speed-up of about 20% – define-inline is a nice macro, thanks.
> Jens Axel Søgaard Says:
> December 22nd, 2006 at 5:32 pm
>
> I know the point of the post is to compare the speed of similar approaches in different languages,
> but you can save a *lot* of time by changing the representation of a board.
>
> At the moment add-queen creates a new array each time a new board configuration is visited.
> The representation in
>
> avoids this copying, and is much faster. Try it!
Yes, much faster, you’re right. This gives a similar speed to the C++.
> # Warren Henning Says:
> December 22nd, 2006 at 6:09 pm
>
> Is using all that pattern matching in the OCaml code really necessary?
I’ve no idea, this is my first piece of OCaml. Do you have any suggestions for improvements?
> shaurz Says:
> December 22nd, 2006 at 6:26 pm
>
> Are you compiling the Scheme version? Have you tried different compilers/interpreters (e.g. Chicken, Scheme 48, etc…)
No, this is running under mzscheme 352 on Windows which I believe includes a JIT.
> # smakk Says:
> December 23rd, 2006 at 3:28 am
>
> Can you try implementing your solution in Common Lisp? It would be interesting to see how it goes.
I have even less experience of writing CL than writing scheme and I suspect my solution would be
equally inefficient :-/
Why don’t you post your question to the appropriate comp.lang.ml newsgroup, instead of using bad technology (such as blogs)?
I suppose people would help you. But you have to look in the right places for the answers you want.
Here is a much shorter, faster and clearer implementation in OCaml:
open List;;
let rec safe (x1, y1) (x2, y2) =
x1 x2 && y1 y2 && x2 - x1 y2 - y1 && x1 - y2 x2 - y1;;
let rec search f n qs ps accu = match ps with
| [] -> if length qs = n then f qs accu else accu
| q::ps -> search f n qs ps (search f n (q::qs) (filter (safe q) ps) accu);;
let n = 8;;
let rec ps n x y =
if y=n then [] else
if x=n then ps n 0 (y + 1) else
(x, y) :: ps n (x + 1) y;;
let ps = ps n 0 0;;
Printf.printf "%d solutions\n" (search (fun _ -> (+) 1) n [] ps 0);;
This implementation starts with a list of possible board positions and tries a queen on each one, filtering out the safe remaining positions and recursing.
If you want to print the boards, you can use:
let print_board n queens =
let a = Array.make_matrix n n '.' in
iter (fun (x, y) -> a.(y).(x) Array.iter print_char row; print_newline()) a;
print_newline ();;
let () = search (fun qs () -> print_board n qs) n [] ps ();;
This is over 100x faster than your OCaml. It can be made much faster still by using a bitboard and bitmasks.
Cheers,
Jon.
Jon: As I understand it, the point was never to write a fast implementation, but rather to compare the run-time of similar implementation approaches under different languages.
Faster code can be written in any language, but that would make the comparison between languages useless as you’re not comparing similar algorithms.
Mark, my point was that this OCaml implementation goes out of its way to box and check values at run-time. This is necessary in Lisp or Scheme but not in OCaml, Standard ML, Haskell, F#, Scala or any other modern functional programming language.