/* N-QUEENS problem, Fork95 (V1.7) implementation 9710 C. Kessler */

#include <fork.h>
#include <syscall.h>
#include <assert.h>
#include <io.h>

sh int N;            /* the number of queens */

#define PRINT_SOLUTIONS 0

sh simple_lock screen = 0;  /*lock for screen output*/

sh int solutions = 0;   /*counter for number of solutions*/

#if PRINT_SOLUTIONS
/* output an array q of n column indices of the queens ordered by row;
 * with an extra column index k for row n, and count the solutions:
 */
async void print_queens( pr int *q, pr int n, pr int k )
{
 pr int j;
 simple_lockup( &screen );
 mpadd( &solutions, 1 );
 printf("Solution %d:  ", solutions);
 for (j=0; j<n; j++) printf("%d ", q[j]);
 printf("%d\n",k);
 simple_unlock( &screen );
}

/* alternatively: produce simple graphical output by
 * plotting up to eight checkerboards in a line.
 * The join() statement is used to arrange the solutions
 * on-line as they are computed. Same parameters as above.
 */
sh char *str="................................................................................\n";

sync void print_p_solutions( pr int *q, pr int n, pr int k )
{
 sh int p = 0;
 sh int i;
 pr int pos;
 /* farm assert(n==N-1); */
 $ = mpadd(&p,1);
 solutions += p;
 seq printf("-------------------- Next %d solutions (%d..%d): --------------------\n",
            p, solutions-p+1, solutions);
 str[(N+1)*$] = '|';    /*init: draw board boundary symbols*/
 for (i=0; i<N-1; i++) {  /* shared loop: print N lines of str */
   pos = 1+$*(N+1)+q[i];
   str[pos] = 'Q';   /*draw queen on board $ in column q[i]*/
   farm write(1,str+$*(N+1),(N+1)); /*draw line i of all boards*/
   seq write(1,"\n",1);
   str[pos] = '.';   /*reset symbol on board $ in column q[i]*/
 }
 pos = 1+$*(N+1)+k;
 str[pos] = 'Q';   /*draw queen on board $ in column k*/
 farm write(1,str+$*(N+1),(N+1));
 seq write(1,"\n",1); 
 str[pos] = '.';   /*reset symbol on board $ in column k*/
}

async void print_up_to_eight_solutions( pr int *q, pr int n, pr int k )
{
 pr int j;
 pr int waittime = 0; /* last set of solutions must be printed also if p<8*/
#define maxtime 300    /*empirically determined value for waiting time*/
 join( 400;                                       /* stack size for body */
       (__NUM_PR__>=4 || (waittime++ > maxtime)); /* start-condition */
       __RANK__ < 8 )                             /* stayinside-cond */
      print_p_solutions(q,n,k);            /*body = bus tour*/
 else continue;
}
#endif


/** q is allocated with N entries, n<N.
 *  It has already been tested whether column index j occurs in q[].
 */
void seq_test_queen( int *q, int n, int j )
{
 int i, l;
 // two queens are on same diagonal iff col1-row1==col2-row2
 // two queens are on same counterdiagonal iff col1+row1==col2+row2
 for (i=0; i<n; i++) {
   if (j-q[i] == n-i) return;   /*fail*/
   if (q[i]-j == n-i) return;   /*fail*/
 }
 // now position j is feasible.
 q[n]=j;

 if (n<N-1) { // not yet arrived at last row:
    for (i=0; i<N; i++) {   // try all column indices i
      int thr, cq;
      for (cq=0,thr=0; cq<n; cq++)   // loop over q[]
        if (q[cq]==i) {thr=1;break;} // column i threatened by queen cq
      if (thr || (i==j)) continue;   // column i threatened by a queen
      seq_test_queen( q, n+1, i );   // recursive call for row n+1
    }
 }
 else  // final row
#if PRINT_SOLUTIONS
    print_up_to_eight_solutions( q, N-1, j );
#else
    syncadd( &solutions, 1 );
#endif
} 

/** Queens in row 0...n-1 have been positioned, 
 *  their column indices are stored in q[0:n-1].
 *  A new queen may be set at position (n,j).
 *  Test queen at (n,j) for diagonal threatening,
 *  and do recursive tests for rows n+1...N-1:
 */
sync void test_queen( sh int *q, sh int n, sh int j )
{
 sh int p = 0;                /* number of available procs */
 sh int *qq;
 sh int *cand; 
 sh int cands = 0;
 sh int feasible = 1;         /*flag to be cleared if j is unfeasible*/
 int i, l, fpp, gps;
 asm("stg fpp,%fpp");
 asm("stg gps,%gps");
 pprintf("after entry: $$=%d, fpp=%p gps=%p\n", $$, fpp, gps);
 $ = mpadd( &p, 1 );   /*renumber $ and determine p*/
 if (p==1) {
    qq = (int *)shalloc( N * sizeof(int) );
    farm {
      for (i=0; i<n; i++) qq[i] = q[i];
      seq_test_queen( qq, n, j );
    }
    shallfree();
    return;
 }
 // it has already been tested whether column index j occurs in q[] */
 farm 
  forall (i,0,n,p) {
   // two queens are on the same diagonal 
   // iff either row2-row1 == col1-col2 or row2-row1==col2-col1:
 pprintf("in forall: $$=%d\n", $$);
   if (j-q[i] == n-i)  feasible = 0;   // -> fail
   if (q[i]-j == n-i)  feasible = 0;   // -> fail
  }
 if (!feasible) return;

 if (n<N-1) {  // not yet arrived at last row.
    // generate list of N-n candidates for row n+1:
    cand = (int *)shalloc( (N-n) * sizeof(int) );
    farm 
      forall (i,0,N,p) { /* in parallel try all column indices i */
        int pos, cq, thr;
        for (cq=0,thr=0; cq<n; cq++) /* loop over q[] */
          if (q[cq]==i) {thr=1;break;}  /* column i threatened by queen cq */
        if (!thr && (i!=j)) {         /*column i not threatened by a queen */
          pos = mpadd( &cands, 1 );
          cand[pos] = i;   /*found feasible candidate*/
        }
      }
    if (!cands) return;
    if (p < cands) { // p is too small. Apply strip mining
      sh int iter = 0;
      int *myqq;
      int k;
      farm {   // one temporary array per processor:
        myqq = (int *)shmalloc( N * sizeof(int) );
        for (l=0; l<n; l++)  myqq[l] = q[l];
        myqq[n]=j;
        FORALL( k, &iter, 0, cands, 1 )
          seq_test_queen( myqq, n+1, cand[k] ); 
        shfree( myqq );
      }
    }
    else { // p is large enough. Make one subgroup for each candidate
      qq = (int *)shalloc( (n+1) * sizeof(int) );
      farm forall(l,0,n,p)  qq[l] = q[l];
      qq[n]=j;
      fork( cands; @=$%cands; ) 
        test_queen( qq, n+1, cand[@] );
      shallfree();
    }
 }
 else  // arrived at last row: print the solution
#if PRINT_SOLUTIONS
    seq print_up_to_eight_solutions( q, N-1, j );
#else
    syncadd( &solutions, 1 );
#endif
}

sh unsigned int starttime, stoptime;

void main( void )
{
 int Fpp, Gps;
 if (__PROC_NR__==0) {
    printf("Enter N = ");
    scanf("%d", &N );
    if (N > __STARTED_PROCS__) {
      printf("You should run at least %d processors\n", N);
      exit(0);
    }
    printf("\nComputing solutions to the %d-Queens problem...\n\n", N);
 }
 start {
   pprintf("$$=%d\n", $$);
   /*for the first queen all column positions in row 0 are feasible: */
   initTracing( 100000 );
   startTracing();
   starttime = getct();
 asm("stg fpp,%Fpp");
 pprintf("$$=%d fpp=%p gps=%p\n", $$,Fpp,Gps);
   fork( N; @=$%N; ) {
      asm("stg fpp,%Fpp");
      asm("stg gps,%Gps");
      pprintf("before entry: $$=%d fpp=%p gps=%p\n", $$,Fpp, Gps);
      test_queen( NULL, 0, @ );
      asm("stg fpp,%Fpp");
      asm("stg gps,%Gps");
      pprintf("after exit: $$=%d, fpp=%p ct=%d\n", $$, Fpp, getct());
   }
   asm("stg fpp,%Fpp");
   asm("stg gps,%Gps");
   pprintf("after fork: $$=%d, fpp=%p ct=%d\n", $$, Fpp, getct());
   stopTracing();
   stoptime = getct();
   seq {
      printf("Solutions: %d\nTIME: %d cc\n",
              solutions,   stoptime - starttime );  
      printf("[Press RETURN to continue]"); scanf("%c", &N );
   }
   writeTraceFile("queens.trv", "synchronous N-Queens implementation");
 pprintf("$$=%d\n", $$);
 }
 exit(0);
}
