/* * QLearning.java * * Created on 1 janvier 2003, 22:23 */ /** * * @author Vanden Berghen Frank * @version 1.1 * * Thanks to Jens G. Balchen for the bug correction line 183 * */ import java.awt.*; import java.awt.event.*; public class BotQLearning extends java.applet.Applet implements Runnable, ActionListener, KeyListener { private static final int nStateArm=9,nStateHand=19; private static final double degreeArmMin=Math.toRadians(-30), degreeArmMax=Math.toRadians(30), degreeHandMin=Math.toRadians(-150), degreeHandMax=Math.toRadians(0), heightBody=40, widthBody=80, lengthArm=60, lengthHand=40; private static final int ground=200, frame_size_x=800, frame_size_y=300; private double epsilon=.8, gamma=0.9, alpha=1.0; private static final double SQR(double a) { return a*a; } private static final int armMinus=0, armPlus=1, handMinus=2, handPlus=3, indefinite=-1, nAction=4; private static final double BADValue=Double.NEGATIVE_INFINITY; java.util.Random generateur=new java.util.Random(); private double Q[][][]; private double Xpos=100; private int s1Cur=nStateArm/2, s2Cur=nStateHand/2; private double totalRewards=0.0; private int totalSteps=0, runBatch=0; boolean running=true; Button run,skipHigh,skipLow,stop,reset,epsDec,epsInc,gamDec,gamInc,alphaDec,alphaInc,resetQ; TextField tfEps,tfGamma,tfAlpha; Thread myThread=null; java.text.DecimalFormat doubleFormatter; public BotQLearning() {} public void resetQ() { int i,j,k; for (i=0; i= epsilon) { // choose the best Direction double QBest=Q[s1][s2][possibleChoice[0]],v; int nBest=0,i; for (i=1; iQBest) { nBest=i; QBest=v; } } return possibleChoice[nBest]; } // choose a random Direction return possibleChoice[generateur.nextInt(nChoice)]; } private double getMaxQ(int s1,int s2) { int i; double q=Q[s1][s2][0]; for (i=1; i0 return (xOld-yOld*(x-xOld)/(y-yOld))-Math.sqrt(SQR(x)+SQR(y)); } // y>0 ; yOld>0 if (yOld>=0) return 0; // y>0 ; yOld<0: return -(x-y*(xOld-x)/(yOld-y))+Math.sqrt(SQR(xOld)+SQR(yOld)); } public void paint (Graphics g) { super.paint(g); double degreeS1=s1Cur*(degreeArmMax-degreeArmMin)/nStateArm+degreeArmMin, degreeS2=s2Cur*(degreeHandMax-degreeHandMin)/nStateHand+degreeHandMin+degreeS1, x=lengthArm*Math.cos(degreeS1)+lengthHand*Math.cos(degreeS2)+widthBody, y=lengthArm*Math.sin(degreeS1)+lengthHand*Math.sin(degreeS2)+heightBody, degreeRot=0.0; if (y<0) degreeRot=Math.atan(-y/x); double cosRot=Math.cos(degreeRot), sinRot=Math.sin(degreeRot); int x1=(int)(Xpos), y1=(int)(ground), x2=(int)(Xpos+cosRot*widthBody), y2=(int)(ground-sinRot*widthBody); double x3=Xpos-sinRot*heightBody, y3=ground-cosRot*heightBody, x4=x3+cosRot*widthBody, y4=y3-sinRot*widthBody, xArm=x4+lengthArm*Math.cos(degreeS1+degreeRot), yArm=y4-lengthArm*Math.sin(degreeS1+degreeRot); int ix4=(int)x4, iy4=(int)y4, ixArm=(int)xArm, iyArm=(int)yArm, ixHand=(int)(xArm+lengthHand*Math.cos(degreeS2+degreeRot)), iyHand=(int)(yArm-lengthHand*Math.sin(degreeS2+degreeRot)); g.setColor(new Color(100,120,12)); g.drawLine(x1,y1,x2,y2); g.drawLine(x1,y1,(int)x3,(int)y3); g.drawLine(x2,y2,ix4,iy4); g.drawLine((int)x3,(int)y3,ix4,iy4); g.setColor(Color.orange); g.drawLine(ix4, iy4, ixArm, iyArm); g.drawLine(ix4+1, iy4+1, ixArm+1, iyArm+1); g.drawLine(ix4-1, iy4-1, ixArm-1, iyArm-1); g.drawLine(ix4+1, iy4-1, ixArm+1, iyArm-1); g.drawLine(ix4-1, iy4+1, ixArm-1, iyArm+1); g.setColor(Color.red); g.drawLine(ixArm, iyArm, ixHand, iyHand); g.drawLine(ixArm+1, iyArm+1, ixHand, iyHand); g.drawLine(ixArm-1, iyArm-1, ixHand, iyHand); g.drawLine(ixArm+1, iyArm-1, ixHand, iyHand); g.drawLine(ixArm-1, iyArm+1, ixHand, iyHand); g.setColor(Color.white); g.drawLine(0,ground+1,frame_size_x,ground+1); g.drawLine(0,ground+2,frame_size_x,ground+2); g.setColor(Color.lightGray); g.drawLine(0,ground+3,frame_size_x,ground+3); g.drawLine(0,ground+4,frame_size_x,ground+4); g.setColor(Color.gray); g.drawLine(0,ground+5,frame_size_x,ground+5); g.drawLine(0,ground+6,frame_size_x,ground+6); g.setColor(Color.darkGray); g.drawLine(0,ground+7,frame_size_x,ground+7); g.drawLine(0,ground+8,frame_size_x,ground+8); if (totalSteps==0) g.drawString ("average speed : 0", 40, 50); else g.drawString ("average speed : "+(totalRewards/(double)totalSteps), 40, 50); if ((ixArm>frame_size_x)||(ixHand>frame_size_x)) Xpos=100; if (Xpos<0) Xpos=100; } private void startMyThread() { running=true; if (myThread==null) {myThread=new Thread(this); myThread.start();} } public void start() { runBatch=0; startMyThread(); } public void stop() { running=false; } public void actionPerformed(java.awt.event.ActionEvent e) { Object s=e.getSource(); if (s==run) start(); if (s==stop) stop(); if (s==resetQ) resetQ(); if (s==skipHigh) { runBatch=1000000; startMyThread(); } if (s==skipLow) { runBatch=30000; startMyThread(); } if (s==reset) { totalRewards=0; totalSteps=0; } if ((s==epsDec )&&(epsilon>0.01)) { epsilon-=0.1; tfEps.setText(doubleFormatter.format(epsilon)); } if ((s==gamDec )&&(gamma >0.01)) { gamma -=0.1; tfGamma.setText(doubleFormatter.format(gamma)); } if ((s==alphaDec)&&(alpha >0.01)) { alpha -=0.1; tfAlpha.setText(doubleFormatter.format(alpha)); } if ((s==epsInc )&&(epsilon<0.99)) { epsilon+=0.1; tfEps.setText(doubleFormatter.format(epsilon)); } if ((s==gamInc )&&(gamma <0.99)) { gamma +=0.1; tfGamma.setText(doubleFormatter.format(gamma)); } if ((s==alphaInc)&&(alpha <0.99)) { alpha +=0.1; tfAlpha.setText(doubleFormatter.format(alpha)); } repaint(); } public void keyTyped(java.awt.event.KeyEvent e) {} public void keyReleased(java.awt.event.KeyEvent e) {} public void keyPressed(java.awt.event.KeyEvent e) { int key=e.getKeyCode(); stop(); switch (key) { case KeyEvent.VK_RIGHT: if (s2Cur!=nStateHand-1) { totalRewards+=updateQ(handPlus); totalSteps++; } break; case KeyEvent.VK_LEFT: if (s2Cur!=0) { totalRewards+=updateQ(handMinus); totalSteps++; } break; case KeyEvent.VK_UP: if (s1Cur!=nStateArm-1) { totalRewards+=updateQ(armPlus); totalSteps++; } break; case KeyEvent.VK_DOWN: if (s1Cur!=0) { totalRewards+=updateQ(armMinus); totalSteps++; } break; } repaint (); } public void run() { Color c = new Color(123,200,145); while (running) { totalRewards+=updateQ(indefinite); totalSteps++; if (runBatch>0) { if (runBatch%100==0) { Graphics g=getGraphics(); g.setColor(c); g.fillRect(550,40,50,10); g.setColor(Color.darkGray); g.drawString ("iteration before completion: "+runBatch, 400, 50); } runBatch--; } else { repaint(); try { Thread.currentThread().sleep(100); } catch (InterruptedException e){} } } myThread=null; } public static void main(String args[]) { final BotQLearning app= new BotQLearning(); Frame aFrame= new Frame("Applet"); aFrame.addWindowListener( new WindowAdapter() { public void windowClosing(WindowEvent e) { app.stop(); app.destroy(); System.exit(0);} }); aFrame.add(app, BorderLayout.CENTER); aFrame.setSize(BotQLearning.frame_size_x,BotQLearning.frame_size_y); app.init(); app.start(); aFrame.setVisible(true); } }