Skip to main content

Screen Based Automation

Screen Based Automation

This is an example of automating the Windows 10 Calculator using screen-based automation.

Steps include:

  1. Open calculator using start menu
  2. Maximize calculator window and switch to scientific mode
  3. Calculate logarithm
  4. Extract result from screen using OCR
  5. Close calculator window

The application class returns a WindowsPage object, which provides access to the Windows 10 desktop.

WindowsScreenApplication.java
package eu.ibagroup.easyrpa.calculator;

import eu.ibagroup.easyrpa.calculator.page.WindowsPage;
import eu.ibagroup.easyrpa.engine.rpa.Application;
import eu.ibagroup.easyrpa.engine.rpa.driver.ScreenDriver;
import eu.ibagroup.easyrpa.engine.rpa.element.ScreenElement;

/**
 * Application entry point for the screen-based calculator demo.
 *
 * <p>Opening the application does not launch any program by itself; it only
 * creates the {@link WindowsPage} page object through {@code createPage}.
 */
public class WindowsScreenApplication extends Application<ScreenDriver, ScreenElement> {

	/**
	 * Creates the application on top of the supplied screen driver.
	 *
	 * @param driver screen driver forwarded to the {@code Application} constructor
	 */
	public WindowsScreenApplication(ScreenDriver driver) {
		super(driver);
	}

	/**
	 * Opens the application by building its initial page object.
	 *
	 * @param args ignored by this implementation
	 * @return the page object created for {@code WindowsPage.class}
	 */
	@Override
	public WindowsPage open(String... args) {
		final WindowsPage desktopPage = createPage(WindowsPage.class);
		return desktopPage;
	}
}
WindowsPage contains only a single public method, openCalcApp(), which opens the calculator main window, represented as CalculatorMainPage.

WindowsPage.java
package eu.ibagroup.easyrpa.calculator.page;

import eu.ibagroup.easyrpa.engine.rpa.element.ScreenElement;
import eu.ibagroup.easyrpa.engine.rpa.page.ScreenPage;
import eu.ibagroup.easyrpa.engine.rpa.po.annotation.FindBy;
import eu.ibagroup.easyrpa.engine.rpa.po.annotation.Wait;
import lombok.extern.slf4j.Slf4j;
import org.sikuli.script.Key;

/**
 * Page object for the Windows desktop.
 *
 * <p>Its only public operation, {@link #openCalcApp()}, launches the calculator
 * by clicking the Start button, typing the application name and pressing ENTER.
 */
@Slf4j
public class WindowsPage extends ScreenPage {

	/** Pause, in milliseconds, inserted after each UI step of the launch sequence. */
	private static final long ANIMATION_TIMEOUT = 1000;

	/** Start button, located on screen by matching the reference image. */
	@FindBy(image = "images/windows-start-1.png")
	@Wait(3)
	private ScreenElement start;

	/**
	 * Launches the calculator from the Start menu and returns its page object.
	 *
	 * @return page object created for {@code CalculatorMainPage.class}
	 */
	public CalculatorMainPage openCalcApp() {
		launchFromStart("calculator");
		sleep(ANIMATION_TIMEOUT);

		return createPage(CalculatorMainPage.class);
	}

	/**
	 * Clicks the Start button, types the application name and confirms with ENTER,
	 * pausing for {@link #ANIMATION_TIMEOUT} between the steps.
	 *
	 * @param appName text typed after the Start button has been clicked
	 */
	private void launchFromStart(String appName) {
		start.click();
		sleep(ANIMATION_TIMEOUT);
		getDriver().sendKeys(appName);
		sleep(ANIMATION_TIMEOUT);
		getDriver().sendKeys(Key.ENTER);
	}

	/**
	 * Pauses the current thread for the given time.
	 *
	 * <p>If the thread is interrupted while sleeping, the interruption is logged and
	 * the thread's interrupt status is restored so that callers can still detect it.
	 *
	 * @param millis time to sleep, in milliseconds
	 */
	private static void sleep(long millis) {
		try {
			Thread.sleep(millis);
		} catch (InterruptedException e) {
			log.error(e.getMessage(), e);
			// Thread.sleep clears the interrupt status when it throws; set it again
			// so the cancellation request is not silently lost.
			Thread.currentThread().interrupt();
		}
	}
}

CalculatorMainPage could provide handles for all the buttons and menus rendered in the calculator window, but we have restricted it to the ones needed for the demo.

CalculatorMainPage.java
package eu.ibagroup.easyrpa.calculator.page;

import eu.ibagroup.easyrpa.engine.rpa.element.ScreenElement;
import eu.ibagroup.easyrpa.engine.rpa.page.ScreenPage;
import eu.ibagroup.easyrpa.engine.rpa.po.annotation.FindBy;
import eu.ibagroup.easyrpa.engine.rpa.po.annotation.Wait;
import lombok.extern.slf4j.Slf4j;
import org.sikuli.script.Key;
import org.sikuli.script.Region;

/**
 * Page object for the calculator main window.
 *
 * <p>Exposes a few buttons located by image matching, reads the calculator display
 * through text recognition of a screen region, and drives the window with keyboard
 * shortcuts.
 */
@Slf4j
public class CalculatorMainPage extends ScreenPage {

	/** Height of the display region, as a fraction of the screen height. */
	private static final double CALC_DISPLAY_HEIGHT_RATIO = 0.14;

	/** Width of the display region, as a fraction of the screen width. */
	private static final double CALC_DISPLAY_WIDTH_RATIO = 0.85;

	/** Top edge of the display region, as a fraction of the screen height. */
	private static final double CALC_DISPLAY_Y_RATIO = 0.12;

	@FindBy(image = "images/btn-navigation.png")
	@Wait(3)
	private ScreenElement navigation;

	@FindBy(image = "images/btn-four.png")
	@Wait(3)
	private ScreenElement fourBtn;

	@FindBy(image = "images/btn-six.png")
	@Wait(3)
	private ScreenElement sixBtn;

	@FindBy(image = "images/btn-eight.png")
	@Wait(3)
	private ScreenElement eightBtn;

	@FindBy(image = "images/btn-plus.png")
	@Wait(3)
	private ScreenElement plusBtn;

	@FindBy(image = "images/btn-equals.png")
	@Wait(3)
	private ScreenElement equalsBtn;

	/** Clicks the "4" button. */
	public void four() {
		fourBtn.click();
	}

	/** Clicks the "6" button. */
	public void six() {
		sixBtn.click();
	}

	/** Clicks the "8" button. */
	public void eight() {
		eightBtn.click();
	}

	/** Clicks the "+" button. */
	public void plus() {
		plusBtn.click();
	}

	/**
	 * Clicks the "=" button.
	 *
	 * <p>This is an overload taking no arguments; it does not override
	 * {@link Object#equals(Object)}.
	 */
	public void equals() {
		equalsBtn.click();
	}

	/**
	 * Reads the text shown in the calculator display area.
	 *
	 * <p>The region is derived from the screen size using the {@code CALC_DISPLAY_*}
	 * ratios and a fixed left offset of 5 pixels, so it presumably relies on the
	 * calculator window being maximized (see {@link #maximizeWindow()}).
	 *
	 * @return text recognized inside the computed region
	 */
	public String getDisplayText() {
		int h = (int) (CALC_DISPLAY_HEIGHT_RATIO * getDriver().getScreen().h);
		int w = (int) (CALC_DISPLAY_WIDTH_RATIO * getDriver().getScreen().w);
		int y = (int) (CALC_DISPLAY_Y_RATIO * getDriver().getScreen().h);
		int x = 5;

		log.debug("Reading region: x={} y={} w={} h={}", x, y, w, h);

		return getRegionText(x, y, w, h);
	}

	/**
	 * Highlights the given screen rectangle and returns the text recognized in it.
	 *
	 * @param x left edge of the region, in pixels
	 * @param y top edge of the region, in pixels
	 * @param w region width, in pixels
	 * @param h region height, in pixels
	 * @return text recognized inside the region
	 */
	private String getRegionText(int x, int y, int w, int h) {
		Region regResult = getDriver().getScreen().newRegion(x, y, w, h);
		// Visual feedback for the demo: show which area is about to be read.
		regResult.highlight(2);

		return regResult.text();
	}

	/** Closes the active window by sending ALT+F4. */
	public void exit() {
		try {
			getDriver().getScreen().keyDown(Key.ALT);
			getDriver().getScreen().keyDown(Key.F4);
		} finally {
			// Always release the keys so a failure cannot leave ALT held down on the desktop.
			getDriver().getScreen().keyUp();
		}
	}

	/** Maximizes the active window by sending ALT+SPACE followed by "x". */
	public void maximizeWindow() {
		try {
			getDriver().getScreen().keyDown(Key.ALT);
			getDriver().getScreen().keyDown(Key.SPACE);
			getDriver().getScreen().keyDown("x");
		} finally {
			// Always release the keys so a failure cannot leave ALT held down on the desktop.
			getDriver().getScreen().keyUp();
		}
	}
}

getDisplayText() is worth a specific mention since it employs OCR to read a certain region of the screen. This region is a rectangle defined by its upper-left vertex (x, y), its height (h) and its width (w).