Selenium WebDriver 用于驱动浏览器、模拟用户的操作,既可以用来做网站测试,也可以用来做爬虫(crawler)。我是用 Eclipse 开发的,需要先导入 selenium-server-standalone-***.jar(Right click project --> Properties --> Java Build Path --> Libraries --> Add External JARs...)。这个包可以在 Selenium 官网下载。
下面的代码演示了如何与一个网站做简单的交互,具体说明见代码中的注释。
1 public class IndeedJobSearch { 2 3 public static void main(String[] args) throws InterruptedException { 4 // TODO Auto-generated method stub 5 6 //Create firefox driver to drive the browser 7 //File pathBinary = new File("D:\\Programes\\tools\\FireFox\\firefox.exe"); 8 //FirefoxBinary Binary = new FirefoxBinary(pathBinary); 9 //FirefoxProfile firefoxPro = new FirefoxProfile(); 10 //WebDriver driver = new FirefoxDriver(Binary, firefoxPro); 11 //ChromeDriver or ie 12 System.setProperty("webdriver.chrome.driver",//webdriver.ie.driver 13 "D:\\Projects\\JavaWorkspace\\ThirdPartyLibs\\WebDriver\\chromedriver.exe");//IEDriverServer.exe 14 WebDriver driver = new ChromeDriver(); //InternetExplorerDriver 15 16 //Open Indeed home page 17 driver.get("http://www.indeed.hk/"); 18 //Find what field and enter Selenium 19 Thread.sleep(2000); 20 driver.findElement(By.id("what")).sendKeys("Selenium"); //"findElements" will return all 21 //Find location field and enter London 22 driver.findElement(By.id("where")).clear(); 23 Thread.sleep(2000); 24 driver.findElement(By.id("where")).sendKeys("Hong Kong"); 25 //Find FindJobs button and click on it 26 Thread.sleep(2000); 27 driver.findElement(By.id("fj")).click(); 28 //From job search results page, get page title and jobs count message 29 30 System.out.println(driver.getTitle()); 31 System.out.println(driver.findElement(By.id("searchCount")).getText()); 32 33 driver.close(); 34 } 35 36 }
要抓取数据,首先要定位页面上的元素。下面是常用的几种元素定位方法,具体说明见代码中的注释。
1 public class LocatingStrategies { 2 3 public static void main(String[] args) throws InterruptedException { 4 // TODO Auto-generated method stub 5 6 //Create firefox driver to drive the browser 7 File pathBinary = new File("D:\\Programes\\tools\\FireFox\\firefox.exe"); 8 FirefoxBinary Binary = new FirefoxBinary(pathBinary); 9 FirefoxProfile firefoxPro = new FirefoxProfile(); 10 WebDriver driver = new FirefoxDriver(Binary, firefoxPro); 11 12 //Open Indeed home page 13 driver.get("http://www.indeed.hk/"); 14 15 //Locating by ID 16 //driver.findElement(By.id("what")).sendKeys("Selenium"); //"findElements" will return all 17 18 //Locating by name 19 //driver.findElement(By.name("q")).sendKeys("Selenium"); 20 21 //Locating by LinkText 22 //driver.findElement(By.linkText("建立個人檔案")).click(); 23 24 //Locating by partialLinkText 25 //driver.findElement(By.partialLinkText("招聘廣告")).click(); 26 27 //Locating by Xpath By.xpath("//input[@placeholder=‘email‘]")).sendKeys("User NAme"); 28 // System.out.println( 29 // 30 // driver.findElement(By.xpath("//img[@title=‘Indeed 香港‘]")) 31 // .getAttribute("src") 32 // 33 // ); 34 35 //Locating by CssSelector By.cssSelector("css=input.input_submit")).click(); 36 //driver.findElement(By.cssSelector("input.input_submit")).click(); 37 38 //Locating by Tagname 39 System.out.println( 40 41 driver.findElements(By.tagName("a")).size() 42 43 ); 44 45 //Locating by ClassName 46 System.out.println( 47 48 driver.findElements(By.className("input_text")).size() 49 50 ); 51 driver.findElement(By.className("input_text")).sendKeys("Selenium"); 52 } 53 54 }
原文:http://www.cnblogs.com/bruceyo/p/4919876.html