Web scraper
05 May 2018
- Python
import requests
from lxml import html
# Request headers sent with every fetch: a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/42.0.2311.90 Safari/537.36'
    ),
}

# XPath selecting the text nodes under <span id="productTitle"> inside an <h1>.
xpath_product = '//h1//span[@id="productTitle"]//text()'

# XPath selecting the data-brand attribute of <div id="mbc">.
xpath_brand = '//div[@id="mbc"]/@data-brand'
def getBrandName(url):
page = requests.get(url,headers=headers)
parsed = html.fromstring(page.content)
return parsed.xpath(xpath_brand)- Javascript w phantomjs
// PhantomJS script: open an Amazon product page and print the text of the
// element with id "bylineInfo".
var page = require('webpage').create();

// Extra header sent with every request so the site sees a desktop browser.
page.customHeaders = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36'};

page.open('https://www.amazon.com/Folgers-Coffee-Flavorful-Aromatic-Canister/dp/B01JETQ3LC?th=1', function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        // The function passed to evaluate() runs inside the loaded page;
        // only its return value comes back to this script.
        var byline = page.evaluate(function () {
            var node = document.getElementById('bylineInfo');
            // The element may be absent (e.g. a different page layout), so
            // return null instead of dereferencing a missing node.
            return node ? node.textContent : null;
        });
        console.log(byline);
    }
    // Exit on both paths so the PhantomJS process does not keep running.
    phantom.exit();
});