Amazon Product Advertising API ItemSearch with BeautifulSoup4 and lxml in Python 3.x

aws
In [ ]:
import base64
import datetime
import hashlib
import hmac
import urllib
import urllib.parse

import requests
from bs4 import BeautifulSoup
In [ ]:
# Build, sign (AWS Signature Version 2, HMAC-SHA256) and send an ItemSearch
# request to the Amazon Product Advertising API. The raw XML response body
# ends up in `html_source`.

# Credentials are created in the IAM console:
# https://console.aws.amazon.com/iam/home?rw_useCurrentProtocol=1#/security_credential
access_key = 'xxx'
secret_key = 'xxx'

endpoint = 'http://webservices.amazon.co.jp/onca/xml?'
# Request timestamp in UTC. datetime.utcnow() is deprecated since Python 3.12;
# an aware "now" formatted the same way yields the identical string.
ts = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

params = {
    'Service': 'AWSECommerceService',
    'AWSAccessKeyId': access_key,
    'AssociateTag': 'asterisk37n-22',
    'Operation': 'ItemSearch',
    # NOTE: SearchIndex appeared to have no effect when this was written; the
    # result depended only on BrowseNode.
    'SearchIndex': 'Apparel',
    'BrowseNode': '2128134051',
    'ResponseGroup': 'Images,ItemAttributes,Offers,SalesRank,Reviews',
    'Version': '2013-08-01',
}

# Signature Version 2 requires RFC 3986 percent-encoding. urlencode() defaults
# to quote_plus(), which turns a space into '+' instead of '%20' and would
# make the signature invalid for any value containing a space, so encode with
# quote() explicitly.
parsed_url_params = urllib.parse.urlencode(
    sorted(params.items()), quote_via=urllib.parse.quote)

# The string to sign must also contain the timestamp, with all parameters
# sorted by name.
params.update({'Timestamp': ts})
parsed_canonical_params = urllib.parse.urlencode(
    sorted(params.items()), quote_via=urllib.parse.quote)

# Derive host and path from the endpoint so the signed values can never drift
# from the URL that is actually requested.
endpoint_parts = urllib.parse.urlsplit(endpoint)
canonical_string = '\n'.join([
    'GET',
    endpoint_parts.netloc,
    endpoint_parts.path,
    parsed_canonical_params,
])

dig = hmac.new(
    secret_key.encode('utf-8'),
    msg=canonical_string.encode('utf-8'),
    digestmod=hashlib.sha256,
).digest()
code = base64.b64encode(dig).decode()  # bytes -> str (Python 3)
# safe='' so that '/' in the base64 text is escaped along with '+' and '='.
encoded_signature = urllib.parse.quote(code, safe='')

url = (endpoint + parsed_url_params
       + '&Timestamp=' + urllib.parse.quote(ts, safe='')
       + '&Signature=' + encoded_signature)
# A timeout keeps the notebook from hanging on a stalled connection.
res = requests.get(url, timeout=10)
# Fail loudly on HTTP errors (e.g. a rejected signature or throttling)
# instead of silently parsing an error document in the next cell.
res.raise_for_status()
html_source = res.text
In [ ]:
# Parse the XML response and print the <ASIN> element of every returned item.
# The 'xml' parser (backed by lxml) preserves the case of tag names, so the
# lookup has to use the response's own spelling: item elements are <Item>,
# and a lowercase 'item' matches nothing.
bs = BeautifulSoup(html_source, 'xml')
items = bs.find_all('Item')
for item in items:
    # Prints the whole <ASIN> tag; use item.ASIN.text for just the identifier.
    print(item.ASIN)

コメント

人気の投稿