Empty List For Hrefs To Achieve Pagination Through Javascript Onclick Functions
My intention is to achieve pagination through JavaScript functions. For example, I am starting from the URL http://events.justdial.com/events/index.php?city=Hyderabad
Solution 1:
If you track the requests the page makes, you'll find POST requests to the following URL: http://events.justdial.com/events/search.php
Post Data :
city:Hyderabad cat:0 area:0 fromDate: toDate: subCat:0 pageNo:2 fetch:events
and the response is in JSON format.
So, your code should be the following
import re
import json
class justdialdotcomSpider(BaseSpider):
    """Crawl Hyderabad event listings from events.justdial.com.

    The listing page paginates through JavaScript ``onclick`` handlers, so
    there are no ``href`` links to follow.  This spider instead replays the
    POST requests sent to ``search.php`` and decodes the JSON responses.

    Request chain: ``parse`` -> ``area_count`` -> ``parse_events`` ->
    ``parse_event``.

    NOTE(review): ``BaseSpider`` and ``FormRequest`` are not imported in the
    visible part of this file; presumably they come from Scrapy -- confirm
    the imports exist before running.
    """

    name = "justdialdotcom"
    domain_name = "www.justdial.com"
    start_urls = ["http://events.justdial.com/events/search.php"]

    # Single endpoint that serves every JSON query used below.
    SEARCH_URL = "http://events.justdial.com/events/search.php"
    # Page size used to turn the total event count into a page count.
    # NOTE(review): 10 is assumed to match the site's page size -- confirm.
    EVENTS_PER_PAGE = 10

    def _search_request(self, fetch, page, callback):
        """Build the POST request for one ``search.php`` listing query.

        Args:
            fetch: value of the ``fetch`` form field ('area' or 'events').
            page: 1-based page number, sent as the ``pageNo`` form field.
            callback: spider method that handles the response.
        """
        return FormRequest(
            url=self.SEARCH_URL,
            formdata={
                'fetch': fetch,
                'pageNo': str(page),
                'city': 'Hyderabad',
                'cat': '0',
                'area': '0',
                'fromDate': '',
                'toDate': '',
                'subCat': '0',
            },
            callback=callback,
        )

    def parse(self, response):
        """Issue the initial request for the per-area event counts."""
        return [self._search_request('area', 1, self.area_count)]

    def area_count(self, response):
        """Sum the per-area counts and request every page of events."""
        total_count = sum(
            int(area["count"]) for area in json.loads(response.body)
        )
        # Ceiling division: a trailing partial page still needs a request,
        # but an exact multiple of the page size must not produce an extra,
        # empty page (and the result stays an int on Python 3).
        pages_count = -(-total_count // self.EVENTS_PER_PAGE)
        for page in range(1, pages_count + 1):
            yield self._search_request('events', page, self.parse_events)

    def parse_events(self, response):
        """Request the detail record of every event on one listing page."""
        events = json.loads(response.body)
        # The first element is skipped -- presumably it is metadata rather
        # than an event; verify against a live response.  Slicing (unlike
        # ``pop(0)``) does not raise when the list is empty.
        for event_details in events[1:]:
            yield FormRequest(
                url=self.SEARCH_URL,
                formdata={
                    'fetch': 'event',
                    'eventId': str(event_details["id"]),
                },
                callback=self.parse_event,
            )

    def parse_event(self, response):
        """Decode one event's JSON payload and return it as the scraped item.

        Returns:
            A one-element list holding the decoded payload.
        """
        event_details = json.loads(response.body)
        # TODO: map ``event_details`` onto the project's item class once its
        # fields are defined.  Until then the decoded payload itself is
        # returned so no ``NameError`` is raised on an undefined ``item``.
        # NOTE(review): returning plain dicts needs a Scrapy version that
        # accepts dict items -- confirm for the version in use.
        return [event_details]
Post a Comment for "Empty List For Hrefs To Achieve Pagination Through Javascript Onclick Functions"