# -*- coding: UTF-8 -*-
'''
Created on 20150206

@author: Hansen
'''
try:
    import urllib2
except ImportError:
    # Python 3 has no urllib2; urllib.request exposes the same
    # Request/urlopen API used below.
    import urllib.request as urllib2
import sys
import io
import json


# Fetch the response body from a URL
def fecth_html(index, url, keepHtml, resultFile):
    """Download *url* and append the shops it describes to *resultFile*.

    The misspelled public name is kept so existing callers keep working.

    Args:
        index: Value used to name the saved copy of the response; the copy
            is written to ``str(index) + '.html'``.
        url: Address to request. A fixed Firefox ``User-Agent`` header is
            sent with the request.
        keepHtml: When true, the raw response body is saved to disk before
            it is parsed.
        resultFile: Path handed to ``parse_json`` for the extracted rows.

    Raises:
        Whatever ``urlopen`` raises for a failed request, plus anything
        raised by ``parse_json``.
    """
    req = urllib2.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')
    rsp = urllib2.urlopen(req)
    try:
        content = rsp.read()
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        rsp.close()

    if keepHtml:
        # Binary mode: the body is raw bytes and must be stored verbatim
        # (text mode would translate newlines on Windows and rejects bytes
        # on Python 3).
        with open(str(index) + '.html', 'wb') as html_file:
            html_file.write(content)
        print("save file "+ str(index)+'.html: ok')
    parse_json(content, resultFile)


# Parse the JSON payload
def parse_json(content, resultFile):
    """Append one comma-separated line per shop in *content* to *resultFile*.

    *content* must be a JSON document whose top-level object has a
    ``shopBeans`` list. For every entry the following keys are read:
    ``filterFullName``, ``shopPowerTitle``, ``avgPrice``,
    ``mainRegionName``, ``address``, ``refinedScore1``, ``refinedScore2``
    and ``refinedScore3``. Spaces are stripped from the name, price and
    address, and ``\\r\\n`` inside the name or address becomes ``-``.

    The number of shops is printed. Rows are appended using the
    ``utf_16`` codec; nothing is written (and the file is not created)
    when the shop list is empty.

    NOTE: fields are joined with plain commas and are not quoted, so a
    value that itself contains a comma will shift the columns.

    Raises:
        ValueError: If *content* is not valid JSON.
        KeyError: If ``shopBeans`` or one of the per-shop keys is missing.
    """
    shops = json.loads(content)['shopBeans']
    print(len(shops))
    if not shops:
        return

    # Open the output once for the whole batch instead of once per shop,
    # and let the context manager guarantee it is flushed and closed.
    with io.open(resultFile, 'a', encoding='utf_16') as result:
        for shop in shops:
            szTitle = shop['filterFullName'].replace("\r\n", "-").replace(" ", "")
            szStar = shop['shopPowerTitle']
            # avgPrice is converted explicitly because it may be numeric.
            szMeanPrice = str(shop['avgPrice']).replace("\n", "").replace(" ", "")
            szAddress = shop['mainRegionName'] + "-" + shop['address']
            szAddress = szAddress.replace("\r\n", "-").replace(" ", "")
            szTaste = shop['refinedScore1']
            szEvn = shop['refinedScore2']
            szService = shop['refinedScore3']
            result.write(szTitle + "," + szStar + "," + szMeanPrice + ","
                         + szAddress + "," + szTaste + "," + szEvn + ","
                         + szService + "\n")