# -*- coding: UTF-8 -*-
import math
import random
import sys
import threading
import time
from time import ctime, sleep
import requests
import xlrd
# City codes whose districts should be crawled.
# NOTE(review): the entries are blank in this copy -- fill in real city codes.
target_citycode_list = ['', '', '', '']

# Nested lookup: citycode -> adcode -> {'name': ..., 'adcode': ...}.
adcode_dic = {}

# Workbook holding the city-code table; it is read from the second sheet.
FEXCEL = '高德地图API_城市编码对照表.xlsx'
data = xlrd.open_workbook(FEXCEL)
table = data.sheets()[1]
nrows = table.nrows
ncols = table.ncols

# Each row is read as (name, adcode, citycode); keep rows of the target cities.
for row_index in range(nrows):
    row = table.row_values(row_index)
    name_, adcode, citycode = row[0], row[1], row[2]
    if citycode in target_citycode_list:
        adcode_dic.setdefault(citycode, {})[adcode] = {
            'name': name_,
            'adcode': adcode,
        }

# Flat list of every adcode to query, grouped by city in insertion order.
REQUEST_LIST = [
    district['adcode']
    for districts in adcode_dic.values()
    for district in districts.values()
]
REQUEST_LEN = len(REQUEST_LIST)

# Number of requests handled per worker thread (see main()).
EACH_THREAD_REQUEST_NUM = 1
# Not referenced by the code visible in this file.
MAX_PAGINATION = 100
# Request throttle: pause QPS_TIME_UNIT seconds once every QPS requests.
QPS = 50
QPS_TIME_UNIT = 1
# 'infocode' value treated as success.
# NOTE(review): blank in this copy -- confirm the real success code.
INFOCODE_OK = ''
# API keys loaded by dynamic_write_pool_file(); touse_key is the active one.
KEY_POOL_LIST = []
touse_key = ''
def dynamic_write_pool_file(file_name_key_pool='key_pool.pool'):
    """Reload the global ``KEY_POOL_LIST`` from the key-pool file.

    Every usable line has at least two tab-separated columns; the key is the
    first whitespace-delimited token of the second column. Lines that do not
    fit that shape are reported and skipped. After loading, a message says
    whether the pool differs from the previously loaded one.

    Args:
        file_name_key_pool: Path of the UTF-8 pool file. Defaults to the
            file name the script has always used.

    Raises:
        OSError: If the pool file cannot be opened.
    """
    global KEY_POOL_LIST
    keypoollist_old = KEY_POOL_LIST
    fresh_keys = []
    # The context manager guarantees the handle is closed.
    with open(file_name_key_pool, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                fresh_keys.append(line.split('\t')[1].split()[0])
            except IndexError as exc:
                # No second column, or the second column is blank.
                print(exc)
    KEY_POOL_LIST = fresh_keys
    # Compare the lists directly: list.reverse() returns None and would also
    # flip the pool order in place on every reload.
    print(63, keypoollist_old)
    print(64, KEY_POOL_LIST)
    if keypoollist_old == KEY_POOL_LIST:
        print(time.time(), '-old')
    else:
        print(time.time(), '66POOL-new')
# Populate KEY_POOL_LIST once at import time.
dynamic_write_pool_file()

# Endpoint used for the POI text search.
URL_TYPE = 'http://restapi.amap.com/v3/place/text'
# Key currently in use; change_key() assigns it.
touse_key = ''

# Page size, and the pre-built query-string fragments appended to each URL.
OFFSET_NUM = 24
OFFSET = '&offset=' + str(OFFSET_NUM)
CITYLIMIT = '&citylimit=true'
# Not referenced by the code visible in this file.
# NOTE(review): the parameter is spelled 'extention' here -- confirm against
# the API documentation before using it.
EXTENTION = '&extention=all'

# POI type codes to query for every adcode.
# NOTE(review): the eleven entries are blank in this copy.
POI_TYPES_LIST = [''] * 11

# Probe URL used by change_key() to check whether a key is accepted.
URL_FOR_CHANGE_KEY = 'http://restapi.amap.com/v3/place/text?key=%s&types=060100&city=010&OFFSET=1'
# Number of probe requests issued so far (drives the throttle in change_key()).
change_key_qps = 0
def change_key():
    """Switch the global ``touse_key`` to a pool key that passes a probe request.

    The pool is reloaded from disk, then walked once starting at a random
    position. Keys other than the one currently in use are tried first; the
    current key is re-probed only as a last resort. The first key whose probe
    response carries ``INFOCODE_OK`` becomes ``touse_key``.

    Raises:
        SystemExit: With message ``'NOInvalidKEY'`` when the pool is empty or
            no key passes the probe. When raised in a worker thread this ends
            that thread only.
    """
    global touse_key, change_key_qps
    dynamic_write_pool_file()
    # AMap does not honor its own QPS / daily-quota policy, so its return
    # codes cannot be used to schedule keys; start at a random position.
    pool_num = len(KEY_POOL_LIST)
    if pool_num == 0:
        sys.exit('NOInvalidKEY')
    # randrange excludes pool_num, so the start is always a valid index.
    start = random.randrange(pool_num)
    rotated = KEY_POOL_LIST[start:] + KEY_POOL_LIST[:start]
    candidates = [key for key in rotated if key != touse_key]
    if touse_key in rotated:
        candidates.append(touse_key)
    # Each key is probed at most once per call, so the search always ends.
    for key in candidates:
        url = URL_FOR_CHANGE_KEY % (key)
        change_key_qps += 1
        if change_key_qps % QPS == 0:
            sleep(QPS_TIME_UNIT)
        try:
            json_ = requests.get(url).json()
        except (requests.RequestException, ValueError) as exc:
            print('requests.get(url)', exc)
            continue
        if isinstance(json_, dict) and json_.get('infocode') == INFOCODE_OK:
            touse_key = key
            return
    sys.exit('NOInvalidKEY')
# Collected results: adcode -> list of POI pages (each page is a list of dicts).
tosupply_dic = dict()
# Adcodes still waiting for a successful fetch. This is the same list object
# as REQUEST_LIST, not a copy, so changes to one are visible through the other.
todo_list = REQUEST_LIST
# Number of search requests issued so far; drives the QPS throttle.
requests_counter = 0
def _fetch_json(url):
    """GET *url* and return the decoded JSON object, or ``None`` on failure."""
    global requests_counter
    requests_counter += 1
    # Pause once every QPS requests to stay under the rate limit.
    if requests_counter % QPS == 0:
        sleep(QPS_TIME_UNIT)
    try:
        payload = requests.get(url).json()
    except (requests.RequestException, ValueError) as exc:
        print('requests.get(url)', exc)
        return None
    return payload if isinstance(payload, dict) else None


def _fetch_type_pages(request, poi_type):
    """Return all POI pages for one adcode/type pair, or ``None`` on failure."""
    # 'types' matches the parameter name used by URL_FOR_CHANGE_KEY.
    url = '%s?key=%s&city=%s&types=%s%s%s' % (
        URL_TYPE, touse_key, request, poi_type, OFFSET, CITYLIMIT)
    first = _fetch_json(url)
    if first is None or first.get('infocode') != INFOCODE_OK:
        return None
    try:
        page_count = math.ceil(int(first.get('count', 0)) / OFFSET_NUM)
    except (TypeError, ValueError):
        return None
    if page_count <= 0:
        return []
    # The unpaged response is taken as page 1; fetch pages 2..page_count.
    pages = [first.get('pois', [])]
    for page in range(2, page_count + 1):
        url_ = '%s&page=%s' % (url, page)
        print(url_)
        body = _fetch_json(url_)
        if body is None or body.get('infocode') != INFOCODE_OK:
            return None
        # Store this page's own result, not the first response again.
        pages.append(body.get('pois', []))
    return pages


def supply_dic(request):
    """Fetch every POI page for adcode *request* and store it in ``tosupply_dic``.

    All types in ``POI_TYPES_LIST`` are queried. Results are committed only
    when every request succeeded, so a retry replaces the entry instead of
    appending duplicate pages. On the first failure the adcode is queued in
    ``todo_list`` for a later retry, the API key is rotated, and the call
    returns.

    NOTE(review): main() runs this in several threads and the shared globals
    are updated without a lock.

    Args:
        request: The adcode to query.
    """
    if requests_counter == 0:
        change_key()
    collected = []
    for poi_type in POI_TYPES_LIST:
        pages = _fetch_type_pages(request, poi_type)
        if pages is None:
            # Redundancy: keep the adcode queued so it is retried later.
            if request not in todo_list:
                todo_list.append(request)
            change_key()
            return
        collected.extend(pages)
    if collected:
        tosupply_dic[request] = collected
    try:
        todo_list.remove(request)
    except ValueError:
        pass
# Retrying stops once at most this many adcodes remain in todo_list.
MAX_EXCEPTION_URL_NUM = 0


def deal_exception_list(max_rounds=None):
    """Retry the adcodes left in ``todo_list`` until few enough remain.

    Each round prints the pending list and calls ``supply_dic`` for every
    pending adcode. Rounds repeat while more than ``MAX_EXCEPTION_URL_NUM``
    adcodes are pending.

    Args:
        max_rounds: Optional upper bound on retry rounds. ``None`` (the
            default) retries without limit.
    """
    rounds_done = 0
    while True:
        print(todo_list)
        if len(todo_list) <= MAX_EXCEPTION_URL_NUM:
            return
        if max_rounds is not None and rounds_done >= max_rounds:
            return
        rounds_done += 1
        # Iterate over a snapshot: supply_dic() adds to and removes from
        # todo_list while the round is running.
        for adcode in list(todo_list):
            supply_dic(adcode)
class MyThread(threading.Thread):
    """Thread that calls ``func(args)`` with ``args`` passed as one argument."""

    def __init__(self, func, args, name=''):
        """Store the callable, its single argument and the thread name.

        Args:
            func: Callable run by the thread.
            args: Object handed to ``func`` as its only positional argument;
                it is not unpacked.
            name: Thread name; defaults to the empty string.
        """
        super().__init__()
        self.name = name
        self.func = func
        self.args = args

    def run(self):
        """Invoke the stored callable with the stored argument."""
        self.func(self.args)
def _write_pois_csv(path, pois_by_request):
    """Write every collected POI to *path* as CSV with a header row."""
    import csv

    fields = (
        'id', 'name', 'type', 'typecode', 'biz_type', 'address', 'location',
        'tel', 'distance', 'biz_ext', 'pname', 'cityname', 'adname',
        'shopid', 'shopinfo', 'poiweight',
    )
    # One handle, closed by the context manager. csv.writer quotes fields
    # that contain commas, so they no longer break the column layout.
    with open(path, 'w', encoding='utf-8-sig', newline='') as fo:
        writer = csv.writer(fo, lineterminator='\n')
        writer.writerow(fields)
        for pages in pois_by_request.values():
            for page in pages:
                for poi in page:
                    row = {field: poi.get(field, '') for field in fields}
                    location = row['location']
                    if isinstance(location, str):
                        # The comma in 'location' becomes a space.
                        row['location'] = location.replace(',', ' ')
                    writer.writerow([row[field] for field in fields])


def main():
    """Crawl every adcode in worker threads, retry failures, write the CSV.

    One ``MyThread`` running ``supply_dic`` is started per adcode. After all
    threads finish, ``deal_exception_list`` retries what is still pending and
    the collected POIs are written to ``GEN_GD_business_building.csv``.
    """
    print('starting at:', ctime())
    thread_sum = math.ceil(REQUEST_LEN / EACH_THREAD_REQUEST_NUM)
    threads_list = []
    for adcode in REQUEST_LIST[:thread_sum]:
        print(184, adcode)
        threads_list.append(MyThread(supply_dic, (adcode), supply_dic.__name__))
    # The main process exits only after all non-daemon threads have exited.
    for t in threads_list:
        # Set the flag; assigning to setDaemon would only shadow the method.
        t.daemon = False
        t.start()
    # Wait for all threads to finish.
    for t in threads_list:
        t.join()
    deal_exception_list()
    FGEN = 'GEN_GD_business_building.csv'
    _write_pois_csv(FGEN, tosupply_dic)
# Run the crawl only when the file is executed as a script.
if __name__ == '__main__':
    main()
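# Web-page footer captured along with the listing; not part of the script:
# "Scan with your phone" / "Easier to read on mobile" /
# "Articles you may be interested in"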