手动修改key 伪修改内存变量
阅读原文时间:2023年07月11日阅读:2

# -*- coding: UTF-8 -*-
import math
import random
import sys
import threading
import time
from time import ctime, sleep
import requests
import xlrd

# City codes whose districts are crawled; matched against the third column
# of the spreadsheet below.
target_citycode_list = ['0755']

# citycode -> {adcode -> {'name': <first column>, 'adcode': <second column>}}
adcode_dic = {}
FEXCEL = '高德地图API_城市编码对照表.xlsx'
# NOTE(review): xlrd 2.0+ reads only legacy .xls files; opening this .xlsx
# workbook presumably needs xlrd < 2.0 -- confirm against the installed version.
data = xlrd.open_workbook(FEXCEL)
table = data.sheets()[1]
nrows = table.nrows
ncols = table.ncols
for i in range(nrows):
    row = table.row_values(i)
    name_ = row[0]
    adcode = row[1]
    citycode = row[2]
    if citycode in target_citycode_list:
        # Group the districts (adcodes) under the city they belong to.
        adcode_dic.setdefault(citycode, {})[adcode] = {
            'name': name_,
            'adcode': adcode,
        }

# Flat list of every adcode collected in adcode_dic; one search job per entry.
REQUEST_LIST = [
    district['adcode']
    for districts in adcode_dic.values()
    for district in districts.values()
]
REQUEST_LEN = len(REQUEST_LIST)
# Divisor used by main() to derive the number of worker threads.
EACH_THREAD_REQUEST_NUM = 1

# NOTE(review): not referenced anywhere in the visible code.
MAX_PAGINATION = 100
# Throttle: whenever a request counter reaches a multiple of QPS, the caller
# sleeps for QPS_TIME_UNIT seconds.
QPS = 50
QPS_TIME_UNIT = 1

# See http://lbs.amap.com/api/webservice/guide/tools/info

# `infocode` value AMap returns for a successful request.  The empty string
# that stood here could never match a real response, so every key was
# treated as invalid.
INFOCODE_OK = '10000'
# API keys loaded from the pool file by dynamic_write_pool_file().
KEY_POOL_LIST = []
# Key currently used to build request URLs; chosen by change_key().
touse_key = ''

def dynamic_write_pool_file():
    """Reload ``KEY_POOL_LIST`` from ``key_pool.pool`` and report whether it changed.

    The pool file is re-read from disk on every call.  Each line is split on
    tabs and the first whitespace-delimited token of the second column is
    taken as an API key; lines without such a token are reported and
    skipped.  Keys are kept in file order.
    """
    global KEY_POOL_LIST
    file_name_key_pool = 'key_pool.pool'
    keypoollist_old = KEY_POOL_LIST
    keypoollist_new = []
    # `with` really closes the file; the former `f.closed` only read an
    # attribute and left the handle open.
    with open(file_name_key_pool, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, 1):
            try:
                keypoollist_new.append(line.split('\t')[1].split()[0])
            except IndexError:
                print('key_pool.pool line %s skipped: no key in second column' % line_no)
    KEY_POOL_LIST = keypoollist_new
    # Compare the lists themselves.  `list.reverse()` returns None, so the
    # former `old.reverse() == new.reverse()` test was always true.
    if keypoollist_old == keypoollist_new:
        print(time.time(), '-old')
    else:
        print(time.time(), '66POOL-new')

# Load the key pool once at import time so KEY_POOL_LIST is populated before
# any request is made.
dynamic_write_pool_file()

# Endpoint of the AMap keyword (text) POI search.
URL_TYPE = 'http://restapi.amap.com/v3/place/text'
touse_key = ''

keywords = '&keywords='

# Page size requested from the API; also used to derive the page count.
OFFSET_NUM = 24
OFFSET = '&offset=%s' % (OFFSET_NUM)
CITYLIMIT = '&citylimit=true'
# The API parameter is spelled `extensions`; the former `extention` would be
# ignored by the server.  The constant keeps its original name.
EXTENTION = '&extensions=all'

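# 120000 商务住宅 商务住宅相关 商务住宅相关
# 120100 商务住宅 产业园区 产业园区
# 120200 商务住宅 楼宇 楼宇相关
# 120201 商务住宅 楼宇 商务写字楼
# 120202 商务住宅 楼宇 工业大厦建筑物
# 120203 商务住宅 楼宇 商住两用楼宇
# 120300 商务住宅 住宅区 住宅区
# 120301 商务住宅 住宅区 别墅
# 120302 商务住宅 住宅区 住宅小区
# 120303 商务住宅 住宅区 宿舍
# 120304 商务住宅 住宅区 社区中心
#
# 由于高德至多返回1000条,所以type值以最小粒度请求,逐个请求
# (AMap returns at most 1000 results per query, so each type code is
# requested separately, at the finest granularity.)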

# One entry per POI type code; supply_dic() issues a separate search for each.
# The list previously held 11 empty placeholders; the 11 codes below are the
# ones joined in POI_TYPES.
POI_TYPES_LIST = [
    '120000', '120100', '120200', '120201', '120202', '120203',
    '120300', '120301', '120302', '120303', '120304',
]

# Same codes joined for a single multi-type query (a `|` was missing between
# 120202 and 120203).
POI_TYPES = '&types=120000|120100|120200|120201|120202|120203|120300|120301|120302|120303|120304'

# Cheap probe request used by change_key() to test whether a key is accepted.
URL_FOR_CHANGE_KEY = 'http://restapi.amap.com/v3/place/text?key=%s&types=060100&city=010&OFFSET=1'
# Number of probe requests sent so far; drives the throttle in change_key().
change_key_qps = 0

def change_key():
    """Switch ``touse_key`` to a pool key that the API currently accepts.

    The pool file is reloaded first.  Keys are then probed one by one with
    ``URL_FOR_CHANGE_KEY``, starting at a random position and wrapping
    around, with the key currently in use tried last.  The first key whose
    response carries ``INFOCODE_OK`` becomes ``touse_key``.

    Raises:
        SystemExit: with message ``'NOInvalidKEY'`` when the pool is empty
            or no key passes the probe.
    """
    global touse_key, change_key_qps, KEY_POOL_LIST

    dynamic_write_pool_file()
    # AMap does not follow its own QPS / daily-quota policy, so its return
    # codes alone cannot be used to schedule keys; each candidate is probed.
    pool_num = len(KEY_POOL_LIST)
    if pool_num == 0:
        sys.exit('NOInvalidKEY')

    # randrange excludes pool_num.  The former randint(0, pool_num) could
    # pick pool_num itself, which made the whole call a silent no-op.
    start = random.randrange(pool_num)
    rotated = KEY_POOL_LIST[start:] + KEY_POOL_LIST[:start]
    candidates = [key for key in rotated if key != touse_key]
    if touse_key in rotated:
        # Fall back to the current key only after every other key failed.
        candidates.append(touse_key)

    # A bounded loop replaces the former self-recursion, which never
    # terminated when no key was valid.
    for key in candidates:
        url = URL_FOR_CHANGE_KEY % (key)
        change_key_qps += 1
        if change_key_qps % QPS == 0:
            sleep(QPS_TIME_UNIT)
        try:
            json_ = requests.get(url, timeout=10).json()
        except (requests.RequestException, ValueError) as exc:
            # Transport failure or non-JSON body: try the next key.
            print('requests.get(url)', exc)
            continue
        if json_.get('infocode') == INFOCODE_OK:
            touse_key = key
            return
    sys.exit('NOInvalidKEY')

# Number of search requests issued so far; drives the QPS throttle.
requests_counter = 0
# Work queue of adcodes still to be fetched.  NOTE: this is an alias of
# REQUEST_LIST, not a copy -- removing an entry here also removes it there.
todo_list = REQUEST_LIST

# Collected results, shaped as {adcode: [[poi, ...], [poi, ...]]}
# (one inner list per fetched page).
tosupply_dic = {}

def _fetch_json(url):
    """GET *url* under the QPS throttle; return the decoded JSON or None on failure."""
    global requests_counter
    requests_counter += 1
    if requests_counter % QPS == 0:
        sleep(QPS_TIME_UNIT)
    try:
        return requests.get(url, timeout=10).json()
    except (requests.RequestException, ValueError) as exc:
        print('requests.get(url)', exc)
        return None


def _queue_retry(request):
    """Make sure *request* is present in ``todo_list`` so it is fetched again."""
    if request not in todo_list:
        todo_list.append(request)


def supply_dic(request):
    """Fetch every POI page of every type in ``POI_TYPES_LIST`` for one adcode.

    Args:
        request: the adcode placed in the ``city`` query parameter.

    On complete success the pages are stored as ``tosupply_dic[request]``
    (a list with one POI list per page) and *request* is removed from
    ``todo_list``.  On any failure the partial result is discarded and
    *request* is queued in ``todo_list`` for a later retry; when the failure
    is a rejected ``infocode`` the API key is rotated via ``change_key()``.
    """
    global tosupply_dic, todo_list, touse_key
    if requests_counter == 0 or not touse_key:
        change_key()

    pages = []
    for poi_type in POI_TYPES_LIST:
        # The API parameter is `types`; the former `type=` was not a filter.
        url = '%s?key=%s&city=%s&types=%s%s%s' % (
            URL_TYPE, touse_key, request, poi_type, OFFSET, CITYLIMIT)
        page = 1
        page_count = 1  # corrected from the `count` field of the first page
        while page <= page_count:
            url_ = '%s&page=%s' % (url, page)
            print(url_)
            r_json = _fetch_json(url_)
            if r_json is None:
                # Transport/decoding error: retry later with the same key.
                _queue_retry(request)
                return
            if r_json.get('infocode') != INFOCODE_OK:
                _queue_retry(request)
                change_key()
                return
            if page == 1:
                count = r_json.get('count', 0)
                page_count = math.ceil(int(count) / OFFSET_NUM)
            # Store THIS page's POIs (the original stored page 1 every time).
            pois_list = r_json.get('pois') or []
            if not pois_list:
                break
            pages.append(pois_list)
            page += 1

    # Replace rather than append, so a retried adcode is not stored twice.
    tosupply_dic[request] = pages
    if request in todo_list:
        todo_list.remove(request)

# deal_exception_list() keeps retrying while todo_list holds more entries
# than this threshold.
MAX_EXCEPTION_URL_NUM = 0

def deal_exception_list(max_rounds=50):
    """Re-run ``supply_dic`` for the adcodes still queued in ``todo_list``.

    Args:
        max_rounds: upper bound on retry passes.  The original recursed
            without any bound and ended in ``RecursionError`` when a request
            kept failing.

    Each pass retries a snapshot of ``todo_list``, because ``supply_dic``
    mutates the list while it runs.  Retrying stops once the list holds no
    more than ``MAX_EXCEPTION_URL_NUM`` entries or the pass budget is spent.
    """
    global todo_list
    rounds = 0
    while len(todo_list) > MAX_EXCEPTION_URL_NUM and rounds < max_rounds:
        rounds += 1
        print(todo_list)
        # Iterate the queued adcodes themselves; the original indexed
        # REQUEST_LIST by position, which only worked through aliasing.
        for adcode in list(todo_list):
            supply_dic(adcode)
    print(todo_list)

class MyThread(threading.Thread):
    """Thread that runs ``func(args)``.

    ``args`` is handed to ``func`` as ONE positional argument; it is not
    unpacked.
    """

    def __init__(self, func, args, name=''):
        super().__init__()
        # Only override the auto-generated thread name when one is given;
        # assigning the default '' left the thread with an empty name.
        if name:
            self.name = name
        self.func = func
        self.args = args

    def run(self):
        """Invoke the stored callable with the stored argument."""
        self.func(self.args)

def _supply_batch(adcodes):
    """Run ``supply_dic`` for every adcode of one worker thread's batch."""
    for adcode in adcodes:
        supply_dic(adcode)


def _write_pois_csv(path, pois_by_request):
    """Write all collected POIs to *path* as a UTF-8 (with BOM) CSV file."""
    import csv  # local import: the file's import header does not provide csv

    columns = [
        'id', 'name', 'type', 'typecode', 'biz_type', 'address', 'location',
        'tel', 'distance', 'biz_ext', 'pname', 'cityname', 'adname',
        'shopid', 'shopinfo', 'poiweight',
    ]
    location_idx = columns.index('location')
    # `with` closes the file (the former `fo.closed` did not); csv.writer
    # quotes fields that contain commas, quotes or newlines.
    with open(path, 'w', encoding='utf-8-sig', newline='') as fo:
        writer = csv.writer(fo)
        writer.writerow(columns)
        for pages in pois_by_request.values():
            for pois in pages:
                for poi in pois:
                    row = [poi.get(column, '') for column in columns]
                    # As before, commas inside `location` become spaces.
                    row[location_idx] = str(row[location_idx]).replace(',', ' ')
                    writer.writerow(row)


def main():
    """Fetch every adcode in worker threads, retry failures, then export a CSV."""
    print('starting at:', ctime())
    # Snapshot the work list: todo_list aliases REQUEST_LIST and the workers
    # remove entries from it while they run.
    pending = list(REQUEST_LIST)
    batch_size = max(1, EACH_THREAD_REQUEST_NUM)
    threads_list = []
    for start in range(0, len(pending), batch_size):
        batch = pending[start:start + batch_size]
        thread_instance = MyThread(_supply_batch, batch, supply_dic.__name__)
        threads_list.append(thread_instance)
    # The main thread exits only after all non-daemon threads have finished.
    for t in threads_list:
        # Set the attribute; `t.setDaemon = False` merely overwrote the method.
        t.daemon = False
        t.start()
    # Wait for all threads to finish.
    for t in threads_list:
        t.join()
    deal_exception_list()

    FGEN = 'GEN_GD_business_building.csv'
    _write_pois_csv(FGEN, tosupply_dic)

# Run the crawl only when the file is executed as a script.
if __name__ == '__main__':
    main()