使用 requests 爬取 EnergyPlus 天气数据并整理为二维字典

爬虫模块

思路以lxml模块为主进行网页解析。用zipfile和tempfile对下载到的zip文件进行解压。
运行代码会在当前文件夹下创建data,并在里面下载所有的天气数据。

值得注意的是,网站存在访问限制,由于有代理服务器,所以没有加sleep,也没有测试网站反爬虫的上限,如果没有代理谨慎使用,或者保守添加停等策略。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import requests
import zipfile
import tempfile
from lxml import etree

def get_data(url, timeout=30):
    """Download *url* and return it together with the raw response body.

    Args:
        url: Address to fetch, e.g.
            "https://energyplus.net/weather-download/asia_wmo_region_2/CHN//CHN_Anhui.Huoshan.583140_CSWD/all".
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``(url, content)`` tuple where ``content`` is the response body as
        bytes.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx status (so an HTML error page is never handed to the
            caller as if it were archive data).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return url, response.content
def unzip(filename, data):
    """Extract an in-memory zip archive into ``./data/<filename>``.

    The bytes are spooled through a temporary file, every member of the
    archive is extracted, and the path of each extracted member is printed.

    Args:
        filename: Name of the sub-directory of ``./data/`` to extract into.
        data: Raw bytes of the zip archive.

    Raises:
        zipfile.BadZipFile: If *data* is not a valid zip archive.
    """
    target_dir = './data/' + filename
    # Context managers guarantee that both the temporary file and the archive
    # are closed even when extraction fails part-way through.
    with tempfile.TemporaryFile() as tmp_file:
        tmp_file.write(data)
        tmp_file.seek(0)  # rewind so the archive is read from the start
        with zipfile.ZipFile(tmp_file, mode='r') as archive:
            for member in archive.namelist():
                extracted_path = archive.extract(member, target_dir)
                print(extracted_path)

if __name__ == '__main__':
    # Crawl the China region index page, then download and unpack the "all"
    # archive of every CSWD weather station linked from it.
    url_main = 'https://energyplus.net/weather-region/asia_wmo_region_2/CHN'

    # Index of the first station link to process. The value 315 is the offset
    # that was hard-coded in the original loop (presumably the point at which
    # an earlier, interrupted run stopped -- verify); set it to 0 to crawl
    # the whole list from the beginning.
    start_index = 315

    response = requests.get(url_main)
    html = etree.HTML(response.content)
    name_city = html.xpath('/html/body/div[2]/div/section/div/section/div/a/@href ')
    print(len(name_city))

    # Iterate only over indices that exist: the previous
    # ``range(len(name_city))`` combined with ``name_city[i+315]`` always ran
    # past the end of the list and ended in an IndexError.
    for index in range(start_index, len(name_city)):
        # The last path segment of the link is the station identifier,
        # e.g. "CHN_Anhui.Huoshan.583140_CSWD".
        station = name_city[index].split('/')[-1]
        url = "https://energyplus.net/weather-download/asia_wmo_region_2/CHN//" + station + "/all"
        if "CSWD" not in url:
            continue
        print(index)
        print(url)
        url, data = get_data(url)  # data is the archive as bytes
        unzip(station, data)

更换三轮IP,爬完全部数据。如需其余国家的数据,修改url一行的路由即可。结果如图

数据整理模块

此处必须使用二维(嵌套)字典:第一级索引是省份,第二级索引是城市名。向二维字典中添加键值对时,需要先判断第一级键是否已经存在:存在则在对应的内层字典中添加,不存在则先创建内层字典。具体详见代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import os

filePath = "./"
# os.walk() is a generator: it lazily yields one (dirpath, dirnames,
# filenames) tuple for every directory in the tree, which is why it has to be
# iterated. Only the first tuple -- the one for filePath itself -- is needed,
# so take it with next(). The default keeps `d` defined (as an empty list)
# when filePath does not exist and the generator yields nothing.
d = next(os.walk(filePath), (filePath, [], []))[1]
def addtwodimdict(thedict, key_a, key_b, val):
    """Store *val* under ``thedict[key_a][key_b]``, creating the inner dict on demand.

    Args:
        thedict: Two-level dictionary that is modified in place.
        key_a: First-level key.
        key_b: Second-level key; an existing value under it is overwritten.
        val: Value to store.
    """
    # setdefault() returns the existing inner dict, or inserts and returns a
    # new empty one, so no explicit membership test is needed.
    thedict.setdefault(key_a, {})[key_b] = val

# Two-level index of the downloaded folders: main_dic[province][city] = folder name.
main_dic = {}
for name in d:
    # Folder names look like "CHN_Anhui.Huoshan.583140_CSWD": the first
    # dot-separated part is "<3-letter country>_<province>" and the
    # second-to-last part is the city.
    parts = name.split(".")
    if len(parts) < 2:
        # Not a station folder (no "." in the name, e.g. "data"); indexing
        # parts[-2] would raise IndexError, so skip it.
        continue
    province, city = parts[0][4:], parts[-2]
    addtwodimdict(main_dic, province, city, name)

# To inspect the result:
#   import json; print(json.dumps(main_dic, indent=4))

结果如图

qq_add_answer

我设置的 QQ 加好友验证问题是:count the prime numbers less than 867718012(统计小于 867718012 的素数个数)。
答案如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include<bits/stdc++.h>

using namespace std;

typedef long long LL;
const int N = 5e6 + 2;  // sieve limit: the tables below cover 0 .. N-1
bool np[N];             // np[i] is true when i is NOT prime
int prime[N], pi[N];    // prime[k]: k-th prime (1-based); pi[i]: number of primes <= i

// Sieve all primes below N, filling np[], prime[] and pi[].
// Every composite is marked through its smallest prime factor (the inner
// loop stops at the first prime dividing the current number).
// Returns the number of primes found.
int getprime() {
    int total = 0;
    np[0] = np[1] = true;
    pi[0] = pi[1] = 0;
    for (int n = 2; n < N; ++n) {
        if (!np[n]) {
            ++total;
            prime[total] = n;
        }
        pi[n] = total;
        for (int k = 1; k <= total; ++k) {
            const int p = prime[k];
            if (n * p >= N) break;   // product falls outside the table
            np[n * p] = true;
            if (n % p == 0) break;   // p is the smallest prime factor of n
        }
    }
    return total;
}
const int M = 7;                                  // number of primes covered by the phi table
const int PM = 2 * 3 * 5 * 7 * 11 * 13 * 17;      // product of the first M primes
// phi[v][k]: how many integers in [1, v] are divisible by none of the first
// k primes; sz[k]: product of the first k primes.
int phi[PM + 1][M + 1], sz[M + 1];

// Run the sieve and build the small phi[][] / sz[] lookup tables.
// Must be called once before getphi(), getpi() or lehmer_pi().
void init() {
    getprime();
    sz[0] = 1;
    for (int v = 0; v <= PM; ++v) {
        phi[v][0] = v;  // with no primes removed every integer in [1, v] counts
    }
    for (int k = 1; k <= M; ++k) {
        const int p = prime[k];
        sz[k] = p * sz[k - 1];
        for (int v = 1; v <= PM; ++v) {
            // drop the multiples of p that survived the first k-1 primes
            phi[v][k] = phi[v][k - 1] - phi[v / p][k - 1];
        }
    }
}
// Integer square root: the largest r with r * r <= x.
// Returns 0 for x <= 0 (the previous version took sqrt() of a negative
// number there, and converting the resulting NaN to an integer is undefined).
int sqrt2(LL x) {
    if (x <= 0) return 0;
    LL r = (LL)sqrt((double)x);
    // The floating-point estimate can be off in either direction; correct it
    // both ways. Divisions are used instead of r * r so that the comparison
    // itself cannot overflow.
    while (r > x / r) --r;
    while (r + 1 <= x / (r + 1)) ++r;
    return int(r);
}
// Integer cube root: the largest r with r * r * r <= x.
int sqrt3(LL x) {
    // Start from a floating-point estimate taken slightly below x, then walk
    // upwards to the first value whose cube exceeds x.
    LL root = static_cast<LL>(cbrt(x - 0.1));
    for (; root * root * root <= x; ++root) {
    }
    return static_cast<int>(root - 1);
}
LL getphi(LL x, int s) {
if(s == 0) return x;
if(s <= M) return phi[x % sz[s]][s] + (x / sz[s]) * phi[sz[s]][s];
if(x <= prime[s]*prime[s]) return pi[x] - s + 1;
if(x <= prime[s]*prime[s]*prime[s] && x < N) {
int s2x = pi[sqrt2(x)];
LL ans = pi[x] - (s2x + s - 2) * (s2x - s + 1) / 2;
for(int i = s + 1; i <= s2x; ++i) {
ans += pi[x / prime[i]];
}
return ans;
}
return getphi(x, s - 1) - getphi(x / prime[s], s - 1);
}
LL getpi(LL x) {
if(x < N) return pi[x];
LL ans = getphi(x, pi[sqrt3(x)]) + pi[sqrt3(x)] - 1;
for(int i = pi[sqrt3(x)] + 1, ed = pi[sqrt2(x)]; i <= ed; ++i) {
ans -= getpi(x / prime[i]) - i + 1;
}
return ans;
}
// Count the primes <= x using Lehmer's formula on top of the sieve tables.
//
// With a = pi(x^(1/4)), b = pi(x^(1/2)) and c = pi(x^(1/3)) the code evaluates
//   pi(x) = phi(x, a) + (b + a - 2) * (b - a + 1) / 2
//           - sum_{a < i <= b} pi(x / p_i)
//           - sum_{a < i <= c} sum_{i <= j <= pi(sqrt(x / p_i))}
//                 (pi(x / (p_i * p_j)) - (j - 1))
// Arguments below N are answered directly from the pi[] table; larger ones
// recurse. Reads prime[] and pi[] (and phi[][] through getphi), so init()
// must have been called first.
LL lehmer_pi(LL x) {
// Small arguments: table lookup.
if(x < N) return pi[x];
// a, b, c: prime counts up to the fourth root, square root and cube root of x.
int a = (int)lehmer_pi(sqrt2(sqrt2(x)));
int b = (int)lehmer_pi(sqrt2(x));
int c = (int)lehmer_pi(sqrt3(x));
// Leading terms of the formula; the product is evaluated in LL.
LL sum = getphi(x, a) + LL(b + a - 2) * (b - a + 1) / 2;
for (int i = a + 1; i <= b; i++) {
// w = x / p_i, subtracted for every i in (a, b].
LL w = x / prime[i];
sum -= lehmer_pi(w);
// The double-sum correction only applies while i <= c.
if (i > c) continue;
// lim = pi(sqrt(w)): upper bound for the inner prime index j.
LL lim = lehmer_pi(sqrt2(w));
for (int j = i; j <= lim; j++) {
sum -= lehmer_pi(w / prime[j]) - (j - 1);
}
}
return sum;
}

// Build the lookup tables, then answer one query per integer read from
// standard input (until end of input): print the number of primes <= n.
int main() {
    init();
    for (LL n; cin >> n; ) {
        cout << lehmer_pi(n) << endl;
    }
    return 0;
}

代码拷贝自 Meissel-Lehmer 算法模板。
速度极快

使用 docker 以及 docker network 搭建 etcd 集群

安装docker即可,使用如下脚本,network name以及node ip可以自定义。结果如后图。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

#!/bin/bash
#
# Start a three-node etcd cluster on a dedicated Docker bridge network.
# Each node gets a fixed IP on that network, and its client port (2379) and
# peer port (2380) are published on distinct host ports.
# The network name and the node names/IPs below can be customised.

# Abort on the first failing command, on unset variables and on pipe errors.
set -euo pipefail

# Network name
network_name=new_etcd_network

# Create the network unless it already exists, so the script can be re-run.
# The subnet is written in canonical form: 10.3.36.0/16 has host bits set and
# denotes the same network as 10.3.0.0/16.
if ! docker network inspect "${network_name}" >/dev/null 2>&1; then
  docker network create --driver bridge --subnet=10.3.0.0/16 --gateway=10.3.1.1 "${network_name}"
fi

# Node names and addresses
node1=etcd_node1
node1_ip=10.3.36.1

node2=etcd_node2
node2_ip=10.3.36.2

node3=etcd_node3
node3_ip=10.3.36.3

# Cluster token shared by all members
cluster_token=etcd_cluster

# Peer list handed to every member; identical for all three nodes.
initial_cluster="${node1}=http://${node1_ip}:2380,${node2}=http://${node2_ip}:2380,${node3}=http://${node3_ip}:2380"

# start_node NAME IP HOST_CLIENT_PORT HOST_PEER_PORT
# Run one etcd member as a detached container attached to ${network_name}.
start_node() {
  local name=$1 ip=$2 client_port=$3 peer_port=$4

  docker run -d --name "${name}" \
    --network "${network_name}" \
    --publish "${client_port}:2379" \
    --publish "${peer_port}:2380" \
    --ip "${ip}" \
    --env ALLOW_NONE_AUTHENTICATION=yes \
    --env ETCD_NAME="${name}" \
    --env ETCD_ADVERTISE_CLIENT_URLS="http://${ip}:2379" \
    --env ETCD_INITIAL_ADVERTISE_PEER_URLS="http://${ip}:2380" \
    --env ETCD_LISTEN_CLIENT_URLS=http://0.0.0.0:2379 \
    --env ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380 \
    --env ETCD_INITIAL_CLUSTER_TOKEN="${cluster_token}" \
    --env ETCD_INITIAL_CLUSTER="${initial_cluster}" \
    --env ETCD_INITIAL_CLUSTER_STATE=new \
    bitnami/etcd:latest
}

# Create node 1
start_node "${node1}" "${node1_ip}" 12379 12380

# Create node 2
start_node "${node2}" "${node2_ip}" 22379 22380

# Create node 3
start_node "${node3}" "${node3_ip}" 32379 32380


  • Copyrights © 2015-2024 galaxy
  • Visitors: | Views:

请我喝杯咖啡吧~

支付宝
微信