"""Scrape consecutively numbered report pages and collect them into a CSV.

Starting just after a fixed report number, each page is fetched and three
text fields are extracted from it.  The crawl stops after
``MAX_CONSECUTIVE_MISSES`` pages in a row yield no usable data.
"""

import re
import time

import pandas as pd
import requests
from lxml import etree

REPORT_URL = "http://pts.xjtu.edu.cn:8080/Report/ViewRenWu.aspx?ReportNo="
REQUEST_HEADERS = {
    'cache-control': "no-cache",
    'postman-token': "0cfe8f51-b9eb-58b7-113a-1215444bcfad",
}
# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10
# Number of consecutive pages without data that ends the crawl.
MAX_CONSECUTIVE_MISSES = 10
# The crawl begins at the report number following this one.
LAST_SKIPPED_REPORT_NO = 1925
OUTPUT_CSV = 'hack3.csv'


def get_information(num, bbb):
    """Fetch report page ``num`` and extract its name, number and title.

    Args:
        num: Report number appended to ``REPORT_URL``.
        bbb: Remaining budget of consecutive misses before the crawl stops.

    Returns:
        A 4-tuple ``(user_name, number, tittle, bbb)``:

        * On success, ``user_name`` is the text before the first
          non-breaking space in the second text node of table row 4,
          ``number`` is the first run of digits in that same node (a
          ``str``), ``tittle`` is the second text node of table row 3, and
          ``bbb`` is reset to ``MAX_CONSECUTIVE_MISSES``.
        * ``(0, 1, 0, bbb - 1)`` when the page has no usable data but the
          miss budget is not yet exhausted.
        * ``(0, 0, 0, 0)`` when the miss budget is exhausted.

        The integer sentinels ``0``/``1`` in the ``number`` slot cannot
        collide with a real value because real numbers are strings.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    response = requests.request(
        "GET",
        REPORT_URL + str(num),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,  # never block forever on a stalled server
    )
    html = etree.HTML(response.text)

    # NOTE(review): etree.HTML appears to return None for an empty document;
    # treat that the same as a page without data instead of crashing.
    if html is None:
        tittle, name = [], []
    else:
        tittle = html.xpath('/html/body/form/table/tr[3]/td/text()')
        name = html.xpath('/html/body/form/table/tr[4]/td/text()')

    number = re.findall(r'\d+', name[1]) if len(name) > 1 else []

    # Any page lacking one of the three fields counts as a miss.  (The
    # original only recognised len(name) == 1 and raised IndexError on every
    # other malformed layout.)
    if len(tittle) < 2 or not number:
        print('---')
        bbb -= 1
        if bbb <= 0:
            return 0, 0, 0, 0
        return 0, 1, 0, bbb

    user_name = name[1].split('\xa0')[0]
    print(user_name, number[0], tittle[1])
    time.sleep(1)  # throttle between successful fetches
    return user_name, number[0], tittle[1], MAX_CONSECUTIVE_MISSES


def _save(rows):
    """Write the collected ``rows`` to ``OUTPUT_CSV`` (UTF-8 with BOM)."""
    save = pd.DataFrame(rows, columns=['name', 'number', 'study'])
    save.to_csv(OUTPUT_CSV, encoding='utf_8_sig')


def main():
    """Crawl report pages until the miss budget runs out, saving as we go."""
    bbb = MAX_CONSECUTIVE_MISSES
    ans = []
    i = LAST_SKIPPED_REPORT_NO
    try:
        while True:
            i += 1
            user_name, number, tittle, bbb = get_information(i, bbb)
            if number == 0:  # miss budget exhausted
                break
            if number == 1:  # single miss, keep going
                continue
            ans.append([user_name, number, tittle.replace('\u2022', '')])
            if i % 10 == 0:
                _save(ans)  # periodic checkpoint
    finally:
        # Persist rows gathered since the last checkpoint, including when a
        # network error aborts the crawl.  Skip when empty so an earlier
        # output file is not overwritten with nothing.
        if ans:
            _save(ans)
    print(ans)


if __name__ == '__main__':
    main()