Äú¿ÉÒÔ¾èÖú£¬Ö§³ÖÎÒÃǵĹ«ÒæÊÂÒµ¡£

1Ôª 10Ôª 50Ôª





ÈÏÖ¤Â룺  ÑéÖ¤Âë,¿´²»Çå³þ?Çëµã»÷Ë¢ÐÂÑéÖ¤Âë ±ØÌî



  ÇóÖª ÎÄÕ ÎÄ¿â Lib ÊÓÆµ iPerson ¿Î³Ì ÈÏÖ¤ ×Éѯ ¹¤¾ß ½²×ù Model Center   Code  
»áÔ±   
   
 
     
   
 ¶©ÔÄ
  ¾èÖú
Python爬虫与数据可视化
 
×÷ÕߣºÔƵ­·çÇáing
  9622  次浏览      29
 2020-2-14
 
±à¼­ÍƼö:
±¾ÎÄÖ÷Òª½éÉÜÁËһЩÊý¾ÝÇåÀíµÄ˼·ÒÔ¼°ÈçºÎÅÀÈ¡Êý¾Ý£¬²¢´¦ÀíÊý¾Ýʹ֮¿ÉÊÓ»¯£¬Ï£Íû¶Ô´ó¼ÒÓаïÖú¡£
±¾ÎÄÀ´×ÔÓÚcsdn £¬ÓÉ»ðÁú¹ûÈí¼þAlice±à¼­£¬ÍƼö¡£

1.数据挖掘

代码所需包

import urllib.request
import xlwt
import re
import urllib.parse
import time

½øÈëǰ³ÌÎÞÓǹÙÍø

ÎÒÕâÀïÒÔËÑË÷´óÊý¾ÝְλÐÅÏ¢

´ò¿ª¿ª·¢Õßģʽ

Request Headers ÀïÃæÊÇÎÒÃÇÓÃä¯ÀÀÆ÷·ÃÎÊÍøÕ¾µÄÐÅÏ¢£¬ÓÐÁËÐÅÏ¢ºó¾ÍÄÜÄ£Äâä¯ÀÀÆ÷·ÃÎÊ

ÕâÒ²ÊÇΪÁË·ÀÖ¹ÍøÕ¾·â½ûIP£¬²»¹ýǰ³ÌÎÞÓÇÒ»°ãÊDz»»á·âIPµÄ¡£

模拟浏览器

# Browser-like HTTP request headers for the search.51job.com host.
header={
'Host':'search.51job.com',
'Upgrade-Insecure-Requests':'1',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

ÕâЩ»ù±¾Êý¾Ý¶¼¿ÉÒÔÅÀÈ¡£º

ΪÁËʵÏÖ½»»¥ÐÍÅÀÈ¡£¬ÎÒдÁËÒ»¸öÄܹ»ÊµÏÖÊäÈëÏëÁ˽âµÄְλ¾ÍÄÜÅÀÈ¡Ïà¹ØÄÚÈݵĺ¯Êý

def getfront(page, item):
    """Download one page of 51job search results and return it as text.

    Args:
        page: Result page number; it is converted with ``str`` and placed
            in the URL.
        item: Search keyword entered by the user; it is percent-encoded
            before being placed in the URL.

    Returns:
        The response body decoded as GBK.
    """
    keyword = urllib.parse.quote(item)
    url = (
        'https://search.51job.com/list/000000,000000,0000,00,9,99,'
        + keyword + ',2,' + str(page) + '.html'
    )
    # Send the module-level browser headers; the original built them but
    # never attached them to the request.
    request = urllib.request.Request(url, headers=header)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('gbk')

def getInformation(html):
    """Extract the job rows from a search-result page.

    Args:
        html: Page source as text.

    Returns:
        A list with one 7-tuple of strings per matched row, in capture
        order: title and href of the anchor in the ``t1`` cell, title and
        href of the anchor in the ``t2`` span, then the text of the
        ``t3``, ``t4`` and ``t5`` spans.  Empty when nothing matches.
    """
    # The pattern is one logical regex split into adjacent raw strings so
    # no whitespace is accidentally added or lost between the pieces.
    reg = re.compile(
        r'class="t1 ">.*? <a target="_blank" title="(.*?)" href="(.*?)".*? '
        r'<span class="t2"><a target="_blank" title="(.*?)" href="(.*?)".*?'
        r'<span class="t3">(.*?)</span>.*?'
        r'<span class="t4">(.*?)</span>.*?'
        r'<span class="t5">(.*?)</span>.*?',
        re.S,  # "." must also match the newlines between the tags
    )
    return reg.findall(html)

ÕâÀïÎÒ³ýÁËÅÀȡͼÉÏÐÅÏ¢Í⣬»¹°Ñְ볬Á´½ÓºóµÄÍøÖ·£¬ÒÔ¼°¹«Ë¾³¬Á´½ÓµÄÍøÖ·ÅÀÈ¡ÏÂÀ´ÁË¡£

ÕâÀïÏȲ»½²£¬ºóÃæºóÃæ»á˵µ½£¬

½ÓÏÂÀ´¾ÍÐèÒª´¢´æÐÅÏ¢£¬ÕâÀïʹÓÃExcel£¬ËäÈ»±È½ÏÂé·³£¬²»¹ýʤÔÚÇåÎúÖ±¹Û

# Create the workbook that will receive the scraped rows.
excel1 = xlwt.Workbook()
# Add the "Job" sheet with cell_overwrite_ok=True.
sheet1 = excel1.add_sheet('Job', cell_overwrite_ok=True)
# Column titles for row 0, listed left to right (columns 0-11).
_column_titles = (
    'ÐòºÅ',
    'ְλ',
    '¹«Ë¾Ãû³Æ',
    '¹«Ë¾µØµã',
    '¹«Ë¾ÐÔÖÊ',
    'н×Ê',
    'ѧÀúÒªÇó',
    '¹¤×÷¾­Ñé',
    '¹«Ë¾¹æÄ£',
    '¹«Ë¾ÀàÐÍ',
    '¹«Ë¾¸£Àû',
    '·¢²¼Ê±¼ä',
)
for _column, _title in enumerate(_column_titles):
    sheet1.write(0, _column, _title)

ÅÀÈ¡´úÂëÈçÏ£¬ÕâÀï¾ÍÄÜÀûÓÃË«²ãÑ­»·À´ÊµÏÖ»»Ò³ÅÀÈ¡Óë»»ÐÐÊä³ö

ÎÒÕâÀïΪÁË»ñµÃ´óÁ¿Êý¾ÝËùÒÔÅÀÈ¡ÁË1000Ò³£¬µ÷ÊÔʱ¿ÉÒÔÖ»ÅÀÈ¡¼¸Ò³

number = 1  # next worksheet row to fill; row 0 holds the column titles
item = input()  # search keyword typed by the user

# Detail-page patterns, compiled once instead of once per job posting.
company_re = re.compile(
    r'<div class="com_tag">.*?<p class="at" title="(.*?)"><span class="i_flag">.*?'
    r'<p class="at" title="(.*?)">.*?<p class="at" title="(.*?)">.*?',
    re.S,
)
# NOTE: the "|" characters are not escaped, so this pattern is an
# alternation of four sub-patterns, not one long match.  findall() returns
# one (group1, group2) tuple per alternative that matched, which is why the
# code below reads job_need[1][0] and job_need[2][0].
job_need_re = re.compile(
    r'<p class="msg ltype".*?>.*?&nbsp;&nbsp;<span>|</span>&nbsp;&nbsp;'
    r'(.*?)&nbsp;&nbsp;<span>|</span>&nbsp;&nbsp;'
    r'(.*?)&nbsp;&nbsp;<span>|</span>&nbsp;&nbsp;.*?</p>',
    re.S,
)
welfare_re = re.compile(r'<span class="sp4">(.*?)</span>', re.S)

for j in range(1, 10000):  # page range; lower the upper bound while debugging
    print("ÕýÔÚÅÀÈ¡µÚ"+str(j)+"Ò³Êý¾Ý...")
    try:
        html = getfront(j, item)  # source of the j-th result page
    except Exception as exc:
        # Best effort: report the failed page and continue with the next
        # one.  Unlike a bare "except:", this lets Ctrl-C stop the crawl.
        print("page %d skipped: %r" % (j, exc))
        continue
    for i in getInformation(html):
        try:
            url1 = i[1]  # URL of the job-detail page
            with urllib.request.urlopen(url1) as response:
                res1 = response.read().decode('gbk')
            company = company_re.findall(res1)
            job_need = job_need_re.findall(res1)
            welfare = welfare_re.findall(res1)
            # Build the whole row first, so a posting with missing fields
            # is skipped before anything is written to the sheet.
            # Columns 6 and 7 follow the order used by the revised full
            # listing later in this file (job_need[2][0], job_need[1][0]).
            row = (
                number, i[0], i[2], i[4], company[0][0], i[5],
                job_need[2][0], job_need[1][0],
                company[0][1], company[0][2],
                " ".join(str(perk) for perk in welfare), i[6],
            )
        except Exception as exc:
            print("posting skipped: %r" % (exc,))
            continue
        print(i[0], i[2], i[4], i[5], company[0][0],
              job_need[2][0],
              job_need[1][0], company[0][1], company[0][2],
              welfare, i[6])
        for column, cell in enumerate(row):
            sheet1.write(number, column, cell)
        number += 1
        # Pause between detail requests so a large crawl is not mistaken
        # for an attack and the IP banned.
        time.sleep(0.3)
    # Checkpoint once per page rather than rewriting the file per row.
    excel1.save("51job.xls")

½á¹ûÈçÏ£º

2.数据清洗

Ê×ÏÈÒª´ò¿ªÎļþ

#coding:utf-8
import pandas as pd
import re
#³ý´ËÖ®Í⻹Ҫ°²×°xlrd°ü

# Read the "Job" sheet of 51job.xls.
data = pd.read_excel(r'51job.xls',sheet_name='Job')
# Wrap the loaded data in a DataFrame under the name used by the steps below.
result = pd.DataFrame(data)

清洗思路:

1¡¢³öÏÖÓпÕÖµ£¨NAN£©µÃÐÅÏ¢£¬Ö±½Óɾ³ýÕûÐÐ

# Drop every row (axis=0) that contains at least one missing value.
a = result.dropna(axis=0,how='any')
pd.set_option('display.max_rows',None) # print all rows, without truncation

2¡¢Ö°Î»³ö´í£¨ºÜ¶àְλ¶¼ÊÇÓë´óÊý¾ÝÎ޹صÄÖ°Òµ£©

# Keep only the rows whose title column contains the keyword ``b``.
b = u'Êý¾Ý'
li = a['ְλ']
# Vectorised test instead of ``for i in range(len(li))``: after dropna()
# the index labels are no longer 0..len-1, so positional lookups either
# raised KeyError (silently swallowed) or never reached the later rows.
_title_has_keyword = li.astype(str).str.contains(b, regex=False)
# Same counter as before: starts at 1 and grows by one per matching row.
number = 1 + int(_title_has_keyword.sum())
a = a[_title_has_keyword]

3¡¢ÆäËûµØ·½³öÏÖµÄÐÅÏ¢´í룬±ÈÈçÔÚѧÀúÀï³öÏÖ ¡®ÕжàÉÙÈË¡¯

# Drop the rows whose education column contains the marker ``b2``.
b2 = u'ÈË'
li2 = a['ѧÀúÒªÇó']
# Label-safe boolean mask; the original indexed by position and swallowed
# the KeyErrors caused by rows removed in earlier steps.
_edu_has_marker = li2.astype(str).str.contains(b2, regex=False)
number += int(_edu_has_marker.sum())  # one increment per dropped row
a = a[~_edu_has_marker]

4¡¢×ª»»Ð½×ʵ¥Î»

ÈçÉÏͼ¾Í³öÏÖµ¥Î»²»Ò»ÖµÄÇé¿ö

b3 = u'Íò/Äê'
b4 = u'ǧ/ÔÂ'


def _salary_to_common_unit(text):
    """Rewrite a salary range so every row uses the same unit.

    A value containing ``b3`` has both bounds divided by 12; a value
    containing ``b4`` has both bounds divided by 10.  The result is
    ``"<min>-<max>"`` with two decimals followed by the common unit suffix.
    Anything else is returned unchanged, including non-strings and values
    with fewer than two numbers.
    """
    if not isinstance(text, str):
        return text
    if b3 in text:
        divisor = 12
    elif b4 in text:
        divisor = 10
    else:
        return text
    x = re.findall(r'\d*\.?\d+', text)
    if len(x) < 2:
        return text
    min_ = format(float(x[0]) / divisor, '.2f')
    max_ = format(float(x[1]) / divisor, '.2f')
    return min_ + '-' + max_ + u'Íò/ÔÂ'


# The original did ``li3[i][1] = ...``, which assigns into a str, raises
# TypeError and was swallowed by ``except: pass`` - no value was converted.
a = a.copy()  # work on an independent frame, not a filtered view
a['н×Ê'] = a['н×Ê'].map(_salary_to_common_unit)
li3 = a['н×Ê']

保存到另一个Excel文件

# Write the cleaned rows to 51job2.xls (sheet "Job"); index=False leaves the
# DataFrame index out of the sheet.
# NOTE(review): writing the legacy .xls format depends on the installed
# pandas/xlwt versions - confirm it is still supported in your environment.
a.to_excel('51job2.xls', sheet_name='Job', index=False)

ÕâÀïÖ»ÊǼòµ¥µÄ½éÉÜÁËһЩÊý¾ÝÇåÀíµÄ˼·£¬²¢²»ÊÇ˵ֻҪÇåÀíÕâЩ¾ÍÐÐÁË

ÓÐʱºòÓеĹ«Ë¾ÍøÒ³²¢²»ÊÇǰ³ÌÎÞÓÇÀàÐ͵쬶øÊÇËûÃǹ«Ë¾×Ô¼º×öµÄÍøÒ³£¬ÕâÒ²ºÜÈÝÒ׳ö´í

²»¹ýÖ»ÒªÓÐÁË»ù±¾Ë¼Â·£¬ÕâЩ¶¼²»ÄÑÇåÀí

3.数据可视化

Êý¾Ý¿ÉÊÓ»¯¿ÉÒÔ˵ÊǺÜÖØÒªµÄ»·½Ú£¬Èç¹ûÖ»ÊÇÅÀÈ¡Êý¾Ý¶ø²»È¥¿ÉÊÓ»¯´¦Àí£¬ÄÇô¿ÉÒÔ˵Êý¾ÝµÄ¼ÛÖµ¸ù±¾Ã»Óз¢»Ó

¿ÉÊÓ»¯´¦ÀíÄÜʹÊý¾Ý¸ü¼ÓÖ±¹Û£¬¸üÓÐÀûÓÚ·ÖÎö

ÉõÖÁ¿ÉÒÔ˵¿ÉÊÓ»¯ÊÇÊý¾ÝÍÚ¾ò×îÖØÒªµÄÄÚÈÝ

ͬÑùµÄÎÒÃÇÏÈ¿´´úÂëÐèÒªµÄ°ü

# -*- coding: utf-8 -*-
import pandas as pd
import re
from pyecharts import Funnel,Pie,Geo
import matplotlib.pyplot as plt

ÕâÀïÌØ±ðÇ¿µ÷£¬pyecharts°üǧÍò±ðװаæµÄ£¬ÎÒÕâÀï×°µÄÊÇ0.5.9°æµÄ

Æä´ÎÈç¹ûÒª×öµØÀí×ø±êͼ£¬ÈÈÁ¦Í¼É¶µÄ£¬±ØÐë°²×°µØÍ¼°ü£¬±ÈÈçÊÀ½çµØÍ¼°ü£¬ÖйúµØÍ¼°ü£¬³ÇÊеØÍ¼°üɶµÄ

½ÓÏÂÀ´¾ÍÊÇÕýÏ·

Ò»ÑùµÄÏÈÒª´ò¿ªÎļþ

# Read the "Job" sheet of 51job2.xls.
file = pd.read_excel(r'51job2.xls',sheet_name='Job')
f = pd.DataFrame(file)
# Print every row when a frame is displayed (no row truncation).
pd.set_option('display.max_rows',None)

1¡¢´´½¨¶à¸öÁбíÀ´µ¥¶À´æ·Å¡¾¡®Ð½×Ê¡¯¡¿¡¾¡®¹¤×÷¾­Ñ顯¡¿¡¾¡®Ñ§ÀúÒªÇ󡯡¿¡¾¡®¹«Ë¾µØµã¡¯¡¿µÈÐÅÏ¢

# Columns read from the cleaned sheet.
add = f['¹«Ë¾µØµã']
sly = f['н×Ê']
edu = f['ѧÀúÒªÇó']
exp = f['¹¤×÷¾­Ñé']
address = []     # text before the first "-" of each location
salary = []      # [low, high] floats parsed from each salary text
education = []
experience = []
# Iterate the values directly instead of indexing by position.
for place, pay, degree, years in zip(add, sly, edu, exp):
    try:
        address.append(place.split('-')[0])
        bounds = re.findall(r'\d*\.?\d+', pay)
        salary.append([float(bounds[0]), float(bounds[1])])
    except (AttributeError, IndexError, TypeError, ValueError):
        # Non-text cell or fewer than two numbers in the salary: skip the
        # rest of this row, as before, but without hiding unrelated errors
        # or Ctrl-C behind a bare ``except``.
        continue
    education.append(degree)
    experience.append(years)

2¡¢matploblib¿âÉú³É ¹¤×÷¾­Ñ顪н×Êͼ Óë ѧÀú¡ªÐ½×Êͼ

# salary holds [low, high] pairs; split them into two parallel lists.
_pairs = salary[:len(experience)]
min_s = [pair[0] for pair in _pairs]  # lowest salary of each posting
# Fixed: the original appended salary[i][0] here as well, so "max_salay"
# was a copy of "min_salay".
max_s = [pair[1] for pair in _pairs]  # highest salary of each posting

# Mean lowest salary per experience value.
my_df = pd.DataFrame({'experience': experience,
                      'min_salay': min_s, 'max_salay': max_s})
data1 = my_df.groupby('experience').mean()['min_salay'].plot(kind='line')
plt.show()

# Mean lowest salary per education value.
my_df2 = pd.DataFrame({'education': education,
                       'min_salay': min_s, 'max_salay': max_s})
data2 = my_df2.groupby('education').mean()['min_salay'].plot(kind='line')
plt.show()

3¡¢Ñ§ÀúÒªÇóÔ²»·Í¼

def get_edu(list):
    """Count how often each distinct value occurs in *list*.

    Args:
        list: A sequence of hashable values.  The parameter name shadows
            the builtin and is kept only for backward compatibility.

    Returns:
        A plain ``dict`` mapping each distinct value to its number of
        occurrences; empty for an empty input.
    """
    from collections import Counter

    # One pass over the data instead of one list.count() scan per value.
    return dict(Counter(list))
# Count the education values gathered in the `education` list.
dir1 = get_edu(education)
# print(dir1)
# Labels and their counts, taken from the same dict so the two views pair up.
attr= dir1.keys()
value = dir1.values()
pie = Pie("ѧÀúÒªÇó")
# NOTE(review): keyword arguments follow the pre-1.0 pyecharts API imported
# above (`from pyecharts import Pie`); verify them against the installed version.
pie.add("", attr, value, center=[50, 50], is_random=False, radius=[30, 75], rosetype='radius',
is_legend_show=False, is_label_show=True,legend_orient='vertical')
# Presumably writes the chart to the named HTML file - confirm in the pyecharts docs.
pie.render('ѧÀúÒªÇóõ¹åͼ.html')

4¡¢´óÊý¾Ý³ÇÊÐÐèÇóµØÀíλÖ÷ֲ¼Í¼

def get_address(list):
    """Count how often each place occurs in *list*, minus one excluded label.

    Args:
        list: A sequence of hashable place names.  The parameter name
            shadows the builtin and is kept only for backward compatibility.

    Returns:
        A plain ``dict`` mapping each place to its number of occurrences,
        with the 'ÒìµØÕÐÆ¸' entry removed when present.
    """
    from collections import Counter

    address2 = dict(Counter(list))
    # pop() with a default: the original raised KeyError whenever the data
    # did not contain this label.
    address2.pop('ÒìµØÕÐÆ¸', None)
    # Some place names may be invalid or missing from the map package and
    # can be removed by hand; the names below used to raise errors, but the
    # package appears to have been updated since.
    #address2.pop('ɽ¶«')
    #address2.pop('Å­½­')
    #address2.pop('³ØÖÝ')
    return address2
# Count the places gathered in the `address` list.
dir2 = get_address(address)
#print(dir2)
# NOTE(review): Geo and its keyword arguments follow the pre-1.0 pyecharts API
# imported above; verify them against the installed version.
geo = Geo("´óÊý¾ÝÈ˲ÅÐèÇó·Ö²¼Í¼", title_color="#2E2E2E",
title_text_size=24,title_top=20,title_pos="center", width=1300,height=600)
# Place names and their counts, taken from the same dict so the two views pair up.
attr2 = dir2.keys()
value2 = dir2.values()
geo.add("",attr2, value2, type="effectScatter", is_random=True, visual_range=[0, 1000], maptype='china',symbol_size=8, effect_scale=5, is_visualmap=True)
# Presumably writes the chart to the named HTML file - confirm in the pyecharts docs.
geo.render('´óÊý¾Ý³ÇÊÐÐèÇó·Ö²¼Í¼.html')

5¡¢¹¤×÷¾­ÑéÒªÇ󩶷ͼ

def get_experience(list):
    """Count how often each distinct value occurs in *list*.

    Args:
        list: A sequence of hashable values.  The parameter name shadows
            the builtin and is kept only for backward compatibility.

    Returns:
        A plain ``dict`` mapping each distinct value to its number of
        occurrences; empty for an empty input.
    """
    from collections import Counter

    # One pass over the data instead of one list.count() scan per value.
    return dict(Counter(list))
# Count the experience values gathered in the `experience` list.
dir3 = get_experience(experience)
#print(dir3)
# Labels and their counts, taken from the same dict so the two views pair up.
attr3= dir3.keys()
value3 = dir3.values()
# NOTE(review): Funnel and its keyword arguments follow the pre-1.0 pyecharts
# API imported above; verify them against the installed version.
funnel = Funnel("¹¤×÷¾­Ñé©¶·Í¼",title_pos='center')
funnel.add("", attr3, value3,is_label_show=True,
label_pos="inside", label_text_color="#fff",legend_orient='vertical',
legend_pos='left')
# Presumably writes the chart to the named HTML file - confirm in the pyecharts docs.
funnel.render('¹¤×÷¾­ÑéÒªÇ󩶷ͼ.html')

µ±È»£¬pyechartsÀïÃæµÄͼ»¹ÓкܶàÖÖ£¬¾Í¿¿´ó¼ÒÈ¥×Ô¼º·¢¾òÁË¡£

·´À¡

½Óµ½²¿·ÖÈË·´Ó¦µÄÂÒÂëÇé¿ö£¬Ö÷Òª¿ÉÄÜÊÇÒòÎªÍøÕ¾¹æÔò±ä¶¯¡£ÎÒÈ¥ÖØÐ¸üÐÂÁËһϴúÂ룬²¢ÇҸĽøÁËһЩµØ·½£¬Èç¹ûÓöµ½ÅÀÈ¡¹ý³ÌÖÐ;ͣϵÄÇé¿ö£¬¿ÉÄÜÊÇÍøÂçÎÊÌâ»òÕßÏÝÈë×èÈû£¬¿ÉÒÔÖØÐÂÔËÐÐÒ»´Î´úÂë

ËùÓдúÂëÈçÏ£º

# -*- coding:utf-8 -*-
import urllib.request
import xlwt
import re
import urllib.parse
import time
# Browser-like HTTP request headers for the search.51job.com host.
header = {
    'Host': 'search.51job.com',
    'Upgrade-Insecure-Requests': '1',
    # One logical string; it had been hard-wrapped across four lines (with a
    # stray ">" before "AppleWebKit"), which is not valid Python.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    ),
}
def getfront(page, item):
    """Download one page of 51job search results and return it as text.

    Args:
        page: Result page number; it is converted with ``str`` and placed
            in the URL.
        item: Search keyword entered by the user; it is percent-encoded
            before being placed in the URL.

    Returns:
        The response body decoded as GBK.
    """
    keyword = urllib.parse.quote(item)
    url = (
        'https://search.51job.com/list/000000,000000,0000,00,9,99,'
        + keyword + ',2,' + str(page) + '.html'
    )
    # Send the module-level browser headers; the original built them but
    # never attached them to the request.
    request = urllib.request.Request(url, headers=header)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('gbk')
def getInformation(html):
    """Extract the job rows from a search-result page.

    Args:
        html: Page source as text.

    Returns:
        A list with one 7-tuple of strings per matched row, in capture
        order: title and href of the anchor in the ``t1`` cell, title and
        href of the anchor in the ``t2`` span, then the text of the
        ``t3``, ``t4`` and ``t5`` spans.  Empty when nothing matches.
    """
    # The pattern is one logical regex split into adjacent raw strings so
    # no whitespace is accidentally added or lost between the pieces.
    reg = re.compile(
        r'class="t1 ">.*? <a target="_blank" title="(.*?)" href="(.*?)".*? '
        r'<span class="t2"><a target="_blank" title="(.*?)" href="(.*?)".*?'
        r'<span class="t3">(.*?)</span>.*?'
        r'<span class="t4">(.*?)</span>.*?'
        r'<span class="t5">(.*?)</span>.*?',
        re.S,  # "." must also match the newlines between the tags
    )
    return reg.findall(html)
# Create the workbook that will receive the scraped rows.
excel1 = xlwt.Workbook()
# Add the "Job" sheet with cell_overwrite_ok=True.
sheet1 = excel1.add_sheet('Job', cell_overwrite_ok=True)
# Column titles for row 0, listed left to right (columns 0-11).
_column_titles = (
    'ÐòºÅ',
    'ְλ',
    '¹«Ë¾Ãû³Æ',
    '¹«Ë¾µØµã',
    '¹«Ë¾ÐÔÖÊ',
    'н×Ê',
    'ѧÀúÒªÇó',
    '¹¤×÷¾­Ñé',
    '¹«Ë¾¹æÄ£',
    '¹«Ë¾ÀàÐÍ',
    '¹«Ë¾¸£Àû',
    '·¢²¼Ê±¼ä',
)
for _column, _title in enumerate(_column_titles):
    sheet1.write(0, _column, _title)
number = 1  # next worksheet row to fill; row 0 holds the column titles
item = input()  # search keyword typed by the user

# Detail-page patterns, compiled once instead of once per job posting.
company_re = re.compile(
    r'<div class="com_tag">.*?<p class="at" title="(.*?)"><span class="i_flag">.*?'
    r'<p class="at" title="(.*?)">.*?<p class="at" title="(.*?)">.*?',
    re.S,
)
# NOTE: the "|" characters are not escaped, so this pattern is an
# alternation of four sub-patterns, not one long match.  findall() returns
# one (group1, group2) tuple per alternative that matched, which is why the
# code below reads job_need[1][0] and job_need[2][0].
job_need_re = re.compile(
    r'<p class="msg ltype".*?>.*?&nbsp;&nbsp;<span>|</span>&nbsp;&nbsp;'
    r'(.*?)&nbsp;&nbsp;<span>|</span>&nbsp;&nbsp;'
    r'(.*?)&nbsp;&nbsp;<span>|</span>&nbsp;&nbsp;.*?</p>',
    re.S,
)
welfare_re = re.compile(r'<span class="sp4">(.*?)</span>', re.S)

for j in range(1, 10000):  # page range; lower the upper bound while debugging
    print("ÕýÔÚÅÀÈ¡µÚ"+str(j)+"Ò³Êý¾Ý...")
    try:
        html = getfront(j, item)  # source of the j-th result page
    except Exception as exc:
        # Best effort: report the failed page and continue with the next
        # one.  Unlike a bare "except:", this lets Ctrl-C stop the crawl.
        print("page %d skipped: %r" % (j, exc))
        continue
    for i in getInformation(html):
        try:
            url1 = i[1]  # URL of the job-detail page
            with urllib.request.urlopen(url1) as response:
                res1 = response.read().decode('gbk')
            company = company_re.findall(res1)
            job_need = job_need_re.findall(res1)
            welfare = welfare_re.findall(res1)
            # Build the whole row first, so a posting with missing fields
            # is skipped before anything is written to the sheet.
            row = (
                number, i[0], i[2], i[4], company[0][0], i[5],
                job_need[2][0], job_need[1][0],
                company[0][1], company[0][2],
                " ".join(str(perk) for perk in welfare), i[6],
            )
        except Exception as exc:
            print("posting skipped: %r" % (exc,))
            continue
        print(i[0], i[2], i[4], i[5], company[0][0],
              job_need[2][0], job_need[1][0], company[0][1],
              company[0][2], welfare, i[6])
        for column, cell in enumerate(row):
            sheet1.write(number, column, cell)
        number += 1
        # Pause between detail requests so a large crawl is not mistaken
        # for an attack and the IP banned.
        time.sleep(0.3)
    # Checkpoint once per page rather than rewriting the file per row.
    excel1.save("51job.xls")

#coding:utf-8
import pandas as pd
import re
data = pd.read_excel(r'51job.xls', sheet_name='Job')
result = pd.DataFrame(data)

# 1. Drop every row that contains at least one missing value.
a = result.dropna(axis=0, how='any')
pd.set_option('display.max_rows', None)  # print all rows, without truncation

# 2. Keep only the rows whose title column contains the keyword ``b``.
#    (``b`` had been swallowed into the preceding comment, so the original
#    loop raised NameError on every row and silently dropped nothing.)
b = u'Êý¾Ý'
li = a['ְλ']
# Boolean masks are label-safe; the original ``for i in range(len(...))``
# indexed by position although dropna() leaves gaps in the index labels.
_title_has_keyword = li.astype(str).str.contains(b, regex=False)
number = 1 + int(_title_has_keyword.sum())
a = a[_title_has_keyword]

# 3. Drop the rows whose education column contains the marker ``b2``.
b2 = 'ÈË'
li2 = a['ѧÀúÒªÇó']
_edu_has_marker = li2.astype(str).str.contains(b2, regex=False)
number += int(_edu_has_marker.sum())
a = a[~_edu_has_marker]

# 4. Bring every salary range to the same unit.
b3 = u'Íò/Äê'
b4 = u'ǧ/ÔÂ'


def _salary_to_common_unit(text):
    """Rewrite a salary range so every row uses the same unit.

    A value containing ``b3`` has both bounds divided by 12; a value
    containing ``b4`` has both bounds divided by 10.  The result is
    ``"<min>-<max>"`` with two decimals followed by the common unit suffix.
    Anything else is returned unchanged, including non-strings and values
    with fewer than two numbers.
    """
    if not isinstance(text, str):
        return text
    if b3 in text:
        divisor = 12
    elif b4 in text:
        divisor = 10
    else:
        return text
    x = re.findall(r'\d*\.?\d+', text)
    if len(x) < 2:
        return text
    min_ = format(float(x[0]) / divisor, '.2f')
    max_ = format(float(x[1]) / divisor, '.2f')
    return min_ + '-' + max_ + u'Íò/ÔÂ'


# The original did ``li3[i][1] = ...``, which assigns into a str, raises
# TypeError and was swallowed by ``except: pass`` - no value was converted.
a = a.copy()  # work on an independent frame, not a filtered view
a['н×Ê'] = a['н×Ê'].map(_salary_to_common_unit)
li3 = a['н×Ê']

# Write the cleaned rows to a second workbook, without the index column.
a.to_excel('51job2.xls', sheet_name='Job', index=False)

import pandas as pd
import re
from pyecharts import Funnel,Pie,Geo
import matplotlib.pyplot as plt
# Read the "Job" sheet of 51job2.xls.
file = pd.read_excel(r'51job2.xls', sheet_name='Job')
f = pd.DataFrame(file)
pd.set_option('display.max_rows', None)  # print all rows, without truncation

# Columns read from the cleaned sheet.
add = f['¹«Ë¾µØµã']
sly = f['н×Ê']
edu = f['ѧÀúÒªÇó']
exp = f['¹¤×÷¾­Ñé']
address = []     # text before the first "-" of each location
salary = []      # [low, high] floats parsed from each salary text
education = []
experience = []
# Iterate the values directly instead of indexing by position.
for place, pay, degree, years in zip(add, sly, edu, exp):
    try:
        address.append(place.split('-')[0])
        bounds = re.findall(r'\d*\.?\d+', pay)
        salary.append([float(bounds[0]), float(bounds[1])])
    except (AttributeError, IndexError, TypeError, ValueError):
        # Non-text cell or fewer than two numbers in the salary: skip the
        # rest of this row, as before, but without hiding unrelated errors
        # or Ctrl-C behind a bare ``except``.
        continue
    education.append(degree)
    experience.append(years)
# salary holds [low, high] pairs; split them into two parallel lists.
_pairs = salary[:len(experience)]
min_s = [pair[0] for pair in _pairs]  # lowest salary of each posting
# Fixed: the original appended salary[i][0] here as well, so "max_salay"
# was a copy of "min_salay".
max_s = [pair[1] for pair in _pairs]  # highest salary of each posting

# Use these settings if matplotlib cannot display Chinese text.
plt.rcParams['font.sans-serif'] = ['KaiTi']  # default font
# Keeps the minus sign "-" from being shown as a box in saved figures.
plt.rcParams['axes.unicode_minus'] = False

# Mean lowest salary per experience value.
my_df = pd.DataFrame({'experience': experience,
                      'min_salay': min_s, 'max_salay': max_s})
data1 = my_df.groupby('experience').mean()['min_salay'].plot(kind='line')
plt.show()

# Mean lowest salary per education value.
my_df2 = pd.DataFrame({'education': education,
                       'min_salay': min_s, 'max_salay': max_s})
data2 = my_df2.groupby('education').mean()['min_salay'].plot(kind='line')
plt.show()
def get_edu(list):
    """Count how often each distinct value occurs in *list*.

    Args:
        list: A sequence of hashable values.  The parameter name shadows
            the builtin and is kept only for backward compatibility.

    Returns:
        A plain ``dict`` mapping each distinct value to its number of
        occurrences; empty for an empty input.
    """
    from collections import Counter

    # One pass over the data instead of one list.count() scan per value.
    return dict(Counter(list))
# Count the education values gathered in the `education` list.
dir1 = get_edu(education)
# print(dir1)
# Labels and their counts, taken from the same dict so the two views pair up.
attr= dir1.keys()
value = dir1.values()
pie = Pie("ѧÀúÒªÇó")
# NOTE(review): keyword arguments follow the pre-1.0 pyecharts API imported
# above (`from pyecharts import Pie`); verify them against the installed version.
pie.add("", attr, value, center=[50, 50],
is_random=False, radius=[30, 75], rosetype='radius',
is_legend_show=False, is_label_show=True,
legend_orient='vertical')
# Presumably writes the chart to the named HTML file - confirm in the pyecharts docs.
pie.render('ѧÀúÒªÇóõ¹åͼ.html')
def get_address(list):
    """Count how often each place occurs in *list*, minus one excluded label.

    Args:
        list: A sequence of hashable place names.  The parameter name
            shadows the builtin and is kept only for backward compatibility.

    Returns:
        A plain ``dict`` mapping each place to its number of occurrences,
        with the 'ÒìµØÕÐÆ¸' entry removed when present.
    """
    from collections import Counter

    address2 = dict(Counter(list))
    # pop() with a default: the original raised KeyError whenever the data
    # did not contain this label.
    address2.pop('ÒìµØÕÐÆ¸', None)
    # Some place names may be invalid or missing from the map package and
    # can be removed by hand; the names below used to raise errors, but the
    # package appears to have been updated since.
    #address2.pop('ɽ¶«')
    #address2.pop('Å­½­')
    #address2.pop('³ØÖÝ')
    return address2
# Count the places gathered in the `address` list.
dir2 = get_address(address)
#print(dir2)
# NOTE(review): Geo and its keyword arguments follow the pre-1.0 pyecharts API
# imported above; verify them against the installed version.
geo = Geo("´óÊý¾ÝÈ˲ÅÐèÇó·Ö²¼Í¼", title_color="#2E2E2E",
title_text_size=24,title_top=20,title_pos="center",
width=1300,height=600)
# Place names and their counts, taken from the same dict so the two views pair up.
attr2 = dir2.keys()
value2 = dir2.values()
geo.add("",attr2, value2, type="effectScatter",
is_random=True, visual_range=[0, 1000], maptype='china',
symbol_size=8, effect_scale=5, is_visualmap=True)
# Presumably writes the chart to the named HTML file - confirm in the pyecharts docs.
geo.render('´óÊý¾Ý³ÇÊÐÐèÇó·Ö²¼Í¼.html')
def get_experience(list):
    """Count how often each distinct value occurs in *list*.

    Args:
        list: A sequence of hashable values.  The parameter name shadows
            the builtin and is kept only for backward compatibility.

    Returns:
        A plain ``dict`` mapping each distinct value to its number of
        occurrences; empty for an empty input.
    """
    from collections import Counter

    # One pass over the data instead of one list.count() scan per value.
    return dict(Counter(list))
# Count the experience values gathered in the `experience` list.
dir3 = get_experience(experience)
#print(dir3)
# Labels and their counts, taken from the same dict so the two views pair up.
attr3= dir3.keys()
value3 = dir3.values()
# NOTE(review): Funnel and its keyword arguments follow the pre-1.0 pyecharts
# API imported above; verify them against the installed version.
funnel = Funnel("¹¤×÷¾­Ñé©¶·Í¼",title_pos='center')
funnel.add("", attr3, value3,is_label_show=True,
label_pos="inside", label_text_color="#fff",legend_orient='vertical',
legend_pos='left')
# Presumably writes the chart to the named HTML file - confirm in the pyecharts docs.
funnel.render('¹¤×÷¾­ÑéÒªÇ󩶷ͼ.html')

HTMLÎļþ×îºÃÓùȸèä¯ÀÀÆ÷´ò¿ª£¬Èç¹ûµã¿ªÃ»·´Ó¦¿ÉÒÔÔÚÎļþ¼ÐÀïÕÒµ½¸ÃÎļþÈ»ºó´ò¿ª

 
   
9622 ´Îä¯ÀÀ       29
Ïà¹ØÎÄÕÂ

ÊÖ»úÈí¼þ²âÊÔÓÃÀýÉè¼ÆÊµ¼ù
ÊÖ»ú¿Í»§¶ËUI²âÊÔ·ÖÎö
iPhoneÏûÏ¢ÍÆËÍ»úÖÆÊµÏÖÓë̽ÌÖ
AndroidÊÖ»ú¿ª·¢£¨Ò»£©
Ïà¹ØÎĵµ

Android_UI¹Ù·½Éè¼Æ½Ì³Ì
ÊÖ»ú¿ª·¢Æ½Ì¨½éÉÜ
androidÅÄÕÕ¼°ÉÏ´«¹¦ÄÜ
Android½²ÒåÖÇÄÜÊÖ»ú¿ª·¢
Ïà¹Ø¿Î³Ì

Android¸ß¼¶Òƶ¯Ó¦ÓóÌÐò
Androidϵͳ¿ª·¢
AndroidÓ¦Óÿª·¢
ÊÖ»úÈí¼þ²âÊÔ