import requests
from pyquery import PyQuery as pq
import pymysql
import urllib.request
import os
# Insert a row into the database
def insertsql(sql):
    db = pymysql.connect(
        host="localhost",
        port=3306,
        user="root",
        password="admin123",
        db="shici_org",
        charset="utf8")
    cursor = db.cursor()
    # sql = "insert into sc_chapter(title,content,dynasty_name,author_name,cat_name) values('" + title + "','" + content + "','" + dynasty_name + "','" + author_name + "','" + cat_name + "')"
    cursor.execute(sql)
    lastid = cursor.lastrowid
    print(lastid)
    db.commit()  # pymysql does not autocommit, so the insert must be committed
    cursor.close()
    db.close()
# Query the database and return all rows as a list
def dbselect(sql):
    db = pymysql.connect(host="localhost", user="root", password="admin123", db="zuowen")
    cursor = db.cursor()
    lst = []
    try:
        cursor.execute(sql)
        # data = cursor.fetchone()  # fetch a single row instead
        data = cursor.fetchall()
        for row in data:
            lst.append(row)
    except Exception:
        print("Query failed, sql: " + sql)
        db.rollback()
    cursor.close()
    db.close()
    return lst
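# Usage sketch (table and column names below are placeholders, shown for
# illustration only; insertsql writes to shici_org, dbselect reads from zuowen):
# insertsql("insert into sc_chapter(title, content) values ('some title', 'some content')")
# for row in dbselect("select * from some_table"):
#     print(row)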
# Fetch the author list page and parse it
header = {'content-type': 'application/json', 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0'}
url = "https://so.gushiwen.cn/authors/"
t = requests.get(url=url, headers=header)
html = t.text
doc = pq(html)
items = doc(".sonspic").items()
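# Sketch: the .sonspic blocks are parsed above but not used; one way to inspect
# them (the inner structure of each block is an assumption and may need
# adjusting against the live HTML):
for item in items:
    print(item.text())  # text content of the author block
    link = item.find("a").attr("href")
    if link:
        print(link)  # first link inside the block, if any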
# Loop over the local files under ./mizhi
path = os.listdir("./mizhi")
for f in path:
    print(f)
    cpath = os.listdir("./mizhi/" + f)
    for cf in cpath:
        filename = "data/mz/" + f + "/" + cf
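        # filename is built but not used above; a hedged follow-up (hypothetical,
        # assuming the intent is to copy each file into data/mz) would be:
        # import shutil
        # os.makedirs("data/mz/" + f, exist_ok=True)
        # shutil.copy("./mizhi/" + f + "/" + cf, filename)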
# Download an image with urlretrieve(url, local_path); both arguments below are
# placeholders to be filled in
urllib.request.urlretrieve("image URL", "image path/image name")
# Loop over a list together with its index (arr is a placeholder list)
for index, item in enumerate(arr):
    print(index, item)