The main functions are as follows:
1. Call api interface to get json data; 2. Convert json data to a csv file; 3. Convert each line of data in the csv file to a single xml file; 4. Package every 5 XML files
The modules used are:
csv,xml,threading,tarfile,queue
Daemons
memorandum:
Inter-thread communication can use a thread-safe queue (queue.Queue)
Threads signal each other with Event.wait() and Event.set()
Use a loop that waits for a notification and then jumps out; if the thread is a daemon ("guardian") thread, give it a flag and then break to exit.
"""Stock-quote download/convert/archive pipeline.

Created on 2018-10-04. @author: Xu Liangjun

Pipeline stages, each running in its own thread:
  1. DownloadThread (many)  - fetch quote data from the Sina HQ API (I/O bound)
  2. ConvertCSVThread (one) - drain the queue and write rows to fundnav.csv
  3. ConvertXMLThread (one) - turn each CSV data row into a single XML file
  4. TARThread (one)        - bundle every BATCH_SIZE XML files into a .tgz

Coordination:
  * producers/consumer communicate through a thread-safe queue.Queue
  * stage hand-offs use threading.Event wait()/set()
  * TARThread is a daemon thread, so it ends when the main thread ends
"""
import csv
import logging
import os
import tarfile
import time
from functools import wraps
from queue import Queue
from threading import Thread, Event
from xml.etree.ElementTree import ElementTree, tostring, Element

import requests

# Number of XML files bundled into each .tgz archive.
BATCH_SIZE = 5
# Directory holding the intermediate per-row XML files.
XML_DIR = 'tarxml'


class DownloadThread(Thread):
    """I/O-bound producer: fetches one stock quote and puts it on the queue.

    The endpoint http://hq.sinajs.cn/list=<sid> returns a line like:
        var hq_str_sh601015="Shaanxi black cat,6.400,6.420,6.440,...,2018-09-28,15:00:00,00";
    The comma-separated payload between the double quotes is what we parse.
    """

    def __init__(self, sid, queue):
        Thread.__init__(self)
        self.sid = sid
        self.url = "http://hq.sinajs.cn/list=%s" % sid
        self.queue = queue

    def download(self, url):
        """Return the quote fields as a list of strings, or None on failure."""
        try:
            # Fix: the original timeout=3000 is interpreted by requests as
            # *seconds* (~50 minutes); 3 seconds was clearly the intent.
            response = requests.get(url, timeout=3)
        except requests.RequestException:
            # Network error: report failure instead of killing the thread.
            return None
        response.encoding = "GBK"  # Sina serves GBK-encoded text
        if response.ok:
            content = response.text
            # The payload sits between the first and the last double quote.
            return content[content.find('"') + 1:content.rfind('"')].split(",")
        return None

    def run(self):
        data = self.download(self.url)
        # Always publish (even None on failure) so the consumer sees a
        # consistent stream terminated only by the (-1, None) sentinel.
        self.queue.put((self.sid, data))


class ConvertCSVThread(Thread):
    """Single consumer: drains the quote queue into fundnav.csv.

    Stops when the (-1, None) sentinel is received, then sets csvEvent to
    tell ConvertXMLThread that the CSV file exists and is complete.
    """

    def __init__(self, queue, csvEvent):
        Thread.__init__(self)
        self.queue = queue
        self.csvEvent = csvEvent

    def dataToCsv(self, writer):
        """Consume queue items until the (-1, None) sentinel arrives."""
        while True:
            sid, data = self.queue.get()
            if sid == -1:
                # Sentinel: all download threads have finished.
                break
            if data and data[0] != '':
                # Columns: name, today's open, yesterday's close, date.
                writer.writerows([(data[0], data[1], data[2], data[30])])

    def run(self):
        # Fix: manage the file handle with a context manager so it is closed
        # even if writing fails.
        with open("fundnav.csv", "w", encoding='utf-8-sig') as csvFile:
            writer = csv.writer(csvFile, lineterminator='\n')
            writer.writerows([("Fund name", "Opening price", "Closing price", "date")])
            self.dataToCsv(writer)
        # Tell the XML converter the CSV file has been fully written.
        self.csvEvent.set()


class ConvertXMLThread(Thread):
    """Converts each CSV data row into its own XML file.

    After every BATCH_SIZE files (and after the final, possibly partial,
    batch) it sets cEvent to wake TARThread, then blocks on tEvent until
    that batch has been archived.
    """

    def __init__(self, cEvent, tEvent, csvEvent):
        Thread.__init__(self)
        self.cEvent = cEvent
        self.tEvent = tEvent
        self.csvEvent = csvEvent

    def csvToXml(self, scsv):
        """Split the CSV at *scsv* into one <Data><Row>.../</Data> file per row."""
        # Fix: the output directory was never created, so ElementTree.write
        # would raise FileNotFoundError on a fresh checkout.
        os.makedirs(XML_DIR, exist_ok=True)
        with open(scsv, 'r', encoding='utf-8-sig') as f:
            reader = csv.reader(f)
            # XML tag names cannot contain spaces.
            headers = [h.replace(' ', '') for h in next(reader)]
            rows = list(reader)
        total = len(rows)       # total number of data rows
        batch_pos = 0           # position inside the current batch (also the file name)
        done = 0                # rows processed overall
        for row in rows:
            batch_pos += 1
            done += 1
            root = Element('Data')
            eRow = Element('Row')
            for tag, text in zip(headers, row):
                e = Element(tag)
                e.text = text
                eRow.append(e)
            root.append(eRow)
            ElementTree(root).write("%s/%s.xml" % (XML_DIR, batch_pos), "utf-8")
            if done == total:
                # Final (possibly partial) batch: hand over to TARThread, wait
                # for it to finish, then stop.
                self.cEvent.set()
                self.tEvent.wait()
                break
            elif batch_pos == BATCH_SIZE:
                # Full batch: archive it, then reuse file names 1..BATCH_SIZE
                # (TARThread deletes the packed files).
                self.cEvent.set()
                self.tEvent.wait()
                self.tEvent.clear()
                batch_pos = 0

    def run(self):
        # Block until the CSV file has been completely written.
        self.csvEvent.wait()
        self.csvToXml("fundnav.csv")


class TARThread(Thread):
    """Daemon thread: archives each ready batch of XML files into <n>.tgz.

    Daemonized so it ends automatically when the main thread ends.
    """

    def __init__(self, cEvent, tEvent):
        Thread.__init__(self)
        self.count = 0          # sequence number for archive file names
        self.cEvent = cEvent
        self.tEvent = tEvent
        # Fix: Thread.setDaemon() is deprecated; assign the attribute.
        self.daemon = True

    def tarXML(self):
        """Pack (and delete) every XML file in XML_DIR into the next archive."""
        xml_files = [f for f in os.listdir(XML_DIR) if f.endswith('.xml')]
        if not xml_files:
            # Fix: the original created an empty .tgz, read tf.members after
            # close() to detect emptiness, deleted the file, and still burned
            # an archive number. Simply skip when there is nothing to pack.
            return
        self.count += 1
        tfname = '%d.tgz' % self.count
        with tarfile.open(tfname, 'w:gz') as tf:
            for fname in xml_files:
                path = '%s/%s' % (XML_DIR, fname)
                tf.add(path)
                os.remove(path)

    def run(self):
        while True:
            self.cEvent.wait()      # block until a batch is ready
            self.tarXML()
            self.cEvent.clear()
            self.tEvent.set()       # let the XML converter continue


def handle():
    """Wire up and run the whole pipeline, then report elapsed time.

    Many I/O-bound producers (DownloadThread) feed one consumer
    (ConvertCSVThread) through a shared queue; the XML and TAR stages
    coordinate through Events.
    """
    start = time.time()
    queue = Queue()
    codes = ['sh601006', 'sh601005', 'sh601003', 'sh601002', 'sh601001',
             'sh601007', 'sh601008', 'sh601009', 'sh601010', 'sh601011',
             'sh601012', 'sh601013', 'sh601014', 'sh601015', 'sh601016',
             'sh601017', 'sh601018', 'sh601019', 'sh601020', 'sh601021',
             'sh601022', 'sh601023', 'sh601024', 'sh601025', 'sh601026']
    csvEvent = Event()  # set once fundnav.csv is fully written
    ct = ConvertCSVThread(queue, csvEvent)
    dts = [DownloadThread(code, queue) for code in codes]
    # Started threads merely become runnable; the scheduler decides when
    # each actually runs.
    for t in dts:
        t.start()
    ct.start()
    cEvent = Event()    # "a batch of XML files is ready to archive"
    tEvent = Event()    # "archiving finished; converter may continue"
    cxml = ConvertXMLThread(cEvent, tEvent, csvEvent)
    tart = TARThread(cEvent, tEvent)
    cxml.start()
    tart.start()
    # Wait for every download, then unblock the CSV consumer with a sentinel.
    for t in dts:
        t.join()
    queue.put((-1, None))
    ct.join()
    print("time consuming %s" % (time.time() - start))


if __name__ == "__main__":
    # Fix: guard the entry point so importing this module does not start
    # network downloads as a side effect.
    handle()