A multiprocessing example in Python
05 Apr 2017
This is a simple example demonstrating how to run a Python script with different inputs in parallel and merge the results. The application here is to compute aggregate statistics for different dates through computationally intensive queries, and then merge the results across all dates.
import glob
import os
import subprocess
from multiprocessing import Process

import pandas as pd
def get_days(start, num_of_days):
    """Return ``num_of_days`` consecutive calendar dates beginning at ``start``.

    Args:
        start: First date of the range; passed straight to
            ``pd.date_range`` as its start value.
        num_of_days: Number of daily periods to generate, ``start`` included,
            so the last date is ``start`` + ``num_of_days`` - 1 days.

    Returns:
        A list of ``"YYYY-MM-DD"`` strings, one per day, in ascending order.
    """
    date_range = pd.date_range(start, periods=num_of_days, freq="1D")
    # Build a real list: on Python 3 ``map`` yields a one-shot iterator, which
    # would be silently empty on a second pass and cannot be indexed or sized.
    return [dt.strftime("%Y-%m-%d") for dt in date_range]
def f(x):
    """Run ``foo.py`` for one date and return the script's exit code.

    Defined as a module-level function rather than a lambda so that it can be
    pickled when ``multiprocessing`` uses the "spawn" start method.

    Args:
        x: Date string passed to ``foo.py`` as the value of ``--date``.

    Returns:
        The return code of the ``python foo.py --date <x>`` child process.
    """
    # An argument list (no shell) avoids quoting problems and shell injection
    # that come with interpolating the value into a command string.
    return subprocess.call(["python", "foo.py", "--date", x])


# The guard is required for multiprocessing: under the "spawn" start method
# every child re-imports this module, and unguarded top-level code would run
# again (spawning more processes and re-running the merge) in each child.
if __name__ == "__main__":
    children = []
    # NOTE(review): `start` and `end` are not defined in this snippet and must
    # be set before this loop. get_days() treats its second argument as a
    # number of days, not as an end date -- confirm `end` holds a day count.
    for date in get_days(start, end):
        p = Process(target=f, args=(date,))
        p.start()
        children.append(p)

    # Wait for every worker to finish before reading any output.
    for x in children:
        x.join()

    # merge results
    # Presumably each foo.py run writes a CSV into the current directory --
    # verify against foo.py. Every *.csv found here is concatenated.
    all_df = (pd.read_csv(filename) for filename in glob.glob("*.csv"))
    merge = pd.concat(all_df, ignore_index=True)