enzoampil · ajdajd · Oct 25, 2020 · Feb 3, 2021 · Feb 4, 2021 · Feb 4, 2021
diff --git a/examples/get_pse_data_multiple_demo.ipynb b/examples/get_pse_data_multiple_demo.ipynb
@@ -0,0 +1,259 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fastquant import get_pse_data, get_pse_data_multiple"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Define variables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define a list of stock symbols\n",
+    "symbols = [\n",
+    "    \"AEV\",\n",
+    "    \"AP\",\n",
+    "    \"AGI\",\n",
+    "    \"AC\",\n",
+    "    \"ALI\",\n",
+    "]\n",
+    "\n",
+    "# Define start and stop dates\n",
+    "start_date = \"2021-02-01\"\n",
+    "end_date = \"2021-02-04\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Method 1: Without parallelization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "186it [01:20,  2.83it/s]\n",
+      "186it [01:18,  2.68it/s]\n",
+      "186it [01:21,  2.58it/s]\n",
+      "186it [01:19,  2.69it/s]\n",
+      "186it [01:22,  2.78it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 5.41 s, sys: 498 ms, total: 5.91 s\n",
+      "Wall time: 6min 45s\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "\n",
+    "for symbol in symbols:\n",
+    "    get_pse_data(symbol, start_date, end_date)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Method 2: With parallelization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 44 ms, sys: 31.3 ms, total: 75.3 ms\n",
+      "Wall time: 2min 52s\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=4)]: Done   5 out of   5 | elapsed:  2.9min finished\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "\n",
+    "data = get_pse_data_multiple(\n",
+    "    symbols, n_jobs=4, start_date=start_date, end_date=end_date\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Accessing individual stock data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>open</th>\n",
+       "      <th>high</th>\n",
+       "      <th>low</th>\n",
+       "      <th>close</th>\n",
+       "      <th>value</th>\n",
+       "      <th>volume</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>dt</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2021-02-01</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>40.8</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2456600.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2021-02-02</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>42.5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>922600.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2021-02-03</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>42.2</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>827400.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2021-02-04</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>43.5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>857700.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            open  high  low  close  value     volume\n",
+       "dt                                                  \n",
+       "2021-02-01   NaN   NaN  NaN   40.8    NaN  2456600.0\n",
+       "2021-02-02   NaN   NaN  NaN   42.5    NaN   922600.0\n",
+       "2021-02-03   NaN   NaN  NaN   42.2    NaN   827400.0\n",
+       "2021-02-04   NaN   NaN  NaN   43.5    NaN   857700.0"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data[\"AEV\"].head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/python/fastquant/data/__init__.py b/python/fastquant/data/__init__.py
@@ -9,6 +9,7 @@
     get_stock_table,
     # Combines get_phisix_data and get_pse_data_cache
     get_pse_data,
+    get_pse_data_multiple,
     # Gets data from PHISIX
     get_phisix_data,
     # Gets data from PSE Data Cache

diff --git a/python/fastquant/data/stocks/pse.py b/python/fastquant/data/stocks/pse.py
@@ -15,6 +15,7 @@
 import pandas as pd
 from pandas.io.json import json_normalize
 import numpy as np
+from joblib import Parallel, delayed
 import lxml.html as LH
 from tqdm import tqdm
 
@@ -324,6 +325,37 @@ def get_pse_data(
     return pse_data_df.set_index("dt")
 
 
+def get_pse_data_multiple(symbols, n_jobs=None, verbose=1, **kwargs):
+    """Return a dictionary of pricing data for the given PHISIX stock symbols.
+
+    This is a utility function for `get_pse_data` to be able to query multiple stock data with parallelization using joblib. The return format is a dictionary whose key-value pairsare the stock symbols and their respective pricing dataframes.
+
+    Parameters
+    ----------
+    symbols : list of str
+        List of symbols of the stock in the PSE. You can refer to this link: https://www.pesobility.com/stock.
+    n_jobs : int
+        The maximum number of concurrently running jobs. Refer to joblib.Parallel docs for more information: https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html.
+    verbose : int
+        The verbosity level: if non zero, progress messages are printed. Above 50, the output is sent to stdout. The frequency of the messages increases with the verbosity level. If it more than 10, all iterations are reported.
+
+    Returns
+    -------
+    data : dict
+        Dictionary of symbols and their pricing dataframe
+
+    Examples
+    --------
+    # TODO: write sample usage
+    """
+    lst = Parallel(n_jobs=n_jobs, verbose=verbose)(
+        delayed(get_pse_data)(symbol, **kwargs) for symbol in symbols
+    )
+    data = dict(zip(symbols, lst))
+
+    return data
+
+
 def datestring_to_datetime(date, sep="-"):
     ymd = date.split(sep)
     errmsg = "date format must be YYYY-MM-DD"