Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
259 changes: 259 additions & 0 deletions examples/get_pse_data_multiple_demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
{
Comment thread
ajdajd marked this conversation as resolved.
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from fastquant import get_pse_data, get_pse_data_multiple"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Define variables"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Define a list of stock symbols\n",
"symbols = [\n",
" \"AEV\",\n",
" \"AP\",\n",
" \"AGI\",\n",
" \"AC\",\n",
" \"ALI\",\n",
"]\n",
"\n",
"# Define start and stop dates\n",
"start_date = \"2021-02-01\"\n",
"end_date = \"2021-02-04\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Method 1: Without parallelization"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"186it [01:20, 2.83it/s]\n",
"186it [01:18, 2.68it/s]\n",
"186it [01:21, 2.58it/s]\n",
"186it [01:19, 2.69it/s]\n",
"186it [01:22, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 5.41 s, sys: 498 ms, total: 5.91 s\n",
"Wall time: 6min 45s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%time\n",
"\n",
"for symbol in symbols:\n",
" get_pse_data(symbol, start_date, end_date)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Method 2: With parallelization"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 44 ms, sys: 31.3 ms, total: 75.3 ms\n",
"Wall time: 2min 52s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=4)]: Done 5 out of 5 | elapsed: 2.9min finished\n"
]
}
],
"source": [
"%%time\n",
"\n",
"data = get_pse_data_multiple(\n",
" symbols, n_jobs=4, start_date=start_date, end_date=end_date\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Accessing individual stock data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>open</th>\n",
" <th>high</th>\n",
" <th>low</th>\n",
" <th>close</th>\n",
" <th>value</th>\n",
" <th>volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>dt</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2021-02-01</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>40.8</td>\n",
" <td>NaN</td>\n",
" <td>2456600.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-02-02</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>42.5</td>\n",
" <td>NaN</td>\n",
" <td>922600.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-02-03</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>42.2</td>\n",
" <td>NaN</td>\n",
" <td>827400.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021-02-04</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>43.5</td>\n",
" <td>NaN</td>\n",
" <td>857700.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" open high low close value volume\n",
"dt \n",
"2021-02-01 NaN NaN NaN 40.8 NaN 2456600.0\n",
"2021-02-02 NaN NaN NaN 42.5 NaN 922600.0\n",
"2021-02-03 NaN NaN NaN 42.2 NaN 827400.0\n",
"2021-02-04 NaN NaN NaN 43.5 NaN 857700.0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"AEV\"].head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
1 change: 1 addition & 0 deletions python/fastquant/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
get_stock_table,
# Combines get_phisix_data and get_pse_data_cache
get_pse_data,
get_pse_data_multiple,
# Gets data from PHISIX
get_phisix_data,
# Gets data from PSE Data Cache
Expand Down
32 changes: 32 additions & 0 deletions python/fastquant/data/stocks/pse.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
from joblib import Parallel, delayed
import lxml.html as LH
from tqdm import tqdm

Expand Down Expand Up @@ -324,6 +325,37 @@ def get_pse_data(
return pse_data_df.set_index("dt")


def get_pse_data_multiple(symbols, n_jobs=None, verbose=1, **kwargs):
    """Return a dictionary of pricing data for the given PSE stock symbols.

    This is a utility wrapper around `get_pse_data` that queries several
    stocks with parallelization using joblib. The return format is a
    dictionary whose key-value pairs are the stock symbols and their
    respective pricing dataframes.

    Parameters
    ----------
    symbols : iterable of str, or str
        Symbols of the stocks in the PSE. You can refer to this link:
        https://www.pesobility.com/stock. Any iterable is accepted
        (list, tuple, generator, ...); a single symbol passed as a bare
        string is treated as a one-element list.
    n_jobs : int, optional
        The maximum number of concurrently running jobs. Passed through
        unchanged to joblib.Parallel; refer to its docs for more information:
        https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html.
    verbose : int
        The verbosity level: if non zero, progress messages are printed.
        Above 50, the output is sent to stdout. The frequency of the
        messages increases with the verbosity level. If it is more than 10,
        all iterations are reported.
    **kwargs
        Keyword arguments forwarded unchanged to every `get_pse_data` call
        (e.g. `start_date`, `end_date`).

    Returns
    -------
    data : dict
        Dictionary of symbols and their pricing dataframe, in the order the
        symbols were given.

    Examples
    --------
    >>> data = get_pse_data_multiple(  # doctest: +SKIP
    ...     ["AEV", "AP"],
    ...     n_jobs=4,
    ...     start_date="2021-02-01",
    ...     end_date="2021-02-04",
    ... )
    >>> data["AEV"].head()  # doctest: +SKIP
    """
    # A bare string would otherwise be iterated character by character,
    # querying "A", "E", "V" instead of "AEV".
    if isinstance(symbols, str):
        symbols = [symbols]
    else:
        # `symbols` is walked twice (job dispatch, then zip). Materializing it
        # keeps one-shot iterables such as generators from being exhausted by
        # the first pass, which would silently produce an empty dictionary.
        symbols = list(symbols)

    frames = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(get_pse_data)(symbol, **kwargs) for symbol in symbols
    )

    # Parallel returns results in submission order, so they line up with
    # `symbols` position by position.
    return dict(zip(symbols, frames))


def datestring_to_datetime(date, sep="-"):
ymd = date.split(sep)
errmsg = "date format must be YYYY-MM-DD"
Expand Down