Update projects/conda.md

This commit is contained in:
2024-05-24 04:04:35 +00:00
parent 2e43907aba
commit df68f6d09b

View File

@@ -1,3 +1,208 @@
Here's the complete project structure and code in one document:
Project Structure:
```
stock_prediction/
├── data/
│ └── stock_data.db
├── scripts/
│ ├── fetch_stock_data.py
│ └── verify_data.py
├── app/
│ └── stock_prediction_app.py
├── environment.yml
└── requirements.txt
```
1. `stock_prediction/data/stock_data.db`:
- This is the SQLite database file that will store the historical stock data.
2. `stock_prediction/scripts/fetch_stock_data.py`:
```python
"""Download historical prices from Yahoo Finance into the project's SQLite database."""
import argparse
import sqlite3
from contextlib import closing
from pathlib import Path

import pandas as pd
import yfinance as yf

# Resolve the database from this file's location (scripts/ -> project root ->
# data/) so the script works no matter which directory it is launched from.
DB_PATH = Path(__file__).resolve().parent.parent / "data" / "stock_data.db"


def parse_args():
    """Parse the command-line options: ticker symbol and date range."""
    parser = argparse.ArgumentParser(description="Fetch historical stock data from Yahoo Finance.")
    parser.add_argument("--symbol", type=str, default="^GSPC", help="Stock symbol (default: ^GSPC)")
    parser.add_argument("--start_date", type=str, default="2000-01-01", help="Start date (default: 2000-01-01)")
    parser.add_argument("--end_date", type=str, default="2023-05-31", help="End date (default: 2023-05-31)")
    return parser.parse_args()


def main():
    """Download the requested price history and store it in `<symbol>_prices`.

    Exits with an error message, leaving the database untouched, when the
    download returns no rows.
    """
    args = parse_args()
    symbol = args.symbol

    data = yf.download(symbol, start=args.start_date, end=args.end_date)
    if data.empty:
        # Writing an empty frame with if_exists="replace" would wipe a
        # previously stored table, so stop before touching the database.
        raise SystemExit(f"No data returned for {symbol}; the database was left unchanged.")

    # NOTE(review): recent yfinance releases can return two-level columns
    # (field, ticker) even for a single symbol. Keep only the field level so
    # the stored columns stay "Close", "Open", ... -- confirm against the
    # installed yfinance version.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    table_name = f"{symbol}_prices"
    # closing() guarantees the connection is released even if the write fails.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        data.to_sql(name=table_name, con=conn, if_exists="replace")
    print(f"Data for {symbol} stored in the database.")


if __name__ == "__main__":
    main()
```
3. `stock_prediction/scripts/verify_data.py`:
```python
"""List the tables in the project database and preview the S&P 500 price table."""
import sqlite3
from contextlib import closing
from pathlib import Path

import pandas as pd

# Resolve the database from this file's location (scripts/ -> project root ->
# data/) so the script works no matter which directory it is launched from.
DB_PATH = Path(__file__).resolve().parent.parent / "data" / "stock_data.db"
TABLE_NAME = "^GSPC_prices"


def quote_identifier(name):
    """Return *name* wrapped in double quotes for use as an SQLite identifier.

    Table names such as ``^GSPC_prices`` contain characters that are not valid
    in a bare identifier, so they must be quoted; embedded double quotes are
    escaped by doubling them.
    """
    return '"' + name.replace('"', '""') + '"'


def main():
    """Print the table list and the first rows of ``TABLE_NAME``."""
    if not DB_PATH.exists():
        # sqlite3.connect() would silently create an empty database here.
        raise SystemExit(f"Database not found at {DB_PATH}; run fetch_stock_data.py first.")

    # closing() guarantees the connection is released even if a query fails.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table'", conn)
        print("Tables in the database:")
        print(tables)

        data = pd.read_sql_query(f"SELECT * FROM {quote_identifier(TABLE_NAME)}", conn)
        print(f"\nData from the {TABLE_NAME} table:")
        print(data.head())


if __name__ == "__main__":
    main()
```
4. `stock_prediction/app/stock_prediction_app.py`:
```python
import sqlite3
from pathlib import Path

import dash
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from dash.dependencies import Input, Output
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

try:
    # Dash >= 2.0 ships the component libraries inside the main package.
    from dash import dcc, html
except ImportError:
    # Older Dash releases provide them as separate packages.
    import dash_core_components as dcc
    import dash_html_components as html
# Entries offered in the dropdown, as (display label, ticker symbol) pairs.
_INDEX_CHOICES = (
    ("S&P 500", "^GSPC"),
    ("Dow Jones", "^DJI"),
    ("Nasdaq", "^IXIC"),
)

app = dash.Dash(__name__)

# Symbol selector: a label plus a dropdown that defaults to the S&P 500.
_symbol_picker = html.Div([
    html.Label("Select Stock Symbol"),
    dcc.Dropdown(
        id="stock-dropdown",
        options=[{"label": label, "value": ticker} for label, ticker in _INDEX_CHOICES],
        value="^GSPC",
    ),
])

# Container for the price chart that the callback fills in.
_chart_panel = html.Div([
    dcc.Graph(id="stock-graph"),
])

# Page layout: heading, symbol selector, then the chart.
app.layout = html.Div([
    html.H1("Stock Price Prediction"),
    _symbol_picker,
    _chart_panel,
])
# Resolve the database from this file's location (app/ -> project root ->
# data/) so the app works no matter which directory it is launched from.
DB_PATH = Path(__file__).resolve().parent.parent / "data" / "stock_data.db"

# Number of consecutive past closes fed to the model for each prediction.
LOOKBACK = 60


def _quote_identifier(name):
    """Return *name* double-quoted (quotes doubled) for use as an SQLite identifier."""
    return '"' + name.replace('"', '""') + '"'


def _load_prices(stock_symbol):
    """Return the Date/Close rows stored for *stock_symbol*, or None if absent.

    None is returned when the database file or the `<symbol>_prices` table
    does not exist, so the caller can show a message instead of failing.
    """
    if not DB_PATH.exists():
        return None

    table_name = f"{stock_symbol}_prices"
    conn = sqlite3.connect(DB_PATH)
    try:
        table_exists = conn.execute(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?",
            (table_name,),
        ).fetchone()
        if table_exists is None:
            return None
        # The symbol comes from the browser and contains "^", so the table
        # name must be quoted: a bare ^GSPC_prices is a syntax error.
        query = f"SELECT Date, Close FROM {_quote_identifier(table_name)}"
        return pd.read_sql_query(query, conn)
    finally:
        conn.close()


def _sliding_windows(series, lookback):
    """Build one `lookback`-long input window per row of *series* after the first `lookback`.

    *series* is the (n, 1) scaled price column; the result has shape
    (n - lookback, lookback, 1), the layout the LSTM layers are given.
    """
    windows = np.array([series[i - lookback:i, 0] for i in range(lookback, len(series))])
    return windows.reshape(-1, lookback, 1)


@app.callback(Output("stock-graph", "figure"),
              [Input("stock-dropdown", "value")])
def update_graph(stock_symbol):
    """Plot stored closing prices for *stock_symbol* with LSTM predictions.

    Reads the symbol's table from the SQLite database, fits a two-layer LSTM
    on min-max scaled closes, and overlays predictions for the most recent
    ``LOOKBACK`` trading days.

    Args:
        stock_symbol: Value selected in the dropdown (None when cleared).

    Returns:
        A plotly Figure. When no data is stored for the symbol the figure is
        empty and its title says so; when there is too little history to
        build a training window only the actual prices are drawn.
    """
    fig = go.Figure()
    fig.update_layout(title=f"{stock_symbol} Stock Price Prediction", xaxis_title="Date", yaxis_title="Price")

    data = _load_prices(stock_symbol) if stock_symbol else None
    if data is None or data.empty:
        fig.update_layout(title=f"No stored data for {stock_symbol}; run fetch_stock_data.py first")
        return fig

    fig.add_trace(go.Scatter(x=data["Date"], y=data["Close"], name="Actual Price"))
    if len(data) <= LOOKBACK:
        # Not enough history to form even one training window.
        return fig

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data["Close"].values.reshape(-1, 1))

    # Each sample is LOOKBACK consecutive closes; the target is the next close.
    X = _sliding_windows(scaled_data, LOOKBACK)
    y = scaled_data[LOOKBACK:, 0]

    # NOTE(review): the model is retrained on every dropdown change, which is
    # slow; consider caching the fitted model per symbol.
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)))
    model.add(LSTM(units=50))
    model.add(Dense(1))
    model.compile(loss="mean_squared_error", optimizer="adam")
    model.fit(X, y, epochs=10, batch_size=32)

    # Predict the most recent days. Each prediction needs the LOOKBACK rows
    # before it, so the input slice must be LOOKBACK + n_predictions long;
    # slicing only the last LOOKBACK rows yields zero windows.
    # NOTE: these rows were also part of the training data, so the plotted
    # predictions are in-sample.
    n_predictions = min(LOOKBACK, len(scaled_data) - LOOKBACK)
    recent = scaled_data[-(LOOKBACK + n_predictions):]
    X_test = _sliding_windows(recent, LOOKBACK)
    predicted_prices = scaler.inverse_transform(model.predict(X_test))

    fig.add_trace(go.Scatter(
        x=data["Date"].iloc[-n_predictions:],
        y=predicted_prices.flatten(),
        name="Predicted Price",
    ))
    return fig
if __name__ == "__main__":
    # `app.run` is the current Dash entry point and `run_server` is its
    # deprecated predecessor; fall back only on releases that lack `run`.
    start_server = app.run if hasattr(app, "run") else app.run_server
    start_server(debug=True)
```
5. `stock_prediction/environment.yml`:
```yaml
# Conda environment for the stock prediction project.
name: stock_prediction
channels:
  - defaults
dependencies:
  - python=3.9
  - pip
  # Python packages are installed by pip from requirements.txt; the entry
  # must be nested under the `pip:` key to be passed to pip.
  - pip:
      - -r requirements.txt
```
6. `stock_prediction/requirements.txt`:
```
dash
pandas
numpy
yfinance
scikit-learn
tensorflow
keras
plotly
```
To run the project:
1. Create a conda environment using the `environment.yml` file:
```
conda env create -f environment.yml
```
2. Activate the conda environment:
```
conda activate stock_prediction
```
3. Fetch historical stock data:
```
python scripts/fetch_stock_data.py --symbol "^GSPC" --start_date 2000-01-01 --end_date 2023-05-31
```
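You can change the stock symbol and date range as needed. Each symbol is stored in its own `<symbol>_prices` table, so repeat this step for `^DJI` and `^IXIC` if you want to select them in the app's dropdown.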
4. Verify the stored data (optional):
```
python scripts/verify_data.py
```
5. Run the Dash application:
```
python app/stock_prediction_app.py
```
6. Open a web browser and go to `http://localhost:8050` to access the application.
7. Select a stock symbol from the dropdown menu to see the actual and predicted stock prices on the graph.
This document provides the complete project structure, code files, and instructions for running the stock prediction project using conda for environment management and pip for package management.
---
Great! Let's update the project to use conda for creating the Python environment and pip for package management. Here's the updated project structure and instructions:
Project Structure: