The goal of this project is to see whether the S&P 500 actually has large effects on employment and on the number of new companies opening.
The first dataset we are looking at is the Business Dynamics Statistics (BDS) dataset curated by Austin Cory Bart, Joung Min Choi, and Bo Guan. It records changes in employment, firm creation, and firm destruction at the state level from 1978 to 2018. The second dataset is historical data for the S&P 500 index. The Standard and Poor's 500 is an index based on the stock prices of roughly 500 of the largest publicly traded companies in the United States, weighted by market capitalization. It is widely regarded as a benchmark for the stock market as a whole, and the data goes back to 1871. We are also using the US employment rate (ages 15-64) over the same time period.
Some columns are self-explanatory, but here are definitions for clarity:
S&P 500
Year : The year recorded
SP500 : Price of the S&P 500 index, which is a weighted average of the stock prices within the index
Long Interest Rate : The prevailing long-term market interest rate at a given time
Earnings : composite earnings per share
Consumer Price Index : Price level of a representative basket of consumer goods in the current year relative to a base year
Business Dynamics Statistics
State : State in which data is from
Year : The year recorded
DHS Denom : Two-period trailing moving average of employment
Number of Firms : Count of firms
Net Job Creation : Jobs created minus jobs destroyed that year
Establishments Entered : count of establishments that entered during the year
Establishments Entered Rate : number of establishments that entered during the year divided by the number of establishments
Establishments Exited : count of establishments that exited during the year
Establishments Exited Rate : number of establishments that exited during the year divided by the number of establishments
Firm Exits : Count of firms exited that year
Jobs Created : Count of jobs created that year
Job Creation Rate : number of jobs that were created in the last year divided by the DHS denominator
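As a quick, self-contained sanity check on these definitions, the Job Creation Rate can be recomputed from the jobs-created count and the DHS denominator. This is a minimal sketch using the raw CORGIS column names (they are renamed later in the notebook); the printed difference should be near zero apart from rounding.
import pandas as pd
# load the BDS data and compare the reported rate to count / DHS denominator (in percent)
bds_check = pd.read_csv('https://think.cs.vt.edu/corgis/datasets/csv/business_dynamics/business_dynamics.csv')
implied_rate = bds_check['Data.Job Creation.Count'] / bds_check['Data.DHS Denominator'] * 100
print((implied_rate - bds_check['Data.Job Creation.Rate']).abs().max())  # should be close to zero if the definition holds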
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
bsd_df = pd.read_csv('https://think.cs.vt.edu/corgis/datasets/csv/business_dynamics/business_dynamics.csv')
snp_df = pd.read_csv('https://datahub.io/core/s-and-p-500/r/data.csv')
em_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1318&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=LREM64TTUSM156S&scale=left&cosd=1977-01-01&coed=2023-10-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2023-12-01&revision_date=2023-12-01&nd=1977-01-01')
With the imports made and the data sets loaded, let's take a quick look at each one.
snp_df.head()
Date | SP500 | Dividend | Earnings | Consumer Price Index | Long Interest Rate | Real Price | Real Dividend | Real Earnings | PE10 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1871-01-01 | 4.44 | 0.26 | 0.4 | 12.46 | 5.32 | 89.00 | 5.21 | 8.02 | NaN |
1 | 1871-02-01 | 4.50 | 0.26 | 0.4 | 12.84 | 5.32 | 87.53 | 5.06 | 7.78 | NaN |
2 | 1871-03-01 | 4.61 | 0.26 | 0.4 | 13.03 | 5.33 | 88.36 | 4.98 | 7.67 | NaN |
3 | 1871-04-01 | 4.74 | 0.26 | 0.4 | 12.56 | 5.33 | 94.29 | 5.17 | 7.96 | NaN |
4 | 1871-05-01 | 4.86 | 0.26 | 0.4 | 12.27 | 5.33 | 98.93 | 5.29 | 8.14 | NaN |
bsd_df.head()
State | Year | Data.DHS Denominator | Data.Number of Firms | Data.Calculated.Net Job Creation | Data.Calculated.Net Job Creation Rate | Data.Calculated.Reallocation Rate | Data.Establishments.Entered | Data.Establishments.Entered Rate | Data.Establishments.Exited | ... | Data.Job Creation.Births | Data.Job Creation.Continuers | Data.Job Creation.Count | Data.Job Creation.Rate | Data.Job Creation.Rate/Births | Data.Job Destruction.Continuers | Data.Job Destruction.Count | Data.Job Destruction.Deaths | Data.Job Destruction.Rate | Data.Job Destruction.Rate/Deaths | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Alabama | 1978 | 972627 | 54597 | 74178 | 7.627 | 29.183 | 10457 | 16.375 | 7749 | ... | 76167 | 139930 | 216097 | 22.218 | 7.831 | 81829 | 141919 | 60090 | 14.591 | 6.178 |
1 | Alabama | 1979 | 1037995 | 55893 | 58099 | 5.597 | 31.251 | 9668 | 14.701 | 7604 | ... | 76618 | 143675 | 220293 | 21.223 | 7.381 | 101842 | 162194 | 60352 | 15.626 | 5.814 |
2 | Alabama | 1980 | 1064141 | 54838 | -7253 | -0.682 | 32.464 | 8714 | 13.168 | 8871 | ... | 65734 | 106998 | 172732 | 16.232 | 6.177 | 120563 | 179985 | 59422 | 16.914 | 5.584 |
3 | Alabama | 1981 | 1051448 | 54791 | -9132 | -0.869 | 31.206 | 8371 | 12.687 | 7516 | ... | 70115 | 93941 | 164056 | 15.603 | 6.668 | 121251 | 173188 | 51937 | 16.471 | 4.940 |
4 | Alabama | 1982 | 1037728 | 53219 | -9658 | -0.931 | 33.144 | 8185 | 12.497 | 8621 | ... | 73083 | 98887 | 171970 | 16.572 | 7.043 | 119658 | 181628 | 61970 | 17.502 | 5.972 |
5 rows × 25 columns
em_df.head()
DATE | LREM64TTUSM156S | |
---|---|---|
0 | 1977-01-01 | 64.566791 |
1 | 1977-02-01 | 64.683217 |
2 | 1977-03-01 | 64.914083 |
3 | 1977-04-01 | 65.158112 |
4 | 1977-05-01 | 65.267202 |
Some of the data is a little messy, so let's do some cleaning.
First, we will average the monthly S&P and employment data to an annual basis to line up with the BDS data.
# average the monthly S&P data by calendar year
snp_df['Year'] = pd.DatetimeIndex(snp_df['Date']).year
snp_df['Month'] = pd.DatetimeIndex(snp_df['Date']).month
snp = snp_df.groupby('Year').mean(numeric_only=True)
# average the monthly employment data by calendar year
em_df['Year'] = pd.DatetimeIndex(em_df['DATE']).year
em_df['Month'] = pd.DatetimeIndex(em_df['DATE']).month
em = em_df.groupby('Year').mean(numeric_only=True)
Next, we drop the columns we will not be observing, since they are not relevant to the comparison between the datasets.
snp.drop(['Dividend', 'Real Price', 'Real Dividend', 'Real Earnings', 'PE10','Month'], axis=1, inplace=True)
bsd_df.drop(['Data.Calculated.Net Job Creation Rate', 'Data.Calculated.Reallocation Rate', 'Data.Establishments.Physical Locations', 'Data.Firm Exits.Establishment Exit', 'Data.Firm Exits.Employments', 'Data.Job Creation.Births', 'Data.Job Creation.Continuers', 'Data.Job Creation.Rate/Births', 'Data.Job Destruction.Continuers', 'Data.Job Destruction.Count', 'Data.Job Destruction.Deaths', 'Data.Job Destruction.Rate', 'Data.Job Destruction.Rate/Deaths'], axis=1, inplace=True)
em.drop(['Month'], axis=1, inplace=True)
To make the columns a little more readable, we will rename a few columns from the BDS set.
bsd_df = bsd_df.rename(columns = { "Data.DHS Denominator" : "DHS Denom",
"Data.Number of Firms" : "Number of Firms",
"Data.Calculated.Net Job Creation" : "Net Job Creation",
"Data.Establishments.Entered" : "Establishments Entered",
"Data.Establishments.Entered Rate" : "Establishments Entered Rate",
"Data.Establishments.Exited" : "Establishments Exited",
"Data.Establishments.Exited Rate" : "Establishments Exited Rate",
"Data.Firm Exits.Count" : "Firms Exits",
"Data.Job Creation.Count" : "Jobs Created",
"Data.Job Creation.Rate" : "Job Creation Rate"
})
em = em.rename(columns = {"LREM64TTUSM156S" : "Employment Rate"})
Next we need to convert the BDS data to a national scale: counts are summed across states, and the summed rate columns are divided by 50 to approximate a national average rate.
# sum state-level counts to get national totals
national_bsd_df = bsd_df.groupby('Year').sum(numeric_only=True)
# the rate columns were summed across the states, so divide by 50 to recover an average rate
national_bsd_df['Establishments Entered Rate'] = national_bsd_df['Establishments Entered Rate']/50
national_bsd_df['Establishments Exited Rate'] = national_bsd_df['Establishments Exited Rate']/50
national_bsd_df['Job Creation Rate'] = national_bsd_df['Job Creation Rate']/50
# percent difference of each year's value relative to the following year (shift(-1) looks one row ahead)
snp['Interest Change'] = ((snp['Long Interest Rate'] - snp['Long Interest Rate'].shift(-1))/snp['Long Interest Rate'].shift(-1))*100
snp['CPI Change'] = ((snp['Consumer Price Index'] - snp['Consumer Price Index'].shift(-1))/snp['Consumer Price Index'].shift(-1))*100
Since we're only using data from 2000-2018, let's restrict the S&P data to our timeframe.
modern_snp = snp.loc[2000:2018]
modern_snp.head()
SP500 | Earnings | Consumer Price Index | Long Interest Rate | Interest Change | CPI Change | |
---|---|---|---|---|---|---|
Year | ||||||
2000 | 1427.007500 | 51.490000 | 172.200000 | 6.029167 | 20.162764 | -2.748494 |
2001 | 1192.078333 | 35.916667 | 177.066667 | 5.017500 | 8.819808 | -1.561269 |
2002 | 995.630000 | 27.025833 | 179.875000 | 4.610833 | 14.840183 | -2.219706 |
2003 | 963.689167 | 36.285000 | 183.958333 | 4.015000 | -6.063560 | -2.607430 |
2004 | 1130.547500 | 55.300000 | 188.883333 | 4.274167 | -0.369075 | -3.281417 |
Now we're going to normalize the S&P data via min-max scaling
df_min_max_scaled = modern_snp.copy()
# apply normalization techniques
for column in df_min_max_scaled.columns:
    df_min_max_scaled[column] = (df_min_max_scaled[column] - df_min_max_scaled[column].min()) / (df_min_max_scaled[column].max() - df_min_max_scaled[column].min())
# view normalized data
print(df_min_max_scaled)
Year | SP500 | Earnings | Consumer Price Index | Long Interest Rate | Interest Change | CPI Change
---|---|---|---|---|---|---
2000 | 0.272380 | 0.399960 | 0.000000 | 1.000000 | 0.558403 | 0.233799
2001 | 0.139143 | 0.222137 | 0.063317 | 0.760647 | 0.412754 | 0.526755
2002 | 0.027730 | 0.120617 | 0.099854 | 0.664432 | 0.490059 | 0.364281
2003 | 0.009615 | 0.226342 | 0.152979 | 0.523462 | 0.221645 | 0.268607
2004 | 0.104246 | 0.443464 | 0.217054 | 0.584779 | 0.294765 | 0.102296
2005 | 0.147640 | 0.542862 | 0.300428 | 0.588525 | 0.165070 | 0.140861
2006 | 0.206400 | 0.677961 | 0.382393 | 0.707216 | 0.344579 | 0.227413
2007 | 0.300524 | 0.719677 | 0.457234 | 0.668770 | 0.636566 | 0.000000
2008 | 0.155482 | 0.353515 | 0.560763 | 0.441049 | 0.461160 | 1.000000
2009 | 0.000000 | 0.000000 | 0.550810 | 0.344046 | 0.316483 | 0.513773
2010 | 0.109214 | 0.578335 | 0.596596 | 0.333991 | 0.496931 | 0.156999
2011 | 0.182705 | 0.770917 | 0.686139 | 0.232650 | 1.000000 | 0.411710
2012 | 0.245472 | 0.810044 | 0.746701 | 0.000000 | 0.000000 | 0.555665
2013 | 0.394600 | 0.864396 | 0.790470 | 0.129732 | 0.203485 | 0.518268
2014 | 0.558023 | 0.986802 | 0.839616 | 0.174685 | 0.542987 | 0.882513
2015 | 0.632053 | 0.887100 | 0.843302 | 0.078864 | 0.504603 | 0.604687
2016 | 0.649433 | 0.823490 | 0.882192 | 0.009267 | 0.030388 | 0.397427
2017 | 0.851545 | 1.000000 | 0.948696 | 0.124803 | 0.095540 | 0.521326
2018 | 1.000000 | NaN | 1.000000 | 0.228904 | NaN | NaN
national_bsd_df.head()
DHS Denom | Number of Firms | Net Job Creation | Establishments Entered | Establishments Entered Rate | Establishments Exited | Establishments Exited Rate | Firms Exits | Jobs Created | Job Creation Rate | |
---|---|---|---|---|---|---|---|---|---|---|
Year | ||||||||||
1978 | 63623982 | 3468661 | 4890247 | 619312 | 14.90834 | 464562 | 10.96870 | 312668 | 13977564 | 21.62646 |
1979 | 67864608 | 3575606 | 4007005 | 593369 | 13.72708 | 424937 | 9.83448 | 285625 | 13215426 | 18.60928 |
1980 | 70080779 | 3589225 | 387682 | 552961 | 12.32880 | 472336 | 10.83810 | 329217 | 11531276 | 15.67912 |
1981 | 70504576 | 3641065 | 798676 | 548290 | 12.01528 | 429210 | 9.62642 | 281901 | 11417554 | 15.44372 |
1982 | 70990844 | 3635666 | 465009 | 569180 | 12.48792 | 501537 | 11.09402 | 347321 | 12092409 | 16.07354 |
bsd_df.head()
State | Year | DHS Denom | Number of Firms | Net Job Creation | Establishments Entered | Establishments Entered Rate | Establishments Exited | Establishments Exited Rate | Firms Exits | Jobs Created | Job Creation Rate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Alabama | 1978 | 972627 | 54597 | 74178 | 10457 | 16.375 | 7749 | 12.135 | 5248 | 216097 | 22.218 |
1 | Alabama | 1979 | 1037995 | 55893 | 58099 | 9668 | 14.701 | 7604 | 11.562 | 5234 | 220293 | 21.223 |
2 | Alabama | 1980 | 1064141 | 54838 | -7253 | 8714 | 13.168 | 8871 | 13.406 | 6058 | 172732 | 16.232 |
3 | Alabama | 1981 | 1051448 | 54791 | -9132 | 8371 | 12.687 | 7516 | 11.391 | 5176 | 164056 | 15.603 |
4 | Alabama | 1982 | 1037728 | 53219 | -9658 | 8185 | 12.497 | 8621 | 13.163 | 6077 | 171970 | 16.572 |
em.head()
Employment Rate | |
---|---|
Year | |
1977 | 65.417180 |
1978 | 67.118328 |
1979 | 67.924363 |
1980 | 67.183178 |
1981 | 67.070233 |
Here we are making a scatterplot to examine the correlation between the S&P 500 and the employment rate data.
plt.scatter(em['Employment Rate'].loc[2000:2018], modern_snp['SP500'])
# obtain m (slope) and b (intercept) of the linear regression line
m, b = np.polyfit(em['Employment Rate'].loc[2000:2018], modern_snp['SP500'], 1)
# add the linear regression line to the scatterplot
plt.plot(em['Employment Rate'].loc[2000:2018], m*(em['Employment Rate'].loc[2000:2018])+b)
In the scatterplot, we can see that the employment rate and the S&P 500 are indeed positively correlated: years with more jobs tend to coincide with a stronger S&P 500 (a correlation, not necessarily causation).
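To put a number on this, the same slices can be fed to a correlation coefficient; a minimal sketch using the variables already defined above:
# quantify the strength of the relationship shown in the scatterplot
r = np.corrcoef(em['Employment Rate'].loc[2000:2018], modern_snp['SP500'])[0, 1]
print(f'Correlation between employment rate and S&P 500: {r:.2f}')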
To answer our question, we wanted to see the relationship between the S&P 500 and the number of firms across the US.
plt.scatter(national_bsd_df['Number of Firms'].loc[2000:2018], modern_snp['SP500'])
m, b = np.polyfit(national_bsd_df['Number of Firms'].loc[2000:2018], modern_snp['SP500'], 1)
plt.plot(national_bsd_df['Number of Firms'].loc[2000:2018], m*(national_bsd_df['Number of Firms'].loc[2000:2018])+b)
Changes in interest rates are commonly said to have a strong effect on the stock market; rates have been raised in order to slow inflation by slowing the economy. In this chart we have overlaid the long interest rate and the S&P 500 price to see whether the two are indeed negatively correlated.
fig, ax = plt.subplots()
# Plot the first x and y axes:
modern_snp.plot(use_index = True, y = 'SP500', ax = ax)
modern_snp.plot(use_index = True, y = 'Long Interest Rate', ax = ax, secondary_y = True)
We can see that there is a negative correlation between the interest rate and the S&P price; however, it isn't as strong as we expected.
This raises the question of whether interest rates affect other financial factors which in turn affect the stock market.
We saw earlier that there is a very strong positive correlation between the number of firms in the US and the S&P 500, so we decided to compare interest rates with the number of firms. We expect a strong negative correlation: when interest rates are lower, people are more likely to take out loans to start businesses.
plt.scatter(modern_snp['Long Interest Rate'], national_bsd_df['Number of Firms'].loc[2000:2018])
m, b = np.polyfit(modern_snp['Long Interest Rate'], national_bsd_df['Number of Firms'].loc[2000:2018], 1)
plt.plot(modern_snp['Long Interest Rate'], m*(modern_snp['Long Interest Rate'])+b)
Surprisingly, there is very little correlation between interest rates and the number of firms. This hints that interest rates may not affect the economy, or at least firm creation, as strongly as we might think.
Now we want to see how CPI and interest rates are related. For this chart we use the normalized data and the year-to-year % changes calculated earlier.
df_min_max_scaled.plot(use_index = True, y=["CPI Change", "Interest Change"], kind="bar")
This shows a loose correlation between interest rates and CPI, as they have moved in a similar pattern over the years.
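To attach a number to this "loose correlation", the two change series can be correlated directly. Min-max scaling is a linear transformation, so it does not change the correlation; a minimal sketch (the missing last year is ignored by .corr):
# correlate the year-to-year changes in CPI and the long interest rate
print(df_min_max_scaled['CPI Change'].corr(df_min_max_scaled['Interest Change']))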
We want to run modeling to predict the S&P price based on employment data and interest rates. We also want to see the statistical relationship between CPI and job creation.
To start the modeling, we reloaded the datasets and combined them into one. So that the model has more than 18 points to work with, we expanded the window to 1978-2018. Once again, columns were renamed and the datasets were brought to a nationwide, annual level.
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
#loading the datasets
bsd_df = pd.read_csv('https://think.cs.vt.edu/corgis/datasets/csv/business_dynamics/business_dynamics.csv')
snp_df = pd.read_csv('https://datahub.io/core/s-and-p-500/r/data.csv')
em_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1318&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=LREM64TTUSM156S&scale=left&cosd=1977-01-01&coed=2023-10-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2023-12-01&revision_date=2023-12-01&nd=1977-01-01')
# reprocessing S&P 500 Data
snp_df['Year'] = pd.DatetimeIndex(snp_df['Date']).year
snp_df = snp_df.groupby('Year').mean(numeric_only=True)
# reprocessing Employment Data
em_df['Year'] = pd.DatetimeIndex(em_df['DATE']).year
em_df = em_df.groupby('Year').mean(numeric_only=True)
# renaming columns for clarity
bsd_df = bsd_df.rename(columns={
"Data.DHS Denominator": "DHS Denom",
"Data.Number of Firms": "Number of Firms",
"Data.Calculated.Net Job Creation": "Net Job Creation",
"Data.Establishments.Entered": "Establishments Entered",
"Data.Establishments.Entered Rate": "Establishments Entered Rate",
"Data.Establishments.Exited": "Establishments Exited",
"Data.Establishments.Exited Rate": "Establishments Exited Rate",
"Data.Firm Exits.Count": "Firms Exits",
"Data.Job Creation.Count": "Jobs Created",
"Data.Job Creation.Rate": "Job Creation Rate"
})
# converting the BSD data to a national scale
national_bsd_df = bsd_df.groupby('Year').sum(numeric_only=True)
national_bsd_df['Establishments Entered Rate'] = national_bsd_df['Establishments Entered Rate'] / 50
national_bsd_df['Establishments Exited Rate'] = national_bsd_df['Establishments Exited Rate'] / 50
national_bsd_df['Job Creation Rate'] = national_bsd_df['Job Creation Rate'] / 50
# restricting the data to the period 1978-2018
modern_snp = snp_df.loc[1978:2018]
modern_em = em_df.loc[1978:2018]
modern_bsd = national_bsd_df.loc[1978:2018]
# combining datasets for analysis
combined_df = pd.concat([modern_snp, modern_em, modern_bsd], axis=1)
combined_df.head()
SP500 | Dividend | Earnings | Consumer Price Index | Long Interest Rate | Real Price | Real Dividend | Real Earnings | PE10 | LREM64TTUSM156S | ... | Data.Job Creation.Births | Data.Job Creation.Continuers | Jobs Created | Job Creation Rate | Data.Job Creation.Rate/Births | Data.Job Destruction.Continuers | Data.Job Destruction.Count | Data.Job Destruction.Deaths | Data.Job Destruction.Rate | Data.Job Destruction.Rate/Deaths | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Year | |||||||||||||||||||||
1978 | 96.020000 | 4.916667 | 11.392500 | 65.233333 | 8.410000 | 367.645000 | 18.830000 | 43.618333 | 9.378333 | 67.118328 | ... | 4962352 | 9015212 | 13977564 | 21.62646 | 391.116 | 5179679 | 9087317 | 3907638 | 685.347 | 298.134 |
1979 | 103.022500 | 5.376667 | 13.981667 | 72.575000 | 9.442500 | 354.778333 | 18.510000 | 48.105833 | 8.926667 | 67.924363 | ... | 4632428 | 8582998 | 13215426 | 18.60928 | 334.204 | 5741812 | 9208421 | 3466609 | 652.701 | 248.511 |
1980 | 118.783333 | 5.950000 | 14.925833 | 82.408333 | 11.460000 | 359.646667 | 18.040833 | 45.305833 | 8.831667 | 67.183178 | ... | 4428137 | 7103139 | 11531276 | 15.67912 | 303.970 | 7565920 | 11143594 | 3577674 | 765.026 | 253.869 |
1981 | 128.041667 | 6.415833 | 15.010000 | 90.925000 | 13.910833 | 352.351667 | 17.631667 | 41.251667 | 8.464167 | 67.070233 | ... | 4442950 | 6974604 | 11417554 | 15.44372 | 302.958 | 7462470 | 10618878 | 3156408 | 713.231 | 216.159 |
1982 | 119.725000 | 6.792500 | 14.021667 | 96.500000 | 13.001667 | 309.840000 | 17.586667 | 36.340000 | 7.346667 | 65.746441 | ... | 4974181 | 7118228 | 12092409 | 16.07354 | 332.512 | 8063916 | 11627400 | 3563484 | 784.128 | 241.401 |
5 rows × 33 columns
Now that we have the data all together, it's time to start modeling. First, we want to look at which features best predict the price of the S&P 500. While variables such as Real Price and Dividend are great predictors, they cannot be used in our later models because they are directly calculated from the price we are trying to predict.
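One way to act on that point (a sketch of a possible variant, not what is run below) is to drop the price-derived columns before ranking features, so the importances are not dominated by leakage; the column list here simply follows the reasoning above.
# optional variant: exclude columns tied to the S&P price before ranking importances
leakage_cols = ['Real Price', 'Real Dividend', 'Real Earnings', 'PE10', 'Dividend']
X_no_leakage = combined_df.drop(columns=['SP500'] + leakage_cols)
# the imputation, train/test split, and RandomForestRegressor steps below can be reused with X_no_leakage in place of X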
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
# separating features and target
X = combined_df.drop('SP500', axis=1)
y = combined_df['SP500']
# impute missing values if there are any
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
# splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.3, random_state=42)
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
importances = rf.feature_importances_
# converting the importances into a DataFrame
feature_importances = pd.DataFrame(importances, index=X.columns, columns=["Importance"]).sort_values(by="Importance", ascending=False)
print(feature_importances)
                                        Importance
Long Interest Rate                        0.162479
Real Price                                0.107764
Dividend                                  0.097069
Establishments Entered Rate               0.093644
Consumer Price Index                      0.084320
Data.Establishments.Physical Locations    0.070976
DHS Denom                                 0.067088
Real Dividend                             0.064239
Earnings                                  0.059980
Number of Firms                           0.052412
Data.Calculated.Reallocation Rate         0.033000
Establishments Exited                     0.021036
Job Creation Rate                         0.018142
Establishments Exited Rate                0.015618
Real Earnings                             0.011821
Data.Job Destruction.Rate                 0.005615
Data.Job Destruction.Rate/Deaths          0.005541
Data.Job Destruction.Continuers           0.004686
Firms Exits                               0.003992
Data.Firm Exits.Establishment Exit        0.003759
PE10                                      0.002570
Net Job Creation                          0.002053
Data.Job Creation.Rate/Births             0.001705
Data.Firm Exits.Employments               0.001651
LREM64TTUSM156S                           0.001438
Data.Calculated.Net Job Creation Rate     0.001410
Establishments Entered                    0.001316
Data.Job Destruction.Count                0.001130
Data.Job Destruction.Deaths               0.000944
Jobs Created                              0.000928
Data.Job Creation.Continuers              0.000893
Data.Job Creation.Births                  0.000779
To test our hypothesis, we are using Consumer Price Index, Interest Rate, and DHS Denom. To see which has the best accuracy on a linear regression, we will loop through each one and analyze the performance metrics.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
y = combined_df['SP500']
# individual features to test
features_to_test = ['Consumer Price Index', 'Long Interest Rate', 'DHS Denom']
# store results
results = {}
# looping over each feature to evaluate the model
for feature in features_to_test:
    # selects the feature for the model
    X = combined_df[[feature]]
    # splits data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    # train the regression model
    model = LinearRegression()
    model.fit(X_train, y_train)
    # evaluating the model
    y_pred = model.predict(X_test)
    r_squared = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    # results
    results[feature] = {'R-squared': r_squared, 'RMSE': rmse}
for feature, metrics in results.items():
    print(f'Feature: {feature}, Linear Regression R-squared: {metrics["R-squared"]}, RMSE: {metrics["RMSE"]}')
Feature: Consumer Price Index, Linear Regression R-squared: 0.8144860090222715, RMSE: 345.33851831162474
Feature: Long Interest Rate, Linear Regression R-squared: 0.6197036930717091, RMSE: 494.44471711601204
Feature: DHS Denom, Linear Regression R-squared: 0.7915089626998075, RMSE: 366.1005506400515
Looking at the three features tested, Consumer Price Index had both the highest R-squared and the lowest RMSE. While DHS Denom was close on both, we figured it would make more sense to stick with our hypothesis and build a model with Consumer Price Index. To start, we simply rebuilt the model to create the training and test sets.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
X = combined_df[['Consumer Price Index']]
#X = combined_df[['Consumer Price Index', 'Long Interest Rate', 'DHS Denom']]
y = combined_df['SP500']
# splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# training the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# evaluating the model
y_pred = model.predict(X_test)
print(f'Linear Regression R-squared: {r2_score(y_test, y_pred)}')
print(f'Linear Regression RMSE: {np.sqrt(mean_squared_error(y_test, y_pred))}')
Linear Regression R-squared: 0.8144860090222715
Linear Regression RMSE: 345.33851831162474
This accuracy didn't quite satisfy us, so the next step is to try a few other models and compare their performance metrics.
from sklearn.ensemble import GradientBoostingRegressor
# Gradient Boosting Regressor
gb_model = GradientBoostingRegressor(random_state=42)
# fitting model on the training data
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)
# evaluate the model
print(f'Gradient Boosting R-squared: {r2_score(y_test, y_pred_gb)}')
print(f'Gradient Boosting RMSE: {np.sqrt(mean_squared_error(y_test, y_pred_gb))}')
Gradient Boosting R-squared: 0.9164525900606401
Gradient Boosting RMSE: 231.75183548891127
from sklearn.ensemble import RandomForestRegressor
# Random Forest Regressor
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
# evaluate the model
print(f'Random Forest R-squared: {r2_score(y_test, y_pred_rf)}')
print(f'Random Forest RMSE: {np.sqrt(mean_squared_error(y_test, y_pred_rf))}')
Random Forest R-squared: 0.9054539073026859
Random Forest RMSE: 246.53494097662892
Gradient boosting performed best across both metrics, so we will likely be sticking with that. Just to get some more insight into their behavior, we will run a cross-validation across all three models.
from sklearn.model_selection import cross_val_score
# models
lr_model = LinearRegression()
rf_model = RandomForestRegressor(random_state=42)
gb_model = GradientBoostingRegressor(random_state=42)
# cross-validation for Linear Regression
cv_scores_lr = cross_val_score(lr_model, X, y, cv=5, scoring='neg_mean_squared_error')
rmse_scores_lr = np.sqrt(-cv_scores_lr)
# cross-validation for Random Forest
cv_scores_rf = cross_val_score(rf_model, X, y, cv=5, scoring='neg_mean_squared_error')
rmse_scores_rf = np.sqrt(-cv_scores_rf)
# cross-validation for Gradient Boosting
cv_scores_gb = cross_val_score(gb_model, X, y, cv=5, scoring='neg_mean_squared_error')
rmse_scores_gb = np.sqrt(-cv_scores_gb)
# RMSE scores
print("Linear Regression - Cross-validated RMSE scores:", rmse_scores_lr)
print("Random Forest - Cross-validated RMSE scores:", rmse_scores_rf)
print("Gradient Boosting - Cross-validated RMSE scores:", rmse_scores_gb)
Linear Regression - Cross-validated RMSE scores: [354.09047432 224.78266018 249.29325151 355.77029918 564.76291953]
Random Forest - Cross-validated RMSE scores: [153.46716735 139.232752 354.94663443 204.01469503 920.43292515]
Gradient Boosting - Cross-validated RMSE scores: [148.08577 98.30223837 355.6041144 201.99914799 926.30728342]
Once again, gradient boosting performed best on most folds, so we will be using it. Before tuning, we also check whether the 2008 recession year is responsible for the large fold-to-fold spread in RMSE.
combined_df_no_2008 = combined_df[combined_df.index != 2008 ]
# defining features and target variable without 2008 data
#X_no_2008 = combined_df_no_2008[['Consumer Price Index', 'Long Interest Rate', 'DHS Denom']]
X_no_2008 = combined_df_no_2008[['Consumer Price Index']]
y_no_2008 = combined_df_no_2008['SP500']
model = GradientBoostingRegressor(random_state=42)
# cross-validation for the model without 2008 data
cv_scores_no_2008 = cross_val_score(model, X_no_2008, y_no_2008, cv=5, scoring='neg_mean_squared_error')
rmse_scores_no_2008 = np.sqrt(-cv_scores_no_2008)
# RMSE scores
print("RMSE scores without 2008 data:", rmse_scores_no_2008)
RMSE scores without 2008 data: [107.99136219 88.22442438 268.96826634 180.86562176 928.0975116 ]
Curious about the high variation in RMSE across folds, we checked whether taking out the 2008 recession improved this. The results show it did not make a significant difference. The next step is to tune the hyperparameters for the lowest RMSE.
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
# parameter grid to search
param_grid = {
'n_estimators': [100, 200, 300],
'learning_rate': [0.01, 0.1, 0.2],
'subsample': [0.8, 0.9, 1.0],
'max_depth': [3, 4, 5],
'min_samples_split': [2, 4],
'min_samples_leaf': [1, 2],
}
# regressor
gb_regressor = GradientBoostingRegressor(random_state=42)
# grid search with cross-validation
grid_search = GridSearchCV(estimator=gb_regressor, param_grid=param_grid, cv=5,
scoring='neg_mean_squared_error', verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
best_rmse = np.sqrt(-grid_search.best_score_)
print(f'Best parameters found: {best_params}')
print(f'Best RMSE: {best_rmse}')
Fitting 5 folds for each of 324 candidates, totalling 1620 fits
Best parameters found: {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300, 'subsample': 0.8}
Best RMSE: 123.86738179234608
Now that we have the best parameters, let's check out the model performance.
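Since GridSearchCV refits the best parameter combination on the training data by default, the tuned model can also be evaluated straight from the search object instead of retyping the parameters; a minimal sketch:
# evaluate the refitted best estimator held by the grid search (refit=True is the default)
y_pred_best = grid_search.best_estimator_.predict(X_test)
print(f'Tuned Gradient Boosting R-squared: {r2_score(y_test, y_pred_best)}')
print(f'Tuned Gradient Boosting RMSE: {np.sqrt(mean_squared_error(y_test, y_pred_best))}')
Below, the parameters are instead written out by hand.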
# best parameters from grid search
best_params = {
'learning_rate': 0.1,  # note: the grid search above selected 0.01; 0.1 is used here
'max_depth': 3,
'min_samples_leaf': 1,
'min_samples_split': 2,
'n_estimators': 300,
'subsample': 0.8
}
# Gradient Boosting Regressor with the best parameters
gb_best = GradientBoostingRegressor(**best_params, random_state=42)
# fiting the model on the training data
gb_best.fit(X_train, y_train)
y_pred_gb = gb_best.predict(X_test)
# R-squared and RMSE
r2_gb = r2_score(y_test, y_pred_gb)
rmse_gb = np.sqrt(mean_squared_error(y_test, y_pred_gb))
# performance metrics
print(f'Gradient Boosting R-squared: {r2_gb}')
print(f'Gradient Boosting RMSE: {rmse_gb}')
Gradient Boosting R-squared: 0.9201413570136057
Gradient Boosting RMSE: 226.57795363366336
For a little more insight, we run a cross-validation with the tuned parameters.
# model
gb_model = GradientBoostingRegressor(**best_params,random_state=42)
# cross-validation for Gradient Boosting
cv_scores_gb = cross_val_score(gb_model, X, y, cv=5, scoring='neg_mean_squared_error')
rmse_scores_gb = np.sqrt(-cv_scores_gb)
# RMSE scores
print("Gradient Boosting - Cross-validated RMSE scores:", rmse_scores_gb)
Finally, to test our hypothesis, we add recent data (2019-2023) that wasn't in our datasets to see whether the model better predicts the S&P price from the Consumer Price Index, interest rates, or both.
best_params = {
'learning_rate': 0.1,  # as above, this differs from the 0.01 selected by the grid search
'max_depth': 3,
'min_samples_leaf': 1,
'min_samples_split': 2,
'n_estimators': 300,
'subsample': 0.8
}
model = GradientBoostingRegressor(**best_params, random_state=42)
# training data
X_train = combined_df[['Consumer Price Index', 'Long Interest Rate']]
y_train = combined_df['SP500']
model.fit(X_train, y_train)
# new data for prediction (2019-2023)
new_data = {
'Consumer Price Index': [255.657, 258.811, 270.970, 292.655, 304.515],
'Long Interest Rate': [2.14, 0.89, 1.45, 2.95, 3.96]
}
new_data_df = pd.DataFrame(new_data)
# both features
predicted_sp500_prices_both = model.predict(new_data_df)
# using only CPI
model.fit(X_train[['Consumer Price Index']], y_train) # retraining with only CPI
predicted_sp500_prices_cpi = model.predict(new_data_df[['Consumer Price Index']])
# using only Interest Rate
model.fit(X_train[['Long Interest Rate']], y_train) # retraining with only interest rate
predicted_sp500_prices_ir = model.predict(new_data_df[['Long Interest Rate']])
print("Predictions with both CPI and Interest Rate:", predicted_sp500_prices_both)
print("Predictions with only CPI:", predicted_sp500_prices_cpi)
print("Predictions with only Interest Rate:", predicted_sp500_prices_ir)
In our hypothesis testing with a Gradient Boosting Regressor and optimized parameters, we aimed to predict S&P 500 prices using contemporary data on the Consumer Price Index (CPI) and interest rates. The results, however, revealed a significant discrepancy from realistic S&P values. The predictions using both CPI and interest rates showed some variance but were considerably lower (by thousands of points) than actual S&P 500 values. Predictions based solely on CPI were identical across all years, suggesting a static model response to changes in CPI, which is unlikely in real-world financial markets. Conversely, using only interest rates produced a downward trend in predictions, with values decreasing each year, yet these predictions were still unrealistic. These outcomes suggest limitations in the model's capacity to capture complex real-world events like the COVID-19 pandemic. They highlight the challenges of financial market prediction using limited features and emphasize the need to incorporate a broader range of economic indicators, possibly including more nuanced market data or sentiment analysis, for more accurate forecasting.
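One likely mechanical reason for the flat CPI-only predictions is that tree-based models such as gradient boosting cannot extrapolate: every split threshold lies inside the training range, so any CPI value above the 1978-2018 maximum falls into the same leaf of every tree and receives the same prediction. A minimal sketch illustrating this (the CPI values below are hypothetical, chosen only to sit above the training range):
# refit the CPI-only model and predict on CPI values beyond the training range
cpi_only_model = GradientBoostingRegressor(**best_params, random_state=42)
cpi_only_model.fit(combined_df[['Consumer Price Index']], combined_df['SP500'])
out_of_range = pd.DataFrame({'Consumer Price Index': [260.0, 280.0, 300.0, 320.0]})
print(cpi_only_model.predict(out_of_range))  # expected: identical predictions for every value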