import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
df_emailinfo=pd.read_csv("email_table.csv")
df_emailinfo.head(5)
df_emailinfo.info()
df_emailinfo.describe()
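# check the class balance of the target up front; a strong imbalance here is
# what motivates the resampling applied before modelling further below
df_emailinfo['converted'].value_counts(normalize=True)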
categorical_feature_mask = df_emailinfo.dtypes==object
categorical_feature_mask
# filter categorical columns using the mask and turn them into a list
categorical_cols = df_emailinfo.columns[categorical_feature_mask].tolist()
# keep 'weekday' and 'user_country' as strings; they are one-hot encoded later
categorical_cols.remove('weekday')
categorical_cols.remove('user_country')
from sklearn.preprocessing import LabelEncoder
# instantiate a LabelEncoder object
le = LabelEncoder()
df_emailinfo[categorical_cols] = df_emailinfo[categorical_cols].apply(lambda col: le.fit_transform(col))
df_emailinfo[categorical_cols].head(10)
df_emailinfo.head(5)
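# caveat: the same LabelEncoder instance was refit on each column above, so
# le.classes_ now holds only the mapping for the last column encoded; keeping
# one fitted encoder per column (e.g. a dict of LabelEncoders) would keep
# every mapping invertible via inverse_transform
le.classes_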
import seaborn as sns
sns.boxplot(x='user_country',y='hour',data=df_emailinfo,palette='rainbow')
sns.countplot(x='user_country',data=df_emailinfo,palette='rainbow')
sns.countplot(x='converted' ,data=df_emailinfo)
sns.countplot(x='converted',hue='user_country' ,data=df_emailinfo)
sns.countplot(x='converted', hue='email_version', data=df_emailinfo)
# from the plot, personalized emails show a higher conversion count than generic emails
sns.countplot(x='converted', hue='email_text', data=df_emailinfo)
# no major difference between long and short email text; quantified below
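# back the plots with numbers: mean conversion rate per encoded level
# (LabelEncoder assigns codes alphabetically, so 0/1 follow the sorted raw
# label strings)
print(df_emailinfo.groupby('email_version')['converted'].mean())
print(df_emailinfo.groupby('email_text')['converted'].mean())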
df_emailinfo['user_past_purchases'].hist(color='green',bins=50,figsize=(8,4))
sns.countplot(x='hour', hue='user_country', data=df_emailinfo)
# most emails were sent during the daytime, roughly 5:00 am to 3:00 pm, across countries
sns.countplot(x='user_past_purchases', hue='user_country', data=df_emailinfo)
# the number of users declines steadily as past-purchase count rises
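# conversion rate per country in numbers (user_country is still a raw string
# column at this point)
df_emailinfo.groupby('user_country')['converted'].mean().sort_values(ascending=False)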
df_emailopen=pd.read_csv("email_opened_table.csv")
df_emailopen = df_emailopen.reset_index(drop=True)
df_emailopen.shape
df_emaillink=pd.read_csv("link_clicked_table.csv")
df_emaillink.info()
df_emaillink.shape
df_emailopen['email_opened']=1
df_emailopen.head(5)
# merge the open flag onto the main table; emails never opened get NaN (filled with 0 below)
df_emailinfo = pd.merge(left=df_emailinfo, right=df_emailopen, on='email_id', how='outer')
df_emailinfo.info()
df_emailinfo['email_opened']=df_emailinfo['email_opened'].fillna(0)
df_emailinfo['email_opened']=df_emailinfo['email_opened'].astype(int)
df_emaillink['link_opened']=1
df_emaillink.head(5)
df_emailinfo = pd.merge(left=df_emailinfo, right=df_emaillink, on='email_id',how='outer')
df_emailinfo.info()
df_emailinfo['link_opened']=df_emailinfo['link_opened'].fillna(0)
df_emailinfo['link_opened']=df_emailinfo['link_opened'].astype(int)
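# with both flags merged in, the whole funnel can be summarized numerically
print('open rate      :', df_emailinfo['email_opened'].mean())
print('click rate     :', df_emailinfo['link_opened'].mean())
print('conversion rate:', df_emailinfo['converted'].mean())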
df_emailinfo['user_past_purchases']=df_emailinfo['user_past_purchases'].astype(int)
sns.countplot(x='email_opened',data=df_emailinfo)
sns.countplot(x='email_opened', hue='email_text', data=df_emailinfo)
# short emails are preferable (opened more often)
sns.countplot(x='email_opened', hue='email_version', data=df_emailinfo)
sns.countplot(x='email_opened', hue='weekday', data=df_emailinfo)
# mostly opened on weekdays
sns.countplot(x='email_opened', hue='user_country', data=df_emailinfo)
sns.countplot(x='link_opened', hue='email_text', data=df_emailinfo)
# long vs. short email text makes little difference to clicks - roughly equal chance
sns.countplot(x='link_opened', hue='email_version', data=df_emailinfo)
# personalized emails drive more clicks, so prefer them to convert more buyers
sns.countplot(x='link_opened', hue='weekday', data=df_emailinfo)
# most emails should be triggered on weekdays
sns.countplot(x='link_opened', hue='user_country', data=df_emailinfo)
# should focus on increasing the click counts further in the US and UK
# new strategies are needed to attract customers in France and Spain
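# the same comparison in numbers: click-through rate by country and by weekday
print(df_emailinfo.groupby('user_country')['link_opened'].mean())
print(df_emailinfo.groupby('weekday')['link_opened'].mean())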
# changing the remaining categorical columns to numerical (one-hot encoding)
df_emailinfo = pd.get_dummies(df_emailinfo, columns=['weekday', 'user_country'])
df_emailinfo.head()
df_emailinfo.shape
df_emailinfo.describe()
from sklearn import preprocessing
# scale user_past_purchases column-wise; preprocessing.normalize() rescales a
# whole vector to unit norm, which is not per-feature scaling, so a min-max
# scaler is used here instead
minmax = preprocessing.MinMaxScaler()
df_emailinfo['user_past_purchases'] = minmax.fit_transform(df_emailinfo[['user_past_purchases']])
df_emailinfo['user_past_purchases'].head(10)
df_emailinfo.describe()
df_emailinfo.hour.hist(bins=10)
# standardize 'hour' to zero mean and unit variance
scaler = preprocessing.StandardScaler()
df_emailinfo['hour']=scaler.fit_transform(df_emailinfo[['hour']])
df_emailinfo['hour'].head()
df_emailinfo.describe()
df_emailinfo.hour.hist(bins=10)
sns.kdeplot(df_emailinfo.hour)
sns.heatmap(df_emailinfo.corr(), cmap='coolwarm')
plt.title('email_info.corr()')
# drop the post-send outcome flags (email_opened, link_opened), which would
# not be known at send time, and drop user_country_ES as the one-hot baseline
df_emailinfo.drop('link_opened', axis=1, inplace=True)
df_emailinfo.drop('user_country_ES', axis=1, inplace=True)
df_emailinfo.drop('email_opened', axis=1, inplace=True)
sns.heatmap(df_emailinfo.corr(), cmap='coolwarm')
plt.title('email_info.corr()')
df_emailinfo.shape
df_emailinfo.info()
# email_id is an arbitrary identifier and carries no signal, so it is excluded
X = df_emailinfo[['hour','user_past_purchases','email_text','weekday_Friday','weekday_Monday','weekday_Saturday','weekday_Sunday','weekday_Thursday','weekday_Tuesday','weekday_Wednesday','email_version','user_country_FR','user_country_UK','user_country_US']]
y = df_emailinfo['converted']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100, stratify=y)
print (X_train.shape)
print (y_train.shape)
print (X_test.shape)
print (y_test.shape)
# resample on the TRAINING split only; upsampling before the split would leak
# duplicated test rows into the training set
train = pd.concat([X_train, y_train], axis=1)
non_converted = train[train['converted'] == 0]
converted = train[train['converted'] == 1]
print(non_converted.shape, converted.shape)
from sklearn.utils import resample
converted_upsampled = resample(converted,
                               replace=True,                  # sample with replacement
                               n_samples=len(non_converted),  # match the majority class
                               random_state=27)
upsampled = pd.concat([converted_upsampled, non_converted])
upsampled.head(5)
upsampled.converted.value_counts()
y_train = upsampled.converted
X_train = upsampled.drop('converted', axis=1)
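# sanity check before fitting: the upsampled training frame must expose the
# same feature columns, in the same order, as X_test built earlier
X_train = X_train[X_test.columns]
assert list(X_train.columns) == list(X_test.columns)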
from sklearn.linear_model import LogisticRegression
logmodel = LogisticRegression()
logmodel.fit(X_train,y_train)
logmodel.score(X_train, y_train)
logmodel.score(X_test, y_test)
predictions = logmodel.predict(X_test)
predictions
from sklearn.metrics import classification_report,accuracy_score
from sklearn.metrics import confusion_matrix
print(accuracy_score(y_test,predictions))
print(classification_report(y_test,predictions))
print(confusion_matrix(y_test,predictions))
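# a labelled confusion-matrix plot is easier to read than the raw array
# (ConfusionMatrixDisplay requires scikit-learn >= 0.22)
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay(confusion_matrix(y_test, predictions),
                       display_labels=['Non Converted', 'Converted']).plot()
plt.show()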
from sklearn.metrics import roc_auc_score,roc_curve,auc
# ROC needs scores/probabilities rather than hard 0/1 labels to trace a curve
probs = logmodel.predict_proba(X_test)[:, 1]
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, probs)
roc_auc = auc(false_positive_rate, true_positive_rate)
roc_auc
plt.plot(false_positive_rate,true_positive_rate)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()
predicted_probs=logmodel.predict_proba(X_test)
predicted_probs[100:110]
# lower the decision threshold below the default 0.5 (e.g. 0.3) to trade
# precision for recall on the rare positive class
y_pred_lower_threshold = logmodel.predict_proba(X_test)[:, 1] >= 0.3
print(classification_report(y_test, y_pred_lower_threshold))
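# sweep a few cut-offs to make the precision/recall trade-off explicit rather
# than judging a single threshold
from sklearn.metrics import precision_score, recall_score
probs_test = logmodel.predict_proba(X_test)[:, 1]
for t in [0.3, 0.4, 0.5, 0.6, 0.7]:
    preds_t = (probs_test >= t).astype(int)
    print('threshold=%.1f  precision=%.3f  recall=%.3f' % (
        t,
        precision_score(y_test, preds_t, zero_division=0),
        recall_score(y_test, preds_t, zero_division=0)))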
from sklearn.model_selection import GridSearchCV
log_model_1=LogisticRegression()
param_grid = {'C': [0.001, 0.04, 0.07,0.08, 1,10],'max_iter':[50,100,200,500]}
grid_search = GridSearchCV(log_model_1, param_grid, cv=5)
grid_search.fit(X_train, y_train)
print("Test set score: {:.2f}".format(grid_search.score(X_test, y_test)))
print("Best parameters: {}".format(grid_search.best_params_))
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))
print("Best estimator:\n{}".format(grid_search.best_estimator_))
# refit with class weighting and a small hand-picked C (stronger regularization)
logreg = LogisticRegression(class_weight='balanced', C=0.002, max_iter=50).fit(X_train, y_train)
logreg.score(X_test, y_test)
predictions=logreg.predict(X_test)
print(classification_report(y_test, predictions,target_names=["Non Converted", "Converted"]))
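# accuracy is misleading on imbalanced data; compare both models on ROC AUC
# computed from predicted probabilities
print('baseline AUC:', roc_auc_score(y_test, logmodel.predict_proba(X_test)[:, 1]))
print('balanced AUC:', roc_auc_score(y_test, logreg.predict_proba(X_test)[:, 1]))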