-
Notifications
You must be signed in to change notification settings - Fork 0
/
NASA GISTEMP climate score.py
116 lines (84 loc) · 3.4 KB
/
NASA GISTEMP climate score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import pandas as pd
import requests
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib
import os
# Constants
CLIMATE_DATA_URL = 'https://example.com/climate-data.csv' # Replace with actual source
MODEL_PATH = 'models/climate_model.pkl' # Corrected string
LOG_PATH = 'logs/predictions_log.csv'
DATA_DIR = 'data/'
MODEL_DIR = 'models/'
LOG_DIR = 'logs/'
# Create directories if they do not exist
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
def download_climate_data():
"""Download and save the latest climate data as a CSV."""
response = requests.get(CLIMATE_DATA_URL)
filename = f"{DATA_DIR}climate_data_{datetime.now().strftime('%Y%m%d')}.csv"
with open(filename, 'wb') as f:
f.write(response.content)
print(f"Downloaded climate data: {filename}")
def load_climate_data():
"""Load the latest climate data into a pandas DataFrame."""
latest_file = sorted(os.listdir(DATA_DIR))[-1] # Load the most recent file
df = pd.read_csv(f"{DATA_DIR}{latest_file}")
return df
def preprocess_data(df):
"""Preprocess the data for training or prediction."""
# Drop rows with missing values
df = df.dropna()
# Example features: temperature, CO2 levels, sunspot numbers (replace with real columns)
X = df[['temperature', 'co2_level', 'sunspot_number']] # Add more features as needed
y = df['gistemp_score'] # Target variable
return X, y
def train_model(X, y):
"""Train a simple regression model."""
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Using a simple Linear Regression model here
model = LinearRegression()
model.fit(X_train, y_train)
# Save the model to a file
joblib.dump(model, MODEL_PATH)
print("Model trained and saved.")
def load_model():
"""Load the trained machine learning model."""
if not os.path.exists(MODEL_PATH):
print("Model not found. Please train the model first.")
return None
model = joblib.load(MODEL_PATH)
return model
def make_prediction(model, latest_data):
"""Make predictions based on the latest data."""
prediction = model.predict(latest_data)
return prediction[0]
def log_prediction(prediction):
"""Log the prediction to a file."""
with open(LOG_PATH, 'a') as f:
log_line = f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, {prediction:.6f}\n"
f.write(log_line)
print(f"Prediction logged: {prediction:.6f}")
def main():
# Step 1: Download new climate data
download_climate_data()
# Step 2: Load and preprocess the data
df = load_climate_data()
X, y = preprocess_data(df)
# Step 3: Train the model if it doesn't exist
if not os.path.exists(MODEL_PATH):
print("Training the model since no pre-trained model was found.")
train_model(X, y)
# Step 4: Load the trained model
model = load_model()
if model is None:
return # Exit if no model is loaded
# Step 5: Make a prediction using the latest data
prediction = make_prediction(model, X)
# Step 6: Log the prediction
log_prediction(prediction)
if __name__ == "__main__":
main()