Expand source code
def extract_formatted(r_output: str):
    """Parse printed R regression-summary text into structured tables.

    The parser looks for the section markers ``Call:``, ``Residuals:``,
    ``Coefficients:``, ``Residual standard error``, ``Multiple R-squared``
    and ``F-statistic`` (presumably the layout printed by R's
    ``summary()`` of a linear model -- confirm against the caller).
    Sections that are absent are simply left empty.

    Args:
        r_output: The raw multi-line text to parse.

    Returns:
        A dict with four entries:

        * ``"call"``: the line following ``Call:`` (``""`` if absent).
        * ``"residuals"``: a one-row Polars DataFrame of the residual
          quantiles (empty DataFrame if the section is missing).
        * ``"coefficients"``: a Polars DataFrame with one row per parsed
          coefficient line; the p-value is kept as text (e.g. ``"<2e-16"``).
        * ``"summary"``: a two-column Polars DataFrame
          (``Description``, ``Value``) of the model-level statistics, all
          stored as floats.  A p-value printed as a bound such as
          ``< 2.2e-16`` is stored as the bound itself.
    """
    # Imported here because the outer parsing loop needs ``re`` too, and the
    # only import visible in the original lived inside a nested helper.
    import re

    # Decimal number with optional sign and exponent: "15.38", "-0.01", "1e-05".
    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
    rse_re = re.compile(
        rf"Residual standard error:\s+({number}) on (\d+) degrees of freedom"
    )
    r2_re = re.compile(
        rf"Multiple R-squared:\s+({number}),\s+Adjusted R-squared:\s+({number})"
    )
    fstat_re = re.compile(
        rf"F-statistic:\s+({number}) on (\d+) and (\d+) DF,"
        rf"\s+p-value:\s+(<?\s*{number})"
    )

    lines = r_output.strip().split('\n')
    call = ""
    residuals = []
    coef_lines = []
    summary_info = {}

    i = 0
    while i < len(lines):
        line = lines[i].strip()

        if line.startswith("Call:"):
            # The call itself is printed on the line after the header; guard
            # against a truncated input where the header is the last line.
            if i + 1 < len(lines):
                call = lines[i + 1].strip()
            i += 2
            continue

        if line.startswith("Residuals:"):
            # Collect the label row and value row up to the next blank line.
            i += 1
            while i < len(lines) and lines[i].strip():
                residuals.append(lines[i].strip())
                i += 1
            continue

        if line.startswith("Coefficients:"):
            # The table ends at the "---" / "Signif. codes:" legend, or at a
            # blank line when no legend is printed.  Without the blank-line
            # stop the loop would swallow the summary lines that follow.
            i += 1
            while i < len(lines):
                row = lines[i].strip()
                if not row or row.startswith(("---", "Signif. codes:")):
                    break
                coef_lines.append(row)
                i += 1
            continue

        if "Residual standard error" in line:
            match = rse_re.search(line)
            if match:
                summary_info["Residual Std Error"] = float(match.group(1))
                summary_info["DF"] = int(match.group(2))
        elif "Multiple R-squared" in line:
            match = r2_re.search(line)
            if match:
                summary_info["R-squared"] = float(match.group(1))
                summary_info["Adj R-squared"] = float(match.group(2))
        elif "F-statistic" in line:
            match = fstat_re.search(line)
            if match:
                summary_info["F-statistic"] = float(match.group(1))
                summary_info["DF1"] = int(match.group(2))
                summary_info["DF2"] = int(match.group(3))
                # "< 2.2e-16" is a bound, not a number; keep the bound so the
                # summary table can stay purely numeric.
                p_text = match.group(4).lstrip("<").strip()
                summary_info["p-value"] = float(p_text)
        i += 1

    def extract_residuals(residuals: list[str]) -> pl.DataFrame:
        """Build a one-row DataFrame from the residual label and value rows."""
        # Need both the label row and the value row; otherwise nothing to build.
        if len(residuals) < 2:
            return pl.DataFrame()
        # Rename columns as requested.
        renames = {
            "Min": "Minimum",
            "1Q": "Quartile 1",
            "3Q": "Quartile 3",
            "Max": "Maximum",
        }
        labels = [renames.get(label, label) for label in residuals[0].split()]
        values = [float(value) for value in residuals[1].split()]
        return pl.DataFrame([dict(zip(labels, values))])

    residual_df = extract_residuals(residuals)

    def extract_coefficients(coef_lines: list[str]) -> pl.DataFrame:
        """Parse coefficient rows (header row skipped) into a DataFrame."""
        signif_map = {
            '***': "Significant at 0.1% level",
            '**': "Significant at 1% level",
            '*': "Significant at 5% level",
            '.': "significant at 10% level",
            '': "Not significant at 10% level"
        }
        # name | estimate | std. error | t value | p-value | optional stars.
        # The name may be back-quoted; the p-value may carry a "<" prefix.
        row_re = re.compile(
            r"^(`[^`]+`|\S+)\s+"
            r"([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?|\d+)\s+"
            r"([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?|\d+)\s+"
            r"([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?|\d+)\s+"
            r"([<>=]*\s*[\d.eE+-]+)\s*"
            r"(\*\*\*|\*\*|\*|\.|)?"
        )
        rows = []
        # coef_lines[0] is the column-header row; rows that do not match the
        # pattern (e.g. non-numeric entries) are skipped.
        for line in coef_lines[1:]:
            match = row_re.match(line)
            if not match:
                continue
            signif_code = match.group(6).strip() if match.group(6) else ''
            rows.append({
                "Variable": match.group(1).strip('`'),
                "Estimate": float(match.group(2)),
                "Std. Error": float(match.group(3)),
                "t statistic": float(match.group(4)),
                "P-value": match.group(5).strip(),
                "Significance": signif_map.get(signif_code, "Not significant")
            })
        return pl.DataFrame(rows)

    coef_df = extract_coefficients(coef_lines)

    def summary_info_to_table(summary_info: dict) -> pl.DataFrame:
        """Convert the summary-info dict to a two-column DataFrame of floats."""
        return pl.DataFrame({
            "Description": list(summary_info.keys()),
            "Value": [float(v) for v in summary_info.values()]
        })

    summary_table = summary_info_to_table(summary_info)

    return {
        "call": call,
        "residuals": residual_df,
        "coefficients": coef_df,
        "summary": summary_table,
    }