´ÙÇ× È¸±Í ºÐ¼®

 À̹øÀå¿¡¼­´Â ´ÙÇ× È¸±Í ºÐ¼®(Polynomial Regression)¿¡ ´ëÇØ ¾Ë¾Æ º¸ÀÚ.
ºÐ¼® ¹× ÀÌÇØ´Â ÆнºÇÏ°í °á°ú ±¸ÇÏ´Â °Í¿¡¸¸ ÁýÁßÇÑ´Ù.

¿¢¼¿·Î 3Â÷ ´ÙÇ× È¸±Í½ÄÀ» ±¸ÇÏ´Â ¹æ¹ý¿¡ ´ëÇؼ­ ¾Ë¾Æ º¸ÀÚ.


1. µ¥ÀÌŸ¸¦ ÀÔ·ÂÇÏ°í ¼±ÅÃÇÑ´Ù.

2. ¸Þ´º¿¡¼­ »ðÀÔÀ» ¼±ÅÃÇÑ´Ù.

3. ºÐ»êÇüÀ» ¼±ÅÃÇÑ´Ù.

4. Â÷Æ® Á¾·ù´Â 3°¡Áö Áß¿¡ Çϳª¸¦ ¼±ÅÃÇÑ´Ù.
   ¿©±â¼­´Â "Ç¥½Ä¸¸ ÀÖ´Â ºÐ»êÇü" ù¹ø° Â÷Æ® Á¾·ù¸¦ ¼±Åà ÇÑ´Ù.

 

5. Â÷Æ®¿¡¼­ ÀÓÀÇÀÇ Á¡À» Ŭ¸¯ÇÑ´Ù.  (±×·¯¸é ¸ðµçÁ¡ÀÌ ¼±ÅõȴÙ.)

6. ¸ðµç Á¡ÀÌ ¼±ÅÃµÈ »óÅ¿¡¼­ ¿À¸¥ÂÊ Æ˾÷ ¸Þ´º¸¦ ¶ç¿î ÈÄ¿¡ "Ãß¼¼¼± Ãß°¡"¸¦ ¼±ÅÃÇÑ´Ù.


7. "Ãß¼¼¼± ¼­½Ä" ´ëÈ­ »óÀÚ¿¡¼­ ´ÙÇ×½ÄÀ» ¼±ÅÃÇÏ°í "Â÷¼ö"¸¦ 3À¸·Î ¼³Á¤ÇÑ´Ù.

8. "¼ö½ÄÀ» Â÷Æ®¿¡ Ç¥½Ã", "R-Á¦°ö °ªÀ» Â÷Æ®¿¡ Ç¥½Ã"¸¦ üũÇÑ´Ù.

°á°ú : ¿ÞÂÊ¿¡ ´ÙÇ× È¸±Í¿Í R-Square °ªÀ» È®ÀÎ ÇÒ¼ö ÀÖ´Ù.


2Â÷ ´ÙÇ×½Ä È¸±Í  (polynomial regression of order 2)

¼Ò½º´Â https://adnoctum.tistory.com/104 ¿¡¼­ ±×´ë·Î °¡Á®¿Í¼­ Å×½ºÆ® Çß´Ù.

#include <stdio.h>
#include <vector>
#include <numeric>

double square(double init, double x)
{
    return init + x * x;
}

double cubic(double init, double x)
{
    return init + x * x * x;
}

double forth_power(double init, double x)
{
    return init + x * x * x * x;
}

// Yi = b0 + b1*Xi + b2*Xi^2 + ei À¸·Î, sum(e_i)¸¦ ÃÖ¼ÒÈ­½ÃÅ°°Ô²û
// regression µÈ º¯¼ö¸¦ ãµµ·Ï Á¤±Ô¹æÁ¤½ÄÀ» Æí¹ÌºÐÇؼ­ 0 ÀÌ µÇ´Â
// °¢°¢ÀÇ bi¸¦ ±¸ÇÏ¸é ´ÙÀ½°ú °°´Ù.
bool get2ndOrderRegression(std::vector<double>* srcX, std::vector<double>* srcY, double* b0, double* b1, double* b2)
{
    double Y = std::accumulate(srcY->begin(), srcY->end(), 0.0);
    double X = std::accumulate(srcX->begin(), srcX->end(), 0.0);
    double X2 = std::accumulate(srcX->begin(), srcX->end(), 0.0, square);
    double X3 = std::accumulate(srcX->begin(), srcX->end(), 0.0, cubic);
    double X4 = std::accumulate(srcX->begin(), srcX->end(), 0.0, forth_power);
    double K = 0.0;
    double L = 0.0;

    int i = 0;
    int n = (int)srcX->size();

    for (i = 0; i < n; i++)
    {
        K += ((*srcY)[i] * (*srcX)[i] * (*srcX)[i]);
        L += ((*srcY)[i] * (*srcX)[i]);
    }

    double denominator = -n * X4 * X2 + X4 * X * X + X2 * X2 * X2 + X3 * X3 * n - 2 * X3 * X * X2;
    double b0p = -(Y * X4 * X2 - Y * X3 * X3 - X * L * X4 + X * X3 * K - X2 * X2 * K + X2 * X3 * L);
    double b1p = X * Y * X4 - X * K * X2 - L * n * X4 + X3 * n * K - Y * X2 * X3 + X2 * X2 * L;
    double b2p = -(K * n * X2 - K * X * X - X2 * X2 * Y - X3 * n * L + X3 * X * Y + X * X2 * L);

    *b0 = b0p / denominator;
    *b1 = b1p / denominator;
    *b2 = b2p / denominator;
    return true;
}

double PolynomialRegression2(double a, double b, double c, double x)
{
    return a + b * x + c * x * x;
};

void main_0()
{
    double b0 = 0, b1 = 0, b2;
    std::vector<double> x, y;
   
    double xx[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    x.assign(&xx[0], &xx[0] + sizeof(xx)/sizeof(double));

    double yy[] = { 1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321 };
    y.assign(&yy[0], &yy[0] + sizeof(yy)/sizeof(double));

    get2ndOrderRegression(&x, &y, &b0, &b1, &b2);

    //Yi = b0 + b1 * Xi + b2 * Xi ^ 2 + ei
    printf("%f + %f*x + %f*x^2\n", b0, b1, b2);

    double t1 = PolynomialRegression2(b0, b1, b2, 9);
    double t2 = PolynomialRegression2(b0, b1, b2, 10);
    double t3 = PolynomialRegression2(b0, b1, b2, 9.5);
    printf("result 9 = %f,  10 = %f, 9.5 = %f\n", t1, t2, t3);
}

/*
°á°ú
1.000000 + 2.000000*x + 3.000000*x^2
result 9 = 262.000000,  10 = 321.000000, 9.5 = 290.750000
*/

ÁÖ¼®ÀÇ °á°ú°ªÃ³·³ Àß ³ª¿Â´Ù. ÀÚ¼¼ÇÑ ¼³¸íÀº ÇØ´ç ÆäÀÌÁö¸¦ ÂüÁ¶ÇÑ´Ù.

NÂ÷ ´ÙÇ×½Ä È¸±Í (¿©±â¼­´Â 3Â÷ ´ÙÇ׽ĸ¸ Å×½ºÆ®)

¾Æ·¡½ÄÀº https://mhyun.tistory.com/90 ¿¡¼­ °¡Á®¿Â ÄÚµåÀÌ´Ù.
°áÁ¤ °è¼ö °ªÀ» ±¸ÇÏ´Â Äڵ常 º°µµ·Î Ãß°¡ ÇÏ¿´´Ù.
visual studio 2005¿¡¼­ 3Â÷ ´ÙÇ×½ÄÀÌ ¹®Á¦¾øÀÌ ½ÇÇà µÇ´ÂÁö È®ÀÎ ÇÏ¿´´Ù.

// Least Square Method (ÃÖ¼Ò Àڽ¹ý)

#include <stdio.h>
#include <math.h>
#include <numeric>
#include <vector>
#include <algorithm>
#include <functional>
#include <cmath>

int  lstsq(double x[], double y[], int n, int number, double c[]);

#if 1
/*
const int n = 1; // ±¸ÇÏ·Á´Â ÇÔ¼öÀÇ Â÷¼ö
const int number = 10; // µ¥ÀÌÅÍÀÇ °¹¼ö

double x[number] = { 4.0, 4.2, 4.5, 4.7, 5.1, 5.5, 5.9, 6.3, 6.8, 7.1 };
double y[number] = { 102.56, 113.18, 130.11, 142.05, 167.53, 195.14, 224.87, 256.73,299.50,326.72};
*/

const int n = 3; // ±¸ÇÏ·Á´Â ÇÔ¼öÀÇ Â÷¼ö
const int number = 10; // µ¥ÀÌÅÍÀÇ °¹¼ö

double x[number] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
double y[number] = { 0.1, 0.3, 0.4, 0.8, 0.9, 1, 1.1, 2, 3, 5};

//excel¿¡¼­ ±¸ÇÑ °ª
//y = -0.84 + 1.040x - 0.243x^2 + 0.019x^3
//R©÷ = 0.990

#else

const int n = 2; // ±¸ÇÏ·Á´Â ÇÔ¼öÀÇ Â÷¼ö
const int number = 11; // µ¥ÀÌÅÍÀÇ °¹¼ö

double x[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
double y[] = { 1, 6, 17, 34, 57, 86, 121, 162, 209, 262, 321 };

/* °á°ú
 < Least Squre Method >

a0 = 1.00000000
a1 = 2.00000000
a2 = 3.00000000

*/
#endif


double p;

void main()
{
    double c[number];
    int chkerr, i;

    printf(" < Least Squre Method > \n\n");

    chkerr = lstsq(x, y, n, number, c);

    // °á°ú Ãâ·Â
    for(i=0; i<=n; i++)
        printf("a%d = %10.8f\n",i, c[i]);
    if(chkerr==0)
        printf("No Error\n");
    else
        printf("Error\n");

    //x°¡ 3Â÷À϶§¸¸ °áÁ¤ °è¼ö °è»ê
    if(n != 3)
        return;

    double ym = std::accumulate(y, y+number,  static_cast<double>(0)) / (double)number;

    std::vector<double> meandiff( y, y + sizeof(y)/sizeof(y[0]));
    std::transform( meandiff.begin(), meandiff.end(), meandiff.begin(), std::bind2nd( std::minus<double>(), ym));
    double sst = std::inner_product( meandiff.begin(),meandiff.end(), meandiff.begin(), static_cast<double>(0) );

    std::vector<double> r;
    for(int i=0; i < number; ++i)
    {
        double xi = x[i];
        double xx = xi*xi;
        double xxx = xx*xi;
        double k =  c[0] + c[1]*xi + c[2]*xx + c[3]*xxx;
        r.push_back(k);
    }
   
    std::transform( r.begin(), r.end(), r.begin(), std::bind2nd( std::minus<double>(), ym));
    double ssr = std::inner_product( r.begin(),r.end(), r.begin(), static_cast<double>(0));
    double rsquare = ssr/sst;

    printf("ym = %f, %, rsquare = %f\n", ym, rsquare);      
}

// 0 Ãâ·Â-> ¿¡·¯¾øÀ½, 1 Ãâ·Â-> °è»ê¿¡·¯
int  lstsq(double x[], double y[], int n, int number, double c[])
{
    int  i, j, k, l;
    double  w1, w2, w3, pivot, aik, a[21][22], w[42];

    if(n >= number || n < 1 || n > 20)
        return 1;
    for(i = 0; i < n*2; i++)
    {
        w1 = 0.0;
        for(j = 0; j < number; j++)
        {
            w2 = w3 = x[j];
            for(k = 0; k < i; k++)
                w2 *= w3;
            w1 += w2;
        }
        w[i] = w1;
    }

    // matrix ÀÔ·Â
    for(i = 0; i < n+1; i++)
    {
        for(j = 0; j < n+1; j++)
        {
            l = i + j - 1;
            a[i][j] = w[l];
        }
    }
    a[0][0] = number;
    w1 = 0.0;
   
    for(i = 0; i < number; i++)
        w1 += y[i];
    a[0][n+1] = w1;

    // sigma(Yi Xi) °è»êÇؼ­ ´ëÀÔ
    for(i = 0; i < n; i++)
    {
        w1 = 0.0;
        for(j = 0; j < number; j++)
        {
            w2 = w3 = x[j];
            for(k = 0; k < i; k++)
                w2 *= w3;
              w1 += y[j] * w2;
        }
        a[i+1][n+1] = w1;
    }

    // matrix °è»ê
    for(k = 0; k < n+1; k++)
    {
        pivot = a[k][k];
        for(j = k; j < n+2; j++)
            a[k][j] /= pivot;
        for(i = 0; i < n+1; i++)
        {
            if(i != k)
            {
                aik = a[i][k];
                for(j = k; j < n+2; j++)
                    a[i][j] -= aik * a[k][j];
            }
        }
    }

    // ´ÙÇ×½ÄÀÇ °è¼ö¸¦ ÃÖÁ¾ÀûÀ¸·Î Àü´Þ (pass by reference)
    for(i = 0; i < n+1; i++)
        c[i] = a[i][n+1];
    return(0);
}      

°á°ú°ªÀÌ ¿¢¼¿°ú ÀÏÄ¡ÇÑ´Ù.
´ÙÇ×½Ä °è¼ö¸¦ ¸¶Áö¸· ÀÎÀÚ c·Î ³Ñ°Ü ÁÖ°í ÀÖ´Ù.

°áÁ¤ °è¼ö ±¸Çϱâ
R©÷= SSR / SST = ¥Ò(yr - ym)©÷/ ¥Ò(yi - ym)©÷
yr : yÀÇ È¸±Í°ª
ym : yÀÇ Æò±Õ°ª
yi : yÀÇ ½ÇÁ¦°ª

2Â÷ ´ÙÇ×½Ä È¸±Í ¼Ò½º :  main.cpp
NÂ÷ ´ÙÇ×½Ä È¸±Í ¼Ò½º :  main1.cpp
ÇÁ·ÎÁ§Æ® :  regression_analysis3.zip

Âü°í ¼Ò½º)
https://adnoctum.tistory.com/104
https://rosettacode.org/wiki/Polynomial_regression#C


°áÁ¤ °è¼ö ¼³¸í
https://newsight.tistory.com/259
https://agronomy4future.com/2020/10/27/ȸ±ÍºÐ¼®ÀÇ-°áÁ¤°è¼ö-r-squared-¸¦-°¡Àå-½±°Ô-¼³¸íÇØ-º¸ÀÚ/