?? traindatasql語句.txt
字號:
/* 1、 ageType分6類:<=20為year0_20,21-30為year21_30,31-40為year31_40,41-50為year41_50,51-60為year51_60,>=61為yearover60
2、 原有education_numType: 1-16 ,分成6級: edu1_3,edu4_6,edu7_9,edu10_12,edu13_14,edu15_16
3、 occupationType共14類:Tech_support, Craft_repair, Other_service(?), Sales, Exec_managerial, Prof_specialty,
Handlers_cleaners, Machine_op_inspct, Adm_clerical, Farming_fishing, Transport_moving,
Priv_house_serv, Protective_serv, Armed_Forces
4 、原有native-countryType:United-States, Cambodia, England, Puerto-Rico, Canada, Germany,
Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras,
Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France,
Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua,
Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands}
native_country分成5類developNO1,developNO2,developNO3,developNO4,developNO5后:
developNO1:(<0.1) Outlying-US(Guam-USVI-etc),Vietnam,Mexico,Dominican-Republic,Laos,Haiti,Hungary,
Guatemala,Nicaragua,Scotland,El-Salvador,Trinadad&Tobago,Holand-Netherlands
developNO2:(>=0.1 <0.2) Puerto-Rico,South,China,Cuba,Poland,Jamaica,Portugal,Ireland,Ecuador,Peru,?
developNO3:(>=0.2 <0.3) Honduras,France,Columbia,United-States,England,Germany,Greece,Philippines,Thailand, Yugoslavia
developNO4:(>=0.3 <0.4) India.Japan.
developNO5:(>=0.4 ) Cambodia,Canada,Iran,Italy,Taiwan,Hong
平均:0.2375
計算方法:
SELECT COUNT(*)
FROM traindata1
WHERE native_country = ' ?'
SELECT COUNT(*)
FROM traindata1
WHERE native_country = ' ?' AND makeover = ' >50K'
具體數值:
Outlying-US(Guam-USVI-etc):0 Vietnam:0.08 Mexico:0.064 Dominican-Republic:0.083 Laos:0 Haiti:0.067 Hungary:0
Guatemala:0.048 Nicaragua:0 Scotland:0 El-Salvador:0.097 Trinadad&Tobago:0 Holand-Netherlands:0
Puerto-Rico:0.1395 South:0.1 China:0.1613 Cuba:0.1516 Poland:0.1739 Jamaica:0.1875 Portugal:0.1667
Ireland:0.125 Ecuador:0.125 Peru:0.1111 ?:0.1111
Honduras:0.2 France:0.2 Columbia:0.2 United-States:0.2418 England:0.2812 Germany:0.2653 Greece:0.2222 Philippines:0.2632 Thailand:0.25 Yugoslavia:0.25
India:0.3 Japan:0.3
Cambodia:0.4285 Canada:0.404 Iran:0.4783 Italy:0.4545 Taiwan:0.4211 Hong:0.5
5、sexType分為Male、Female兩類
*/
CREATE DATABASE adult_data
ON
(
NAME=adult_data,
FILENAME='adult_data.mdf')
CREATE TABLE traindata1
(
age INT,
workclass VARCHAR(30),
fnlwgt INT,
education VARCHAR(20),
education_num INT,
marital_status VARCHAR(30),
occupation VARCHAR(30),
relationship VARCHAR(30),
race VARCHAR(20),
sex VARCHAR(10),
capital_gain INT,
capital_loss INT,
hours_per_week INT,
native_country VARCHAR(30),
makeover VARCHAR (10)
)
BULK INSERT traindata1
FROM 'adult.data.txt'
WITH
(
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
CREATE TABLE traindata2
(
age INT,
workclass VARCHAR(30),
fnlwgt INT,
education VARCHAR(20),
education_num INT,
marital_status VARCHAR(30),
occupation VARCHAR(30),
relationship VARCHAR(30),
race VARCHAR(20),
sex VARCHAR(10),
capital_gain INT,
capital_loss INT,
hours_per_week INT,
native_country VARCHAR(30),
makeover50k VARCHAR (10)
)
BULK INSERT traindata2
FROM 'adult.data.txt'
WITH
(
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
ALTER TABLE traindata2 DROP COLUMN workclass
ALTER TABLE traindata2 DROP COLUMN fnlwgt
ALTER TABLE traindata2 DROP COLUMN marital_status
ALTER TABLE traindata2 DROP COLUMN relationship
ALTER TABLE traindata2 DROP COLUMN race
ALTER TABLE traindata2 DROP COLUMN capital_gain
ALTER TABLE traindata2 DROP COLUMN capital_loss
ALTER TABLE traindata2 DROP COLUMN hours_per_week
//ageType分6類:<=20為year0_20,21-30為year21_30,31-40為year31_40,41-50為year41_50,51-60為year51_60,>=61為yearover60
UPDATE traindata2
SET age=0
WHERE age<=20
UPDATE traindata2
SET age=1
WHERE age>20 AND age<=30
UPDATE traindata2
SET age=2
WHERE age>30 AND age<=40
UPDATE traindata2
SET age=3
WHERE age>40 AND age<=50
UPDATE traindata2
SET age=4
WHERE age>50 AND age<=60
UPDATE traindata2
SET age=5
WHERE age>60
ALTER TABLE traindata2 ALTER COLUMN age VARCHAR(15)
UPDATE traindata2
SET age = 'year0_20'
WHERE age = '0'
UPDATE traindata2
SET age= 'year21_30'
WHERE age = '1'
UPDATE traindata2
SET age = 'year31_40'
WHERE age = '2'
UPDATE traindata2
SET age = 'year31_40'
WHERE age = '3'
UPDATE traindata2
SET age = 'year41_50'
WHERE age = '4'
UPDATE traindata2
SET age = 'yearover60'
WHERE age = '5'
//原有education_numType: 1-16 ,分成6級: edu1_3,edu4_6,edu7_9,edu10_12,edu13_14,edu15_16
UPDATE traindata2
SET education = 'edu1_3'
WHERE education_num >= 0 AND education_num <= 3
UPDATE traindata2
SET education = 'edu4_6'
WHERE education_num >= 4 AND education_num <= 6
UPDATE traindata2
SET education = 'edu7_9'
WHERE education_num >= 7 AND education_num <= 9
UPDATE traindata2
SET education = 'edu10_12'
WHERE education_num >= 10 AND education_num <= 12
UPDATE traindata2
SET education = 'edu13_14'
WHERE education_num >= 13 AND education_num <= 14
UPDATE traindata2
SET education = 'edu15_16'
WHERE education_num >= 15 AND education_num <= 16
ALTER TABLE traindata2 DROP COLUMN education_num
//occupationType共14類:Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty,
// Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving,
// Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving
// Priv-house-serv, Protective-serv, Armed-Forces
//occupationType共14類(將原"-"改為"_"便于編程處理,并去掉前面的空格):Tech_support, Craft_repair, Other_service, Sales, //Exec_managerial, Prof_specialty, Handlers_cleaners, Machine_op_inspct, Adm_clerical, Farming_fishing, Transport_moving,
//Priv_house_serv, Protective_serv, Armed_Forces
UPDATE traindata2
SET occupation = 'Tech_support'
WHERE occupation =' Tech-support'
UPDATE traindata2
SET occupation = 'Craft_repair'
WHERE occupation =' Craft-repair'
UPDATE traindata2
SET occupation = 'Other_service'
WHERE occupation =' Other-service'
UPDATE traindata2
SET occupation = 'Exec_managerial'
WHERE occupation =' Exec-managerial'
UPDATE traindata2
SET occupation = 'Prof_specialty'
WHERE occupation =' Prof-specialty'
UPDATE traindata2
SET occupation = 'Handlers_cleaners'
WHERE occupation =' Handlers-cleaners'
UPDATE traindata2
SET occupation = 'Machine_op_inspct'
WHERE occupation =' Machine-op-inspct'
UPDATE traindata2
SET occupation = 'Adm_clerical'
WHERE occupation =' Adm-clerical'
UPDATE traindata2
SET occupation = 'Farming_fishing'
WHERE occupation =' Farming-fishing'
UPDATE traindata2
SET occupation = 'Transport_moving'
WHERE occupation =' Transport-moving'
UPDATE traindata2
SET occupation = 'Priv_house_serv'
WHERE occupation =' Priv-house-serv'
UPDATE traindata2
SET occupation = 'Protective_serv'
WHERE occupation =' Protective-serv'
UPDATE traindata2
SET occupation = 'Armed_Forces'
WHERE occupation =' Armed-Forces'
UPDATE traindata2
SET occupation = 'Other_service'
WHERE occupation =' ?'
/*原有native-countryType:United-States, Cambodia, England, Puerto-Rico, Canada, Germany,
Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras,
Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France,
Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua,
Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands}
native_country分成5類developNO1,developNO2,developNO3,developNO4,developNO5后:
developNO1:(<0.1) Outlying-US(Guam-USVI-etc),Vietnam,Mexico,Dominican-Republic,Laos,Haiti,Hungary,
Guatemala,Nicaragua,Scotland,El-Salvador,Trinadad&Tobago,Holand-Netherlands
developNO2:(>=0.1 <0.2) Puerto-Rico,South,China,Cuba,Poland,Jamaica,Portugal,Ireland,Ecuador,Peru,?
developNO3:(>=0.2 <0.3) Honduras,France,Columbia,United-States,England,Germany,Greece,Philippines,Thailand, Yugoslavia
developNO4:(>=0.3 <0.4) India.Japan.
developNO5:(>=0.4 ) Cambodia,Canada,Iran,Italy,Taiwan,Hong
*/
UPDATE traindata2
SET native_country = 'developNO1'
WHERE native_country = ' Outlying-US(Guam-USVI-etc)' OR native_country = ' Vietnam'
OR native_country = ' Mexico' OR native_country = ' Dominican-Republic'
OR native_country = ' Laos' OR native_country = ' Haiti'
OR native_country = ' Hungary' OR native_country = ' Guatemala'
OR native_country = ' Nicaragua' OR native_country = ' Scotland'
OR native_country = ' El-Salvador' OR native_country = ' Trinadad&Tobago'
OR native_country = ' Holand-Netherlands'
UPDATE traindata2
SET native_country = 'developNO2'
WHERE native_country = ' Puerto-Rico' OR native_country = ' South'
OR native_country = ' China' OR native_country = ' Cuba'
OR native_country = ' Poland' OR native_country = ' Jamaica'
OR native_country = ' Portugal' OR native_country = ' Ireland'
OR native_country = ' Ecuador' OR native_country = ' Peru'
OR native_country = ' ?'
UPDATE traindata2
SET native_country = 'developNO3'
WHERE native_country = ' Honduras' OR native_country = ' France'
OR native_country = ' Columbia' OR native_country = ' United-States'
OR native_country = ' England' OR native_country = ' Germany'
OR native_country = ' Greece' OR native_country = ' Philippines'
OR native_country = ' Thailand' OR native_country = ' Yugoslavia'
UPDATE traindata2
SET native_country = 'developNO4'
WHERE native_country = ' India' OR native_country = ' Japan'
UPDATE traindata2
SET native_country = 'developNO5'
WHERE native_country = ' Cambodia' OR native_country = ' Canada'
OR native_country = ' Iran' OR native_country = ' Italy'
OR native_country = ' Taiwan' OR native_country = ' Hong'
UPDATE traindata2
SET makeover50k = 'yes'
WHERE makeover50k = ' >50K'
UPDATE traindata2
SET makeover50k = 'no'
WHERE makeover50k = ' <=50K'
SELECT *
FROM traindata2
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -