Скрипты. Анализ текстовых данных ( MC Excel, Mystem, yED Graph Editor)
1. 110 статей, охватывающие все 4 сезона 2015-2016 гг. Копирование ссылок в Excel , затем преобразование в текст
Public Sub one()
Dim HL As Hyperlink
i = 1
For Each HL In Sheets("All").Hyperlinks
Sheets("URL").Cells(i, 1) = HL.Address
i = i + 1
Next
End Sub
-------------------------------------------------------------
Sub two()
Dim IE As InternetExplorer, html As HTMLDocument, oElement As Object
Set IE = New InternetExplorer
IE.Visible = False
For L = 1 To 110
URL = Sheets("URL").Cells(L, 1).Value
IE.Navigate URL
Do While IE.readyState <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to " + URL
DoEvents
Loop
Set html = IE.document
j = 1
For Each oElement In html.getElementsByClassName("box")
Sheets("HTML").Cells(L, j).Value = oElement.innerText
j = j + 1
Next oElement
Next L
Set IE = Nothing
Application.StatusBar = ""
End Sub
------------------------------------------------------------------
Sub WriteToText()
FilePath = "C:\news\"
For i = 1 To 110
num = Trim(Str(i))
FName = FilePath & "news" & num & ".txt"
Open FName For Output As #1
Write #1, Sheets("HTML").Cells(i, 1).Value
Close #1
Next i
End Sub
----------------------------------------------------------------------
2. Использование программы Mystem , разбиение всех статей на отдельные слова в начальной форме, составление матрицы слов
Sub RunMystem()
FilePath = "C:\news\"
For i = 1 To 110
num = Trim(Str(i))
fileIn = FilePath & "news" & num & ".txt"
fileOut = FilePath & "news" & num & "_res.txt"
params = "C:\mystem.exe -l -n -e win " + fileIn + " " + fileOut
retVal = Shell(params, vbNormalFocus)
Application.Wait (Now + TimeValue("0:00:02"))
Next i
End Sub
-------------------------------------------------------------------
Sub AddAll()
For i = 1 To 110
AddFile (i)
Next i
End Sub
--------------------------------------------------------------------
Sub AddFile(N)
'
' Макрос1 Макрос
'
' Сочетание клавиш: Ctrl+Shift+A
'
num = Trim(Str(N))
With ActiveSheet.QueryTables.Add(Connection:= _
"TEXT;C:\news\news" + num + "_res.txt", Destination:=Sheets("TABLE").Cells(1, N))
.Name = "news" + num + "_res"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.TextFilePromptOnRefresh = False
.TextFilePlatform = 1251
.TextFileStartRow = 1
.TextFileParseType = xlDelimited
.TextFileTextQualifier = xlTextQualifierDoubleQuote
.TextFileConsecutiveDelimiter = False
.TextFileTabDelimiter = True
.TextFileSemicolonDelimiter = False
.TextFileCommaDelimiter = False
.TextFileSpaceDelimiter = False
.TextFileColumnDataTypes = Array(1)
.TextFileTrailingMinusNumbers = True
.Refresh BackgroundQuery:=False
End With
End Sub
-------------------------------------------------------------------
Sub RemoveDuble()
For i = 1 To 110
ActiveSheet.Range(Cells(1, i), Cells(600, i)).RemoveDuplicates Columns:=1, Header:=xlYes
Next i
End Sub
---------------------------------------------------------------------
Sub Макрос2()
'
' Макрос2 Макрос
'
' Сочетание клавиш: Ctrl+Shift+V
'
For i = 1 To 110
Range(Cells(1, i), Cells(600, i)).Select
ActiveWorkbook.Worksheets("TABLE").Sort.SortFields.cLEAR
ActiveWorkbook.Worksheets("TABLE").Sort.SortFields.Add Key:=Range(Cells(1, i), Cells(1, i)), _
SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
With ActiveWorkbook.Worksheets("TABLE").Sort
.SetRange Range(Cells(1, i), Cells(600, i))
.Header = xlNo
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
Next i
End Sub
---------------------------------------------------------------------
Sub AllWords()
k = 1
For i = 1 To 110
For j = 1 To 600
Worksheets("OMG").Cells(k, 1) = Worksheets("TABLE").Cells(j, i)
k = k + 1
Next j
Next i
LastRow = ActiveSheet.UsedRange.Row - 1 + ActiveSheet.UsedRange.Rows.Count
Application.ScreenUpdating = False
For j = LastRow To 1 Step -1
If Application.CountA(Rows(j)) = 0 Then Rows(j).Delete
Next j
End Sub
--------------------------------------------------------------------
Public Sub LALA()
ActiveSheet.Range("$A$1:$A$17094").RemoveDuplicates Columns:=1, Header:=xlNo
ActiveSheet.Range("$A$1:$A$2990" & Cells(Rows.Count, 1).End(xlUp).Row).Sort Key1:=Range("A1"), _
Orientation:=xlTopToBottom
' после сделать сортировку от а до я вручную:)
End Sub
------------------------------------------------------------
3. Удаление дубликатов, удаление слов, встречающихся менее 7 раз, составление пар слов
Public Sub WORDS()
For i = 1 To 110
Worksheets("WORDS").Cells(1, i + 1).Value = i
Next i
For i = 2 To 2991
Worksheets("OMG").Cells(i - 1, 1).Copy Worksheets("WORDS").Cells(i, 1)
Next i
End Sub
---------------------------------------------------------------------
Public Sub Dell()
Dim i As Integer
For i = 1 To 334
If Worksheets("WORDS").Cells(i, 112).Value <= 6 Then
Range(Cells(i, 1), Cells(i, 112)).Delete
End If
Next i
End Sub
--------------------------------------------------------------------
Public Sub Pairs()
pair = 1
For i = 2 To 281
For j = i + 1 To 282
word1 = Sheets("WORDS").Cells(i, 1)
word2 = Sheets("WORDS").Cells(j, 1)
k = 0
For N = 2 To 111
If Sheets("WORDS").Cells(i, N).Value * Sheets("WORDS").Cells(j, N).Value > 0 Then k = k + 1
Next N
If k >= 2 Then
Sheets("PAIRS").Cells(pair, 1) = word1
Sheets("PAIRS").Cells(pair, 2) = word2
Sheets("PAIRS").Cells(pair, 3) = k
pair = pair + 1
End If
Next j
Next i
End Sub