Скрипты. Анализ текстовых данных ( MC Excel, Mystem, yED Graph Editor)

1. 110 статей, охватывающие все 4 сезона 2015-2016 гг. Копирование ссылок в Excel , затем преобразование в текст

Public Sub one()

Dim HL As Hyperlink

i = 1

For Each HL In Sheets("All").Hyperlinks

Sheets("URL").Cells(i, 1) = HL.Address

i = i + 1

Next

End Sub

-------------------------------------------------------------

Sub two()

Dim IE As InternetExplorer, html As HTMLDocument, oElement As Object

Set IE = New InternetExplorer

IE.Visible = False

For L = 1 To 110

URL = Sheets("URL").Cells(L, 1).Value

IE.Navigate URL

Do While IE.readyState <> READYSTATE_COMPLETE

Application.StatusBar = "Trying to go to " + URL

DoEvents

Loop

Set html = IE.document

j = 1

For Each oElement In html.getElementsByClassName("box")

Sheets("HTML").Cells(L, j).Value = oElement.innerText

j = j + 1

Next oElement

Next L

Set IE = Nothing

Application.StatusBar = ""

End Sub

------------------------------------------------------------------

Sub WriteToText()

FilePath = "C:\news\"

For i = 1 To 110

num = Trim(Str(i))

FName = FilePath & "news" & num & ".txt"

Open FName For Output As #1

 

Write #1, Sheets("HTML").Cells(i, 1).Value

Close #1

Next i

End Sub

----------------------------------------------------------------------

2. Использование программы Mystem , разбиение всех статей на отдельные слова в начальной форме, составление матрицы слов

Sub RunMystem()

FilePath = "C:\news\"

 

For i = 1 To 110

num = Trim(Str(i))

fileIn = FilePath & "news" & num & ".txt"

fileOut = FilePath & "news" & num & "_res.txt"

params = "C:\mystem.exe -l -n -e win " + fileIn + " " + fileOut

retVal = Shell(params, vbNormalFocus)

Application.Wait (Now + TimeValue("0:00:02"))

Next i

End Sub

-------------------------------------------------------------------

Sub AddAll()

For i = 1 To 110

AddFile (i)

Next i

End Sub

--------------------------------------------------------------------

Sub AddFile(N)

'

' Макрос1 Макрос

'

' Сочетание клавиш: Ctrl+Shift+A

'

num = Trim(Str(N))

 

With ActiveSheet.QueryTables.Add(Connection:= _

"TEXT;C:\news\news" + num + "_res.txt", Destination:=Sheets("TABLE").Cells(1, N))

.Name = "news" + num + "_res"

.FieldNames = True

.RowNumbers = False

.FillAdjacentFormulas = False

.PreserveFormatting = True

.RefreshOnFileOpen = False

.RefreshStyle = xlInsertDeleteCells

.SavePassword = False

.SaveData = True

.AdjustColumnWidth = True

.RefreshPeriod = 0

.TextFilePromptOnRefresh = False

.TextFilePlatform = 1251

.TextFileStartRow = 1

.TextFileParseType = xlDelimited

.TextFileTextQualifier = xlTextQualifierDoubleQuote

.TextFileConsecutiveDelimiter = False

.TextFileTabDelimiter = True

.TextFileSemicolonDelimiter = False

.TextFileCommaDelimiter = False

.TextFileSpaceDelimiter = False

.TextFileColumnDataTypes = Array(1)

.TextFileTrailingMinusNumbers = True

.Refresh BackgroundQuery:=False

End With

End Sub

-------------------------------------------------------------------

Sub RemoveDuble()

For i = 1 To 110

ActiveSheet.Range(Cells(1, i), Cells(600, i)).RemoveDuplicates Columns:=1, Header:=xlYes

Next i

End Sub

---------------------------------------------------------------------

Sub Макрос2()

'

' Макрос2 Макрос

'

' Сочетание клавиш: Ctrl+Shift+V

'

For i = 1 To 110

Range(Cells(1, i), Cells(600, i)).Select

ActiveWorkbook.Worksheets("TABLE").Sort.SortFields.cLEAR

ActiveWorkbook.Worksheets("TABLE").Sort.SortFields.Add Key:=Range(Cells(1, i), Cells(1, i)), _

SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal

With ActiveWorkbook.Worksheets("TABLE").Sort

.SetRange Range(Cells(1, i), Cells(600, i))

.Header = xlNo

.MatchCase = False

.Orientation = xlTopToBottom

.SortMethod = xlPinYin

.Apply

End With

Next i

End Sub

---------------------------------------------------------------------

Sub AllWords()

k = 1

For i = 1 To 110

For j = 1 To 600

Worksheets("OMG").Cells(k, 1) = Worksheets("TABLE").Cells(j, i)

k = k + 1

Next j

Next i

LastRow = ActiveSheet.UsedRange.Row - 1 + ActiveSheet.UsedRange.Rows.Count

Application.ScreenUpdating = False

For j = LastRow To 1 Step -1

If Application.CountA(Rows(j)) = 0 Then Rows(j).Delete

Next j

 

End Sub

--------------------------------------------------------------------

Public Sub LALA()

 

ActiveSheet.Range("$A$1:$A$17094").RemoveDuplicates Columns:=1, Header:=xlNo

ActiveSheet.Range("$A$1:$A$2990" & Cells(Rows.Count, 1).End(xlUp).Row).Sort Key1:=Range("A1"), _

Orientation:=xlTopToBottom

' после сделать сортировку от а до я вручную:)

End Sub

------------------------------------------------------------

3. Удаление дубликатов, удаление слов, встречающихся менее 7 раз, составление пар слов

Public Sub WORDS()

 

For i = 1 To 110

Worksheets("WORDS").Cells(1, i + 1).Value = i

Next i

For i = 2 To 2991

Worksheets("OMG").Cells(i - 1, 1).Copy Worksheets("WORDS").Cells(i, 1)

Next i

 

End Sub

---------------------------------------------------------------------

Public Sub Dell()

Dim i As Integer

For i = 1 To 334

If Worksheets("WORDS").Cells(i, 112).Value <= 6 Then

Range(Cells(i, 1), Cells(i, 112)).Delete

End If

Next i

End Sub

--------------------------------------------------------------------

Public Sub Pairs()

pair = 1

For i = 2 To 281

For j = i + 1 To 282

word1 = Sheets("WORDS").Cells(i, 1)

word2 = Sheets("WORDS").Cells(j, 1)

k = 0

For N = 2 To 111

If Sheets("WORDS").Cells(i, N).Value * Sheets("WORDS").Cells(j, N).Value > 0 Then k = k + 1

Next N

If k >= 2 Then

Sheets("PAIRS").Cells(pair, 1) = word1

Sheets("PAIRS").Cells(pair, 2) = word2

Sheets("PAIRS").Cells(pair, 3) = k

pair = pair + 1

End If

Next j

Next i

End Sub